nkeynes@284: /** nkeynes@333: * $Id: pvr2mem.c,v 1.8 2007-01-27 06:21:35 nkeynes Exp $ nkeynes@284: * nkeynes@284: * PVR2 (Video) VRAM handling routines (mainly for the 64-bit region) nkeynes@284: * nkeynes@284: * Copyright (c) 2005 Nathan Keynes. nkeynes@284: * nkeynes@284: * This program is free software; you can redistribute it and/or modify nkeynes@284: * it under the terms of the GNU General Public License as published by nkeynes@284: * the Free Software Foundation; either version 2 of the License, or nkeynes@284: * (at your option) any later version. nkeynes@284: * nkeynes@284: * This program is distributed in the hope that it will be useful, nkeynes@284: * but WITHOUT ANY WARRANTY; without even the implied warranty of nkeynes@284: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the nkeynes@284: * GNU General Public License for more details. nkeynes@284: */ nkeynes@284: #include "pvr2.h" nkeynes@325: #include "asic.h" nkeynes@309: #include nkeynes@309: #include nkeynes@284: nkeynes@284: extern char *video_base; nkeynes@284: nkeynes@325: void pvr2_dma_write( sh4addr_t destaddr, char *src, uint32_t count ) nkeynes@325: { nkeynes@325: int region; nkeynes@325: nkeynes@325: switch( destaddr & 0x13800000 ) { nkeynes@325: case 0x10000000: nkeynes@325: case 0x12000000: nkeynes@325: pvr2_ta_write( src, count ); nkeynes@325: break; nkeynes@325: case 0x11000000: nkeynes@325: case 0x11800000: nkeynes@325: region = MMIO_READ( ASIC, PVRDMARGN1 ); nkeynes@325: if( region == 0 ) { nkeynes@325: pvr2_vram64_write( destaddr, src, count ); nkeynes@325: } else { nkeynes@325: char *dest = mem_get_region(destaddr); nkeynes@325: memcpy( dest, src, count ); nkeynes@325: } nkeynes@325: break; nkeynes@325: case 0x10800000: nkeynes@325: case 0x12800000: nkeynes@325: pvr2_yuv_write( src, count ); nkeynes@325: break; nkeynes@325: case 0x13000000: nkeynes@325: case 0x13800000: nkeynes@325: region = MMIO_READ( ASIC, PVRDMARGN2 ); nkeynes@325: if( region == 0 ) { nkeynes@325: 
pvr2_vram64_write( destaddr, src, count ); nkeynes@325: } else { nkeynes@325: char *dest = mem_get_region(destaddr); nkeynes@325: memcpy( dest, src, count ); nkeynes@325: } nkeynes@325: } nkeynes@325: } nkeynes@325: nkeynes@284: void pvr2_vram64_write( sh4addr_t destaddr, char *src, uint32_t length ) nkeynes@284: { nkeynes@284: int bank_flag = (destaddr & 0x04) >> 2; nkeynes@284: uint32_t *banks[2]; nkeynes@284: uint32_t *dwsrc; nkeynes@284: int i; nkeynes@284: nkeynes@284: destaddr = destaddr & 0x7FFFFF; nkeynes@284: if( destaddr + length > 0x800000 ) { nkeynes@284: length = 0x800000 - destaddr; nkeynes@284: } nkeynes@284: nkeynes@284: for( i=destaddr & 0xFFFFF000; i < destaddr + length; i+= PAGE_SIZE ) { nkeynes@284: texcache_invalidate_page( i ); nkeynes@284: } nkeynes@284: nkeynes@284: banks[0] = ((uint32_t *)(video_base + ((destaddr & 0x007FFFF8) >>1))); nkeynes@284: banks[1] = banks[0] + 0x100000; nkeynes@284: if( bank_flag ) nkeynes@284: banks[0]++; nkeynes@284: nkeynes@284: /* Handle non-aligned start of source */ nkeynes@284: if( destaddr & 0x03 ) { nkeynes@284: char *dest = ((char *)banks[bank_flag]) + (destaddr & 0x03); nkeynes@284: for( i= destaddr & 0x03; i < 4 && length > 0; i++, length-- ) { nkeynes@284: *dest++ = *src++; nkeynes@284: } nkeynes@284: bank_flag = !bank_flag; nkeynes@284: } nkeynes@284: nkeynes@284: dwsrc = (uint32_t *)src; nkeynes@284: while( length >= 4 ) { nkeynes@284: *banks[bank_flag]++ = *dwsrc++; nkeynes@284: bank_flag = !bank_flag; nkeynes@284: length -= 4; nkeynes@284: } nkeynes@284: nkeynes@284: /* Handle non-aligned end of source */ nkeynes@284: if( length ) { nkeynes@284: src = (char *)dwsrc; nkeynes@284: char *dest = (char *)banks[bank_flag]; nkeynes@284: while( length-- > 0 ) { nkeynes@284: *dest++ = *src++; nkeynes@284: } nkeynes@284: } nkeynes@284: } nkeynes@284: nkeynes@284: /** nkeynes@284: * Write an image to 64-bit vram, with a line-stride different from the line-size. 
nkeynes@284: * The destaddr must be 32-bit aligned, and both line_bytes and line_stride_bytes nkeynes@284: * must be multiples of 4. nkeynes@284: */ nkeynes@284: void pvr2_vram64_write_stride( sh4addr_t destaddr, char *src, uint32_t line_bytes, nkeynes@284: uint32_t line_stride_bytes, uint32_t line_count ) nkeynes@284: { nkeynes@284: int bank_flag = (destaddr & 0x04) >> 2; nkeynes@284: uint32_t *banks[2]; nkeynes@284: uint32_t *dwsrc; nkeynes@284: uint32_t line_gap; nkeynes@284: int line_gap_flag; nkeynes@284: int i,j; nkeynes@284: nkeynes@284: destaddr = destaddr & 0x7FFFF8; nkeynes@284: i = line_stride_bytes - line_bytes; nkeynes@284: line_gap_flag = i & 0x04; nkeynes@284: line_gap = i >> 3; nkeynes@284: line_bytes >>= 2; nkeynes@284: nkeynes@284: for( i=destaddr & 0xFFFFF000; i < destaddr + line_stride_bytes*line_count; i+= PAGE_SIZE ) { nkeynes@284: texcache_invalidate_page( i ); nkeynes@284: } nkeynes@284: nkeynes@284: banks[0] = (uint32_t *)(video_base + (destaddr >>1)); nkeynes@284: banks[1] = banks[0] + 0x100000; nkeynes@284: if( bank_flag ) nkeynes@284: banks[0]++; nkeynes@284: nkeynes@284: dwsrc = (uint32_t *)src; nkeynes@284: for( i=0; i= line_bytes. nkeynes@284: * This method is used to extract a "stride" texture from vram. 
nkeynes@284: */ nkeynes@284: void pvr2_vram64_read_stride( char *dest, uint32_t dest_line_bytes, sh4addr_t srcaddr, nkeynes@284: uint32_t src_line_bytes, uint32_t line_count ) nkeynes@284: { nkeynes@284: int bank_flag = (srcaddr & 0x04) >> 2; nkeynes@284: uint32_t *banks[2]; nkeynes@284: uint32_t *dwdest; nkeynes@284: uint32_t dest_line_gap; nkeynes@284: uint32_t src_line_gap; nkeynes@284: uint32_t line_bytes; nkeynes@284: int src_line_gap_flag; nkeynes@284: int i,j; nkeynes@284: nkeynes@284: srcaddr = srcaddr & 0x7FFFF8; nkeynes@284: if( src_line_bytes <= dest_line_bytes ) { nkeynes@284: dest_line_gap = (dest_line_bytes - src_line_bytes) >> 2; nkeynes@284: src_line_gap = 0; nkeynes@284: src_line_gap_flag = 0; nkeynes@284: line_bytes = src_line_bytes >> 2; nkeynes@284: } else { nkeynes@284: i = (src_line_bytes - dest_line_bytes); nkeynes@284: src_line_gap_flag = i & 0x04; nkeynes@284: src_line_gap = i >> 3; nkeynes@284: line_bytes = dest_line_bytes >> 2; nkeynes@284: } nkeynes@284: nkeynes@284: banks[0] = (uint32_t *)(video_base + (srcaddr>>1)); nkeynes@284: banks[1] = banks[0] + 0x100000; nkeynes@284: if( bank_flag ) nkeynes@284: banks[0]++; nkeynes@284: nkeynes@284: dwdest = (uint32_t *)dest; nkeynes@284: for( i=0; i> 1; nkeynes@315: uint8_t t1 = *banks[offset<4?0:1]++; nkeynes@315: uint8_t t2 = *banks[offset<3?0:1]++; nkeynes@315: dest[y1*stride + x1] = (t1 & 0x0F) | (t2<<4); nkeynes@315: dest[(y1+1)*stride + x1] = (t1>>4) | (t2&0xF0); nkeynes@315: } else if( width == 4 ) { nkeynes@315: pvr2_vram64_detwiddle_4( dest, banks, offset, x1, y1, 2, stride ); nkeynes@315: pvr2_vram64_detwiddle_4( dest, banks, offset+2, x1, y1+2, 2, stride ); nkeynes@315: pvr2_vram64_detwiddle_4( dest, banks, offset+4, x1+2, y1, 2, stride ); nkeynes@315: pvr2_vram64_detwiddle_4( dest, banks, offset+6, x1+2, y1+2, 2, stride ); nkeynes@315: nkeynes@315: } else { nkeynes@315: int subdivide = width >> 1; nkeynes@315: pvr2_vram64_detwiddle_4( dest, banks, offset, x1, y1, subdivide, stride ); 
/**
 * Recursively detwiddle a square block of 8-bit pixels.
 *
 * A block wider than 2 is split into four quadrants which are processed in
 * the order (x1,y1), (x1,y1+h), (x1+h,y1), (x1+h,y1+h) where h = width/2.
 * A 2x2 block consumes four source bytes, stored to the destination in the
 * order (x1,y1), (x1,y1+1), (x1+1,y1), (x1+1,y1+1). The bytes are taken
 * from banks[0] until its current 4-byte word is exhausted (as given by
 * offset) and from banks[1] thereafter; the two bank pointers are then
 * swapped ready for the next 2x2 block.
 *
 * @param dest   Destination image buffer
 * @param banks  Source data expressed as two bank pointers; both pointers
 *               are advanced (and swapped) as data is consumed
 * @param offset Offset into banks[0] specifying where the next byte
 *               to read is (0..3)
 * @param x1,y1  Destination coordinates
 * @param width  Width of current destination block; expected to be a power
 *               of 2 no smaller than 2. Anything smaller is ignored.
 * @param stride Total width of image (ie stride)
 */
static void pvr2_vram64_detwiddle_8( uint8_t *dest, uint8_t *banks[2], int offset,
                                     int x1, int y1, int width, int stride )
{
    if( width < 2 ) {
        /* Nothing sensible to do, and halving would never reach the 2x2
         * base case - bail out instead of recursing without bound. */
        return;
    }

    if( width == 2 ) {
        uint8_t *top = dest + y1*stride + x1;
        uint8_t *bottom = dest + (y1+1)*stride + x1;
        uint8_t *tmp;

        /* The read order matters: each read advances its bank pointer */
        top[0] = *banks[0]++;
        bottom[0] = *banks[offset<3?0:1]++;
        top[1] = *banks[offset<2?0:1]++;
        bottom[1] = *banks[offset==0?0:1]++;

        /* swap banks */
        tmp = banks[0];
        banks[0] = banks[1];
        banks[1] = tmp;
    } else {
        int half = width >> 1;
        pvr2_vram64_detwiddle_8( dest, banks, offset, x1, y1, half, stride );
        pvr2_vram64_detwiddle_8( dest, banks, offset, x1, y1+half, half, stride );
        pvr2_vram64_detwiddle_8( dest, banks, offset, x1+half, y1, half, stride );
        pvr2_vram64_detwiddle_8( dest, banks, offset, x1+half, y1+half, half, stride );
    }
}
/**
 * Recursively detwiddle a square block of 16-bit pixels.
 *
 * A block wider than 2 is split into four quadrants which are processed in
 * the order (x1,y1), (x1,y1+h), (x1+h,y1), (x1+h,y1+h) where h = width/2.
 * A 2x2 block consumes two words from each bank, stored to the destination
 * in the order (x1,y1), (x1,y1+1), (x1+1,y1), (x1+1,y1+1); offset decides
 * which bank supplies the second and fourth pixel.
 *
 * @param dest   Destination image buffer
 * @param banks  Source data expressed as two bank pointers; both pointers
 *               are advanced as data is consumed
 * @param offset Offset into banks[0] specifying where the next word
 *               to read is (0 or 1)
 * @param x1,y1  Destination coordinates
 * @param width  Width of current destination block; expected to be a power
 *               of 2 no smaller than 2. Anything smaller is ignored.
 * @param stride Total width of image (ie stride)
 */
static void pvr2_vram64_detwiddle_16( uint16_t *dest, uint16_t *banks[2], int offset,
                                      int x1, int y1, int width, int stride )
{
    if( width < 2 ) {
        /* Halving would never reach the 2x2 base case - bail out instead
         * of recursing without bound. */
        return;
    }

    if( width == 2 ) {
        uint16_t *top = dest + y1*stride + x1;
        uint16_t *bottom = dest + (y1+1)*stride + x1;

        /* The read order matters: each read advances its bank pointer */
        top[0] = *banks[0]++;
        bottom[0] = *banks[offset]++;
        top[1] = *banks[1]++;
        bottom[1] = *banks[offset^1]++;
    } else {
        int half = width >> 1;
        pvr2_vram64_detwiddle_16( dest, banks, offset, x1, y1, half, stride );
        pvr2_vram64_detwiddle_16( dest, banks, offset, x1, y1+half, half, stride );
        pvr2_vram64_detwiddle_16( dest, banks, offset, x1+half, y1, half, stride );
        pvr2_vram64_detwiddle_16( dest, banks, offset, x1+half, y1+half, half, stride );
    }
}
nkeynes@315: * @param dest destination buffer, which must be at least width*height/2 in length nkeynes@315: * @param srcaddr source address in vram nkeynes@315: * @param width image width (must be a power of 2) nkeynes@315: * @param height image height (must be a power of 2) nkeynes@315: */ nkeynes@315: void pvr2_vram64_read_twiddled_4( char *dest, sh4addr_t srcaddr, uint32_t width, uint32_t height ) nkeynes@315: { nkeynes@315: int offset_flag = (srcaddr & 0x07); nkeynes@315: uint8_t *banks[2]; nkeynes@315: uint8_t *wdest = (uint8_t*)dest; nkeynes@315: uint32_t stride = width >> 1; nkeynes@315: int i,j; nkeynes@315: nkeynes@315: srcaddr = srcaddr & 0x7FFFF8; nkeynes@315: nkeynes@315: banks[0] = (uint8_t *)(video_base + (srcaddr>>1)); nkeynes@315: banks[1] = banks[0] + 0x400000; nkeynes@315: if( offset_flag & 0x04 ) { // If source is not 64-bit aligned, swap the banks nkeynes@315: uint8_t *tmp = banks[0]; nkeynes@315: banks[0] = banks[1]; nkeynes@315: banks[1] = tmp + 4; nkeynes@315: offset_flag &= 0x03; nkeynes@315: } nkeynes@315: banks[0] += offset_flag; nkeynes@315: nkeynes@315: if( width > height ) { nkeynes@315: for( i=0; i width ) { nkeynes@315: for( i=0; i>1)); nkeynes@310: banks[1] = banks[0] + 0x400000; nkeynes@310: if( offset_flag & 0x04 ) { // If source is not 64-bit aligned, swap the banks nkeynes@310: uint8_t *tmp = banks[0]; nkeynes@310: banks[0] = banks[1]; nkeynes@310: banks[1] = tmp + 4; nkeynes@310: offset_flag &= 0x03; nkeynes@310: } nkeynes@310: banks[0] += offset_flag; nkeynes@310: nkeynes@310: if( width > height ) { nkeynes@310: for( i=0; i width ) { nkeynes@310: for( i=0; i> 1; nkeynes@310: uint16_t *banks[2]; nkeynes@310: uint16_t *wdest = (uint16_t*)dest; nkeynes@310: int i,j; nkeynes@310: nkeynes@310: srcaddr = srcaddr & 0x7FFFF8; nkeynes@310: nkeynes@310: banks[0] = (uint16_t *)(video_base + (srcaddr>>1)); nkeynes@310: banks[1] = banks[0] + 0x200000; nkeynes@310: if( offset_flag & 0x02 ) { // If source is not 64-bit aligned, swap the banks 
nkeynes@310: uint16_t *tmp = banks[0]; nkeynes@310: banks[0] = banks[1]; nkeynes@310: banks[1] = tmp + 2; nkeynes@310: offset_flag &= 0x01; nkeynes@310: } nkeynes@310: banks[0] += offset_flag; nkeynes@310: nkeynes@310: nkeynes@310: if( width > height ) { nkeynes@310: for( i=0; i width ) { nkeynes@310: for( i=0; i= src ) { nkeynes@284: memcpy( dest, p, line_length ); nkeynes@333: p -= src_stride; nkeynes@284: dest += line_length; nkeynes@284: } nkeynes@284: } nkeynes@284: nkeynes@284: void pvr2_vram64_read( char *dest, sh4addr_t srcaddr, uint32_t length ) nkeynes@284: { nkeynes@284: int bank_flag = (srcaddr & 0x04) >> 2; nkeynes@284: uint32_t *banks[2]; nkeynes@284: uint32_t *dwdest; nkeynes@284: int i; nkeynes@284: nkeynes@284: srcaddr = srcaddr & 0x7FFFFF; nkeynes@284: if( srcaddr + length > 0x800000 ) nkeynes@284: length = 0x800000 - srcaddr; nkeynes@284: nkeynes@284: banks[0] = ((uint32_t *)(video_base + ((srcaddr&0x007FFFF8)>>1))); nkeynes@284: banks[1] = banks[0] + 0x100000; nkeynes@284: if( bank_flag ) nkeynes@284: banks[0]++; nkeynes@284: nkeynes@284: /* Handle non-aligned start of source */ nkeynes@284: if( srcaddr & 0x03 ) { nkeynes@284: char *src = ((char *)banks[bank_flag]) + (srcaddr & 0x03); nkeynes@284: for( i= srcaddr & 0x03; i < 4 && length > 0; i++, length-- ) { nkeynes@284: *dest++ = *src++; nkeynes@284: } nkeynes@284: bank_flag = !bank_flag; nkeynes@284: } nkeynes@284: nkeynes@284: dwdest = (uint32_t *)dest; nkeynes@284: while( length >= 4 ) { nkeynes@284: *dwdest++ = *banks[bank_flag]++; nkeynes@284: bank_flag = !bank_flag; nkeynes@284: length -= 4; nkeynes@284: } nkeynes@284: nkeynes@284: /* Handle non-aligned end of source */ nkeynes@284: if( length ) { nkeynes@284: dest = (char *)dwdest; nkeynes@284: char *src = (char *)banks[bank_flag]; nkeynes@284: while( length-- > 0 ) { nkeynes@284: *dest++ = *src++; nkeynes@284: } nkeynes@284: } nkeynes@284: } nkeynes@284: nkeynes@309: void pvr2_vram64_dump_file( sh4addr_t addr, uint32_t length, gchar 
*filename ) nkeynes@309: { nkeynes@309: uint32_t tmp[length>>2]; nkeynes@309: FILE *f = fopen(filename, "wo"); nkeynes@309: unsigned int i, j; nkeynes@309: nkeynes@309: if( f == NULL ) { nkeynes@309: ERROR( "Unable to write to dump file '%s' (%s)", filename, strerror(errno) ); nkeynes@309: return; nkeynes@309: } nkeynes@309: pvr2_vram64_read( tmp, addr, length ); nkeynes@309: fprintf( f, "%08X\n", addr ); nkeynes@309: for( i =0; i>2; i+=8 ) { nkeynes@309: for( j=i; jrender_addr == -1 ) nkeynes@315: return; nkeynes@327: GLenum type = colour_formats[buffer->colour_format].type; nkeynes@327: GLenum format = colour_formats[buffer->colour_format].format; nkeynes@327: int line_size = buffer->width * colour_formats[buffer->colour_format].bpp; nkeynes@327: int size = line_size * buffer->height; nkeynes@315: nkeynes@315: if( backBuffer ) { nkeynes@315: glFinish(); nkeynes@315: glReadBuffer( GL_BACK ); nkeynes@315: } else { nkeynes@315: glReadBuffer( GL_FRONT ); nkeynes@315: } nkeynes@315: nkeynes@315: if( buffer->render_addr & 0xFF000000 == 0x04000000 ) { nkeynes@315: /* Interlaced buffer. Go the double copy... :( */ nkeynes@315: char target[size]; nkeynes@315: glReadPixels( 0, 0, buffer->width, buffer->height, format, type, target ); nkeynes@315: pvr2_vram64_write( buffer->render_addr, target, size ); nkeynes@315: } else { nkeynes@315: /* Regular buffer */ nkeynes@315: char target[size]; nkeynes@315: glReadPixels( 0, 0, buffer->width, buffer->height, format, type, target ); nkeynes@333: if( (buffer->scale & 0xFFFF) == 0x0800 ) { nkeynes@333: pvr2_vram_write_invert( buffer->render_addr, target, size, line_size, line_size << 1 ); nkeynes@333: } else { nkeynes@333: pvr2_vram_write_invert( buffer->render_addr, target, size, line_size, line_size ); nkeynes@333: } nkeynes@315: } nkeynes@315: } nkeynes@315: nkeynes@315: nkeynes@315: /** nkeynes@315: * Copy data from PVR ram into the GL render buffer. 
nkeynes@315: * nkeynes@315: * @param buffer A render buffer indicating the address to read from, and the nkeynes@315: * format the data is in. nkeynes@315: * @param backBuffer TRUE to write the back buffer, FALSE for nkeynes@315: * the front buffer. nkeynes@315: */ nkeynes@315: void pvr2_render_buffer_copy_from_sh4( pvr2_render_buffer_t buffer, nkeynes@315: gboolean backBuffer ) nkeynes@315: { nkeynes@315: if( buffer->render_addr == -1 ) nkeynes@315: return; nkeynes@315: nkeynes@327: GLenum type = colour_formats[buffer->colour_format].type; nkeynes@327: GLenum format = colour_formats[buffer->colour_format].format; nkeynes@327: int line_size = buffer->width * colour_formats[buffer->colour_format].bpp; nkeynes@327: int size = line_size * buffer->height; nkeynes@327: nkeynes@315: if( backBuffer ) { nkeynes@315: glDrawBuffer( GL_BACK ); nkeynes@315: } else { nkeynes@315: glDrawBuffer( GL_FRONT ); nkeynes@315: } nkeynes@315: nkeynes@315: glRasterPos2i( 0, 0 ); nkeynes@315: if( buffer->render_addr & 0xFF000000 == 0x04000000 ) { nkeynes@315: /* Interlaced buffer. Go the double copy... :( */ nkeynes@315: char target[size]; nkeynes@315: pvr2_vram64_read( target, buffer->render_addr, size ); nkeynes@315: glDrawPixels( buffer->width, buffer->height, nkeynes@315: format, type, target ); nkeynes@315: } else { nkeynes@315: /* Regular buffer - go direct */ nkeynes@315: char *target = mem_get_region( buffer->render_addr ); nkeynes@315: glDrawPixels( buffer->width, buffer->height, nkeynes@315: format, type, target ); nkeynes@315: } nkeynes@315: }