Search
lxdream.org :: lxdream/src/pvr2/pvr2mem.c
lxdream 0.9.1
released Jun 29
Download Now
filename src/pvr2/pvr2mem.c
changeset 934:3acd3b3ee6d1
prev931:430048ea8b71
next1067:d3c00ffccfcd
author nkeynes
date Fri Dec 26 14:25:23 2008 +0000 (15 years ago)
branchlxdream-mem
permissions -rw-r--r--
last change Change RAM regions to use static arrays rather than mmap regions, for a 2-3% performance gain.
General mem cleanups, including some save state fixes that break states again.
view annotate diff log raw
     1 /**
     2  * $Id$
     3  *
     4  * PVR2 (Video) VRAM handling routines (mainly for the 64-bit region)
     5  *
     6  * Copyright (c) 2005 Nathan Keynes.
     7  *
     8  * This program is free software; you can redistribute it and/or modify
     9  * it under the terms of the GNU General Public License as published by
    10  * the Free Software Foundation; either version 2 of the License, or
    11  * (at your option) any later version.
    12  *
    13  * This program is distributed in the hope that it will be useful,
    14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    16  * GNU General Public License for more details.
    17  */
    18 #include <string.h>
    19 #include <stdio.h>
    20 #include <errno.h>
    21 #include "sh4/sh4core.h"
    22 #include "pvr2.h"
    23 #include "asic.h"
    24 #include "dream.h"
/* Backing store for PVR2 video RAM; the accessors in this file all index into this array. */
    26 unsigned char pvr2_main_ram[8 MB];
    28 /************************* VRAM32 address space ***************************/
    30 static int32_t FASTCALL pvr2_vram32_read_long( sh4addr_t addr )
    31 {
    32     pvr2_render_buffer_invalidate(addr, FALSE);
    33     return *((int32_t *)(pvr2_main_ram+(addr&0x007FFFFF)));
    34 }
    35 static int32_t FASTCALL pvr2_vram32_read_word( sh4addr_t addr )
    36 {
    37     pvr2_render_buffer_invalidate(addr, FALSE);
    38     return SIGNEXT16(*((int16_t *)(pvr2_main_ram+(addr&0x007FFFFF))));
    39 }
    40 static int32_t FASTCALL pvr2_vram32_read_byte( sh4addr_t addr )
    41 {
    42     pvr2_render_buffer_invalidate(addr, FALSE);
    43     return SIGNEXT8(*((int8_t *)(pvr2_main_ram+(addr&0x007FFFFF))));
    44 }
    45 static void FASTCALL pvr2_vram32_write_long( sh4addr_t addr, uint32_t val )
    46 {
    47     pvr2_render_buffer_invalidate(addr, TRUE);
    48     *(uint32_t *)(pvr2_main_ram + (addr&0x007FFFFF)) = val;
    49 }
    50 static void FASTCALL pvr2_vram32_write_word( sh4addr_t addr, uint32_t val )
    51 {
    52     pvr2_render_buffer_invalidate(addr, TRUE);
    53     *(uint16_t *)(pvr2_main_ram + (addr&0x007FFFFF)) = (uint16_t)val;
    54 }
    55 static void FASTCALL pvr2_vram32_write_byte( sh4addr_t addr, uint32_t val )
    56 {
    57     pvr2_render_buffer_invalidate(addr, TRUE);
    58     *(uint8_t *)(pvr2_main_ram + (addr&0x007FFFFF)) = (uint8_t)val;
    59 }
    60 static void FASTCALL pvr2_vram32_read_burst( unsigned char *dest, sh4addr_t addr )
    61 {
    62     // Render buffers pretty much have to be (at least) 32-byte aligned
    63     pvr2_render_buffer_invalidate(addr, FALSE);
    64     memcpy( dest, (pvr2_main_ram + (addr&0x007FFFFF)), 32 );
    65 }
    66 static void FASTCALL pvr2_vram32_write_burst( sh4addr_t addr, unsigned char *src )
    67 {
    68     // Render buffers pretty much have to be (at least) 32-byte aligned
    69     pvr2_render_buffer_invalidate(addr, TRUE);
    70     memcpy( (pvr2_main_ram + (addr&0x007FFFFF)), src, 32 );    
    71 }
    73 struct mem_region_fn mem_region_vram32 = { pvr2_vram32_read_long, pvr2_vram32_write_long, 
    74         pvr2_vram32_read_word, pvr2_vram32_write_word, 
    75         pvr2_vram32_read_byte, pvr2_vram32_write_byte, 
    76         pvr2_vram32_read_burst, pvr2_vram32_write_burst }; 
    78 /************************* VRAM64 address space ***************************/
    80 #define TRANSLATE_VIDEO_64BIT_ADDRESS(a)  ( (((a)&0x00FFFFF8)>>1)|(((a)&0x00000004)<<20)|((a)&0x03) )
    82 static int32_t FASTCALL pvr2_vram64_read_long( sh4addr_t addr )
    83 {
    84     addr = TRANSLATE_VIDEO_64BIT_ADDRESS(addr);
    85     pvr2_render_buffer_invalidate(addr, FALSE);
    86     return *((int32_t *)(pvr2_main_ram+(addr&0x007FFFFF)));
    87 }
    88 static int32_t FASTCALL pvr2_vram64_read_word( sh4addr_t addr )
    89 {
    90     addr = TRANSLATE_VIDEO_64BIT_ADDRESS(addr);
    91     pvr2_render_buffer_invalidate(addr, FALSE);
    92     return SIGNEXT16(*((int16_t *)(pvr2_main_ram+(addr&0x007FFFFF))));
    93 }
    94 static int32_t FASTCALL pvr2_vram64_read_byte( sh4addr_t addr )
    95 {
    96     addr = TRANSLATE_VIDEO_64BIT_ADDRESS(addr);
    97     pvr2_render_buffer_invalidate(addr, FALSE);
    98     return SIGNEXT8(*((int8_t *)(pvr2_main_ram+(addr&0x007FFFFF))));
    99 }
   100 static void FASTCALL pvr2_vram64_write_long( sh4addr_t addr, uint32_t val )
   101 {
   102     texcache_invalidate_page(addr& 0x007FFFFF);
   103     addr = TRANSLATE_VIDEO_64BIT_ADDRESS(addr);
   104     pvr2_render_buffer_invalidate(addr, TRUE);
   105     *(uint32_t *)(pvr2_main_ram + (addr&0x007FFFFF)) = val;
   106 }
   107 static void FASTCALL pvr2_vram64_write_word( sh4addr_t addr, uint32_t val )
   108 {
   109     texcache_invalidate_page(addr& 0x007FFFFF);
   110     addr = TRANSLATE_VIDEO_64BIT_ADDRESS(addr);
   111     pvr2_render_buffer_invalidate(addr, TRUE);
   112     *(uint16_t *)(pvr2_main_ram + (addr&0x007FFFFF)) = (uint16_t)val;
   113 }
   114 static void FASTCALL pvr2_vram64_write_byte( sh4addr_t addr, uint32_t val )
   115 {
   116     texcache_invalidate_page(addr& 0x007FFFFF);
   117     addr = TRANSLATE_VIDEO_64BIT_ADDRESS(addr);
   118     pvr2_render_buffer_invalidate(addr, TRUE);
   119     *(uint8_t *)(pvr2_main_ram + (addr&0x007FFFFF)) = (uint8_t)val;
   120 }
   121 static void FASTCALL pvr2_vram64_read_burst( unsigned char *dest, sh4addr_t addr )
   122 {
   123     pvr2_vram64_read( dest, addr, 32 );
   124 }
   125 static void FASTCALL pvr2_vram64_write_burst( sh4addr_t addr, unsigned char *src )
   126 {
   127     pvr2_vram64_write( addr, src, 32 );
   128 }
   130 struct mem_region_fn mem_region_vram64 = { pvr2_vram64_read_long, pvr2_vram64_write_long, 
   131         pvr2_vram64_read_word, pvr2_vram64_write_word, 
   132         pvr2_vram64_read_byte, pvr2_vram64_write_byte, 
   133         pvr2_vram64_read_burst, pvr2_vram64_write_burst }; 
   135 /******************************* Burst areas ******************************/
   137 static void FASTCALL pvr2_vramdma1_write_burst( sh4addr_t destaddr, unsigned char *src )
   138 {
   139     int region = MMIO_READ( ASIC, PVRDMARGN1 );
   140     if( region == 0 ) {
   141         pvr2_vram64_write( destaddr, src, 32 );
   142     } else {
   143         destaddr &= PVR2_RAM_MASK;
   144         unsigned char *dest = pvr2_main_ram + destaddr;
   145         memcpy( dest, src, 32 );
   146     }   
   147 }
   149 static void FASTCALL pvr2_vramdma2_write_burst( sh4addr_t destaddr, unsigned char *src )
   150 {
   151     int region = MMIO_READ( ASIC, PVRDMARGN2 );
   152     if( region == 0 ) {
   153         pvr2_vram64_write( destaddr, src, 32 );
   154     } else {
   155         destaddr &= PVR2_RAM_MASK;
   156         unsigned char *dest = pvr2_main_ram + destaddr;
   157         memcpy( dest, src, 32 );
   158     }
   159 }
   161 static void FASTCALL pvr2_yuv_write_burst( sh4addr_t destaddr, unsigned char *src )
   162 {
   163     pvr2_yuv_write( src, 32 );
   164 }
   166 struct mem_region_fn mem_region_pvr2ta = {
   167         unmapped_read_long, unmapped_write_long,
   168         unmapped_read_long, unmapped_write_long,
   169         unmapped_read_long, unmapped_write_long,
   170         unmapped_read_burst, pvr2_ta_write_burst };
   172 struct mem_region_fn mem_region_pvr2yuv = {
   173         unmapped_read_long, unmapped_write_long,
   174         unmapped_read_long, unmapped_write_long,
   175         unmapped_read_long, unmapped_write_long,
   176         unmapped_read_burst, pvr2_yuv_write_burst };
   178 struct mem_region_fn mem_region_pvr2vdma1 = {
   179         unmapped_read_long, unmapped_write_long,
   180         unmapped_read_long, unmapped_write_long,
   181         unmapped_read_long, unmapped_write_long,
   182         unmapped_read_burst, pvr2_vramdma1_write_burst };
   184 struct mem_region_fn mem_region_pvr2vdma2 = {
   185         unmapped_read_long, unmapped_write_long,
   186         unmapped_read_long, unmapped_write_long,
   187         unmapped_read_long, unmapped_write_long,
   188         unmapped_read_burst, pvr2_vramdma2_write_burst };
   191 void pvr2_dma_write( sh4addr_t destaddr, unsigned char *src, uint32_t count )
   192 {
   193     int region;
   195     switch( destaddr & 0x13800000 ) {
   196     case 0x10000000:
   197     case 0x12000000:
   198         pvr2_ta_write( src, count );
   199         break;
   200     case 0x11000000:
   201     case 0x11800000:
   202         region = MMIO_READ( ASIC, PVRDMARGN1 );
   203         if( region == 0 ) {
   204             pvr2_vram64_write( destaddr, src, count );
   205         } else {
   206             destaddr &= PVR2_RAM_MASK;
   207             unsigned char *dest = pvr2_main_ram + destaddr;
   208             if( PVR2_RAM_SIZE - destaddr < count ) {
   209                 count = PVR2_RAM_SIZE - destaddr;
   210             }
   211             memcpy( dest, src, count );
   212         }
   213         break;
   214     case 0x10800000:
   215     case 0x12800000:
   216         pvr2_yuv_write( src, count );
   217         break;
   218     case 0x13000000:
   219     case 0x13800000:
   220         region = MMIO_READ( ASIC, PVRDMARGN2 );
   221         if( region == 0 ) {
   222             pvr2_vram64_write( destaddr, src, count );
   223         } else {
   224             destaddr &= PVR2_RAM_MASK;
   225             unsigned char *dest = pvr2_main_ram + destaddr;
   226             if( PVR2_RAM_SIZE - destaddr < count ) {
   227                 count = PVR2_RAM_SIZE - destaddr;
   228             }
   229             memcpy( dest, src, count );
   230         }
   231     }
   232 }
   234 void pvr2_vram64_write( sh4addr_t destaddr, unsigned char *src, uint32_t length )
   235 {
   236     int bank_flag = (destaddr & 0x04) >> 2;
   237     uint32_t *banks[2];
   238     uint32_t *dwsrc;
   239     int i;
   241     destaddr = destaddr & 0x7FFFFF;
   242     if( destaddr + length > 0x800000 ) {
   243         length = 0x800000 - destaddr;
   244     }
   246     for( i=destaddr & 0xFFFFF000; i < destaddr + length; i+= LXDREAM_PAGE_SIZE ) {
   247         texcache_invalidate_page( i );
   248     }
   250     banks[0] = ((uint32_t *)(pvr2_main_ram + ((destaddr & 0x007FFFF8) >>1)));
   251     banks[1] = banks[0] + 0x100000;
   252     if( bank_flag )
   253         banks[0]++;
   255     /* Handle non-aligned start of source */
   256     if( destaddr & 0x03 ) {
   257         unsigned char *dest = ((unsigned char *)banks[bank_flag]) + (destaddr & 0x03);
   258         for( i= destaddr & 0x03; i < 4 && length > 0; i++, length-- ) {
   259             *dest++ = *src++;
   260         }
   261         bank_flag = !bank_flag;
   262     }
   264     dwsrc = (uint32_t *)src;
   265     while( length >= 4 ) {
   266         *banks[bank_flag]++ = *dwsrc++;
   267         bank_flag = !bank_flag;
   268         length -= 4;
   269     }
   271     /* Handle non-aligned end of source */
   272     if( length ) {
   273         src = (unsigned char *)dwsrc;
   274         unsigned char *dest = (unsigned char *)banks[bank_flag];
   275         while( length-- > 0 ) {
   276             *dest++ = *src++;
   277         }
   278     }
   279 }
   281 /**
   282  * Write an image to 64-bit vram, with a line-stride different from the line-size.
   283  * The destaddr must be 64-bit aligned, and both line_bytes and line_stride_bytes
   284  * must be multiples of 8.
   285  */
   286 void pvr2_vram64_write_stride( sh4addr_t destaddr, unsigned char *src, uint32_t line_bytes,
   287                                uint32_t line_stride_bytes, uint32_t line_count )
   288 {
   289     int i,j;
   290     uint32_t *banks[2];
   291     uint32_t *dwsrc = (uint32_t *)src;
   292     uint32_t line_gap = (line_stride_bytes - line_bytes) >> 3;
   294     destaddr = destaddr & 0x7FFFF8;
   295     line_bytes >>= 3;
   297     for( i=destaddr; i < destaddr + line_stride_bytes*line_count; i+= LXDREAM_PAGE_SIZE ) {
   298         texcache_invalidate_page( i );
   299     }
   301     banks[0] = (uint32_t *)(pvr2_main_ram + (destaddr >>1));
   302     banks[1] = banks[0] + 0x100000;
   304     for( i=0; i<line_count; i++ ) {
   305         for( j=0; j<line_bytes; j++ ) {
   306             *banks[0]++ = *dwsrc++;
   307             *banks[1]++ = *dwsrc++;
   308         }
   309         banks[0] += line_gap;
   310         banks[1] += line_gap;
   311     }
   312 }
   314 /**
   315  * Read an image from 64-bit vram, with a destination line-stride different from the line-size.
   316  * The srcaddr must be 32-bit aligned, and both line_bytes and line_stride_bytes
   317  * must be multiples of 4. line_stride_bytes must be >= line_bytes.
   318  * This method is used to extract a "stride" texture from vram.
   319  */
   320 void pvr2_vram64_read_stride( unsigned char *dest, uint32_t dest_line_bytes, sh4addr_t srcaddr,
   321                               uint32_t src_line_bytes, uint32_t line_count )
   322 {
   323     int bank_flag = (srcaddr & 0x04) >> 2;
   324     uint32_t *banks[2];
   325     uint32_t *dwdest;
   326     uint32_t dest_line_gap = 0;
   327     uint32_t src_line_gap = 0;
   328     uint32_t line_bytes;
   329     int src_line_gap_flag;
   330     int i,j;
   332     srcaddr = srcaddr & 0x7FFFF8;
   333     if( src_line_bytes <= dest_line_bytes ) {
   334         dest_line_gap = (dest_line_bytes - src_line_bytes) >> 2;
   335         src_line_gap = 0;
   336         src_line_gap_flag = 0;
   337         line_bytes = src_line_bytes >> 2;
   338     } else {
   339         i = (src_line_bytes - dest_line_bytes);
   340         src_line_gap_flag = i & 0x04;
   341         src_line_gap = i >> 3;
   342         line_bytes = dest_line_bytes >> 2;
   343     }
   345     banks[0] = (uint32_t *)(pvr2_main_ram + (srcaddr>>1));
   346     banks[1] = banks[0] + 0x100000;
   347     if( bank_flag )
   348         banks[0]++;
   350     dwdest = (uint32_t *)dest;
   351     for( i=0; i<line_count; i++ ) {
   352         for( j=0; j<line_bytes; j++ ) {
   353             *dwdest++ = *banks[bank_flag]++;
   354             bank_flag = !bank_flag;
   355         }
   356         dwdest += dest_line_gap;
   357         banks[0] += src_line_gap;
   358         banks[1] += src_line_gap;
   359         if( src_line_gap_flag ) {
   360             banks[bank_flag]++;
   361             bank_flag = !bank_flag;
   362         }
   363     }
   364 }
   367 /**
   368  * @param dest Destination image buffer
   369  * @param banks Source data expressed as two bank pointers
   370  * @param offset Offset into banks[0] specifying where the next byte
   371  *  to read is (0..3)
   372  * @param x1,y1 Destination coordinates
   373  * @param width Width of current destination block
   374  * @param stride Total width of image (ie stride) in bytes
   375  */
   377 static void pvr2_vram64_detwiddle_4( uint8_t *dest, uint8_t *banks[2], int offset,
   378                                      int x1, int y1, int width, int stride )
   379 {
   380     if( width == 2 ) {
   381         x1 = x1 >> 1;
   382         uint8_t t1 = *banks[offset<4?0:1]++;
   383         uint8_t t2 = *banks[offset<3?0:1]++;
   384         dest[y1*stride + x1] = (t1 & 0x0F) | (t2<<4);
   385         dest[(y1+1)*stride + x1] = (t1>>4) | (t2&0xF0);
   386     } else if( width == 4 ) {
   387         pvr2_vram64_detwiddle_4( dest, banks, offset, x1, y1, 2, stride );
   388         pvr2_vram64_detwiddle_4( dest, banks, offset+2, x1, y1+2, 2, stride );
   389         pvr2_vram64_detwiddle_4( dest, banks, offset+4, x1+2, y1, 2, stride );
   390         pvr2_vram64_detwiddle_4( dest, banks, offset+6, x1+2, y1+2, 2, stride );
   392     } else {
   393         int subdivide = width >> 1;
   394         pvr2_vram64_detwiddle_4( dest, banks, offset, x1, y1, subdivide, stride );
   395         pvr2_vram64_detwiddle_4( dest, banks, offset, x1, y1+subdivide, subdivide, stride );
   396         pvr2_vram64_detwiddle_4( dest, banks, offset, x1+subdivide, y1, subdivide, stride );
   397         pvr2_vram64_detwiddle_4( dest, banks, offset, x1+subdivide, y1+subdivide, subdivide, stride );
   398     }
   399 }
   401 /**
   402  * @param dest Destination image buffer
   403  * @param banks Source data expressed as two bank pointers
   404  * @param offset Offset into banks[0] specifying where the next byte
   405  *  to read is (0..3)
   406  * @param x1,y1 Destination coordinates
   407  * @param width Width of current destination block
   408  * @param stride Total width of image (ie stride)
   409  */
   411 static void pvr2_vram64_detwiddle_8( uint8_t *dest, uint8_t *banks[2], int offset,
   412                                      int x1, int y1, int width, int stride )
   413 {
   414     if( width == 2 ) {
   415         dest[y1*stride + x1] = *banks[0]++;
   416         dest[(y1+1)*stride + x1] = *banks[offset<3?0:1]++;
   417         dest[y1*stride + x1 + 1] = *banks[offset<2?0:1]++;
   418         dest[(y1+1)*stride + x1 + 1] = *banks[offset==0?0:1]++;
   419         uint8_t *tmp = banks[0]; /* swap banks */
   420         banks[0] = banks[1];
   421         banks[1] = tmp;
   422     } else {
   423         int subdivide = width >> 1;
   424         pvr2_vram64_detwiddle_8( dest, banks, offset, x1, y1, subdivide, stride );
   425         pvr2_vram64_detwiddle_8( dest, banks, offset, x1, y1+subdivide, subdivide, stride );
   426         pvr2_vram64_detwiddle_8( dest, banks, offset, x1+subdivide, y1, subdivide, stride );
   427         pvr2_vram64_detwiddle_8( dest, banks, offset, x1+subdivide, y1+subdivide, subdivide, stride );
   428     }
   429 }
   431 /**
   432  * @param dest Destination image buffer
   433  * @param banks Source data expressed as two bank pointers
   434  * @param offset Offset into banks[0] specifying where the next word
   435  *  to read is (0 or 1)
   436  * @param x1,y1 Destination coordinates
   437  * @param width Width of current destination block
   438  * @param stride Total width of image (ie stride)
   439  */
   441 static void pvr2_vram64_detwiddle_16( uint16_t *dest, uint16_t *banks[2], int offset,
   442                                       int x1, int y1, int width, int stride )
   443 {
   444     if( width == 2 ) {
   445         dest[y1*stride + x1] = *banks[0]++;
   446         dest[(y1+1)*stride + x1] = *banks[offset]++;
   447         dest[y1*stride + x1 + 1] = *banks[1]++;
   448         dest[(y1+1)*stride + x1 + 1] = *banks[offset^1]++;
   449     } else {
   450         int subdivide = width >> 1;
   451         pvr2_vram64_detwiddle_16( dest, banks, offset, x1, y1, subdivide, stride );
   452         pvr2_vram64_detwiddle_16( dest, banks, offset, x1, y1+subdivide, subdivide, stride );
   453         pvr2_vram64_detwiddle_16( dest, banks, offset, x1+subdivide, y1, subdivide, stride );
   454         pvr2_vram64_detwiddle_16( dest, banks, offset, x1+subdivide, y1+subdivide, subdivide, stride );
   455     }
   456 }
   458 /**
   459  * Read an image from 64-bit vram stored as twiddled 4-bit pixels. The
   460  * image is written out to the destination in detwiddled form.
   461  * @param dest destination buffer, which must be at least width*height/2 in length
   462  * @param srcaddr source address in vram
   463  * @param width image width (must be a power of 2)
   464  * @param height image height (must be a power of 2)
   465  */
   466 void pvr2_vram64_read_twiddled_4( unsigned char *dest, sh4addr_t srcaddr, uint32_t width, uint32_t height )
   467 {
   468     int offset_flag = (srcaddr & 0x07);
   469     uint8_t *banks[2];
   470     uint8_t *wdest = (uint8_t*)dest;
   471     uint32_t stride = width >> 1;
   472     int i;
   474     srcaddr = srcaddr & 0x7FFFF8;
   476     banks[0] = (uint8_t *)(pvr2_main_ram + (srcaddr>>1));
   477     banks[1] = banks[0] + 0x400000;
   478     if( offset_flag & 0x04 ) { // If source is not 64-bit aligned, swap the banks
   479         uint8_t *tmp = banks[0];
   480         banks[0] = banks[1];
   481         banks[1] = tmp + 4;
   482         offset_flag &= 0x03;
   483     }
   484     banks[0] += offset_flag;
   486     if( width > height ) {
   487         for( i=0; i<width; i+=height ) {
   488             pvr2_vram64_detwiddle_4( wdest, banks, offset_flag, i, 0, height, stride );
   489         }
   490     } else if( height > width ) {
   491         for( i=0; i<height; i+=width ) {
   492             pvr2_vram64_detwiddle_4( wdest, banks, offset_flag, 0, i, width, stride );
   493         }
   494     } else if( width == 1 ) {
   495         *wdest = *banks[0];
   496     } else {
   497         pvr2_vram64_detwiddle_4( wdest, banks, offset_flag, 0, 0, width, stride );
   498     }
   499 }
   501 /**
   502  * Read an image from 64-bit vram stored as twiddled 8-bit pixels. The
   503  * image is written out to the destination in detwiddled form.
   504  * @param dest destination buffer, which must be at least width*height in length
   505  * @param srcaddr source address in vram
   506  * @param width image width (must be a power of 2)
   507  * @param height image height (must be a power of 2)
   508  */
   509 void pvr2_vram64_read_twiddled_8( unsigned char *dest, sh4addr_t srcaddr, uint32_t width, uint32_t height )
   510 {
   511     int offset_flag = (srcaddr & 0x07);
   512     uint8_t *banks[2];
   513     uint8_t *wdest = (uint8_t*)dest;
   514     int i;
   516     srcaddr = srcaddr & 0x7FFFF8;
   518     banks[0] = (uint8_t *)(pvr2_main_ram + (srcaddr>>1));
   519     banks[1] = banks[0] + 0x400000;
   520     if( offset_flag & 0x04 ) { // If source is not 64-bit aligned, swap the banks
   521         uint8_t *tmp = banks[0];
   522         banks[0] = banks[1];
   523         banks[1] = tmp + 4;
   524         offset_flag &= 0x03;
   525     }
   526     banks[0] += offset_flag;
   528     if( width > height ) {
   529         for( i=0; i<width; i+=height ) {
   530             pvr2_vram64_detwiddle_8( wdest, banks, offset_flag, i, 0, height, width );
   531         }
   532     } else if( height > width ) {
   533         for( i=0; i<height; i+=width ) {
   534             pvr2_vram64_detwiddle_8( wdest, banks, offset_flag, 0, i, width, width );
   535         }
   536     } else if( width == 1 ) {
   537         *wdest = *banks[0];
   538     } else {
   539         pvr2_vram64_detwiddle_8( wdest, banks, offset_flag, 0, 0, width, width );
   540     }
   541 }
   543 /**
   544  * Read an image from 64-bit vram stored as twiddled 16-bit pixels. The
   545  * image is written out to the destination in detwiddled form.
   546  * @param dest destination buffer, which must be at least width*height*2 in length
   547  * @param srcaddr source address in vram (must be 16-bit aligned)
   548  * @param width image width (must be a power of 2)
   549  * @param height image height (must be a power of 2)
   550  */
   551 void pvr2_vram64_read_twiddled_16( unsigned char *dest, sh4addr_t srcaddr, uint32_t width, uint32_t height ) {
   552     int offset_flag = (srcaddr & 0x06) >> 1;
   553     uint16_t *banks[2];
   554     uint16_t *wdest = (uint16_t*)dest;
   555     int i;
   557     srcaddr = srcaddr & 0x7FFFF8;
   559     banks[0] = (uint16_t *)(pvr2_main_ram + (srcaddr>>1));
   560     banks[1] = banks[0] + 0x200000;
   561     if( offset_flag & 0x02 ) { // If source is not 64-bit aligned, swap the banks
   562         uint16_t *tmp = banks[0];
   563         banks[0] = banks[1];
   564         banks[1] = tmp + 2;
   565         offset_flag &= 0x01;
   566     }
   567     banks[0] += offset_flag;
   570     if( width > height ) {
   571         for( i=0; i<width; i+=height ) {
   572             pvr2_vram64_detwiddle_16( wdest, banks, offset_flag, i, 0, height, width );
   573         }
   574     } else if( height > width ) {
   575         for( i=0; i<height; i+=width ) {
   576             pvr2_vram64_detwiddle_16( wdest, banks, offset_flag, 0, i, width, width );
   577         }
   578     } else if( width == 1 ) {
   579         *wdest = *banks[0];
   580     } else {
   581         pvr2_vram64_detwiddle_16( wdest, banks, offset_flag, 0, 0, width, width );
   582     }
   583 }
   585 static void pvr2_vram_write_invert( sh4addr_t destaddr, unsigned char *src, uint32_t src_size, 
   586                              uint32_t line_size, uint32_t dest_stride,
   587                              uint32_t src_stride )
   588 {
   589     unsigned char *dest = pvr2_main_ram + (destaddr & 0x007FFFFF);
   590     unsigned char *p = src + src_size - src_stride;
   591     while( p >= src ) {
   592         memcpy( dest, p, line_size );
   593         p -= src_stride;
   594         dest += dest_stride;
   595     }
   596 }
   598 static void pvr2_vram64_write_invert( sh4addr_t destaddr, unsigned char *src, 
   599                                       uint32_t src_size, uint32_t line_size, 
   600                                       uint32_t dest_stride, uint32_t src_stride )
   601 {
   602     int i,j;
   603     uint32_t *banks[2];
   604     uint32_t *dwsrc = (uint32_t *)(src + src_size - src_stride);
   605     int32_t src_line_gap = ((int32_t)src_stride + line_size) >> 2; 
   606     int32_t dest_line_gap = ((int32_t)dest_stride - (int32_t)line_size) >> 3;
   608     destaddr = destaddr & 0x7FFFF8;
   610     for( i=destaddr; i < destaddr + dest_stride*(src_size/src_stride); i+= LXDREAM_PAGE_SIZE ) {
   611         texcache_invalidate_page( i );
   612     }
   614     banks[0] = (uint32_t *)(pvr2_main_ram + (destaddr >>1));
   615     banks[1] = banks[0] + 0x100000;
   617     while( dwsrc >= (uint32_t *)src ) { 
   618         for( j=0; j<line_size; j+=8 ) {
   619             *banks[0]++ = *dwsrc++;
   620             *banks[1]++ = *dwsrc++;
   621         }
   622         banks[0] += dest_line_gap;
   623         banks[1] += dest_line_gap;
   624         dwsrc -= src_line_gap;
   625     }    
   626 }
   628 /**
   629  * Copy a pixel buffer to vram, flipping and scaling at the same time. This
   630  * is not massively efficient, but it's used pretty rarely.
   631  */
   632 static void pvr2_vram_write_invert_hscale( sh4addr_t destaddr, unsigned char *src, uint32_t src_size, 
   633                              uint32_t line_size, uint32_t dest_stride,
   634                              uint32_t src_stride, int bpp )
   635 {
   636     unsigned char *dest = pvr2_main_ram + (destaddr & 0x007FFFFF);
   637     unsigned char *p = src + src_size - src_stride;
   638     while( p >= src ) {
   639         unsigned char *s = p, *d = dest;
   640         int i;
   641         while( s < p+line_size ) {
   642             for( i=0; i<bpp; i++ ) {
   643                 *d++ = *s++;
   644             }
   645             s+= bpp;
   646         }
   647         p -= src_stride;
   648         dest += dest_stride;
   649     }
   650 }
   652 void pvr2_vram64_read( unsigned char *dest, sh4addr_t srcaddr, uint32_t length )
   653 {
   654     int bank_flag = (srcaddr & 0x04) >> 2;
   655     uint32_t *banks[2];
   656     uint32_t *dwdest;
   657     int i;
   659     srcaddr = srcaddr & 0x7FFFFF;
   660     if( srcaddr + length > 0x800000 )
   661         length = 0x800000 - srcaddr;
   663     banks[0] = ((uint32_t *)(pvr2_main_ram + ((srcaddr&0x007FFFF8)>>1)));
   664     banks[1] = banks[0] + 0x100000;
   665     if( bank_flag )
   666         banks[0]++;
   668     /* Handle non-aligned start of source */
   669     if( srcaddr & 0x03 ) {
   670         char *src = ((char *)banks[bank_flag]) + (srcaddr & 0x03);
   671         for( i= srcaddr & 0x03; i < 4 && length > 0; i++, length-- ) {
   672             *dest++ = *src++;
   673         }
   674         bank_flag = !bank_flag;
   675     }
   677     dwdest = (uint32_t *)dest;
   678     while( length >= 4 ) {
   679         *dwdest++ = *banks[bank_flag]++;
   680         bank_flag = !bank_flag;
   681         length -= 4;
   682     }
   684     /* Handle non-aligned end of source */
   685     if( length ) {
   686         dest = (unsigned char *)dwdest;
   687         unsigned char *src = (unsigned char *)banks[bank_flag];
   688         while( length-- > 0 ) {
   689             *dest++ = *src++;
   690         }
   691     }
   692 }
   694 void pvr2_vram64_dump_file( sh4addr_t addr, uint32_t length, gchar *filename )
   695 {
   696     uint32_t tmp[length>>2];
   697     FILE *f = fopen(filename, "wo");
   698     unsigned int i, j;
   700     if( f == NULL ) {
   701         ERROR( "Unable to write to dump file '%s' (%s)", filename, strerror(errno) );
   702         return;
   703     }
   704     pvr2_vram64_read( (unsigned char *)tmp, addr, length );
   705     fprintf( f, "%08X\n", addr );
   706     for( i =0; i<length>>2; i+=8 ) {
   707         for( j=i; j<i+8; j++ ) {
   708             if( j < length )
   709                 fprintf( f, " %08X", tmp[j] );
   710             else
   711                 fprintf( f, "         " );
   712         }
   713         fprintf( f, "\n" );
   714     }
   715     fclose(f);
   716 }
   718 void pvr2_vram64_dump( sh4addr_t addr, uint32_t length, FILE *f )
   719 {
   720     unsigned char tmp[length];
   721     pvr2_vram64_read( tmp, addr, length );
   722     fwrite_dump( tmp, length, f );
   723 }
   727 /**
   728  * Flush the indicated render buffer back to PVR. Caller is responsible for
   729  * tracking whether there is actually anything in the buffer.
   730  *
   731  * FIXME: Handle horizontal scaler 
   732  *
   733  * @param buffer A render buffer indicating the address to store to, and the
   734  * format the data needs to be in.
   735  */
   736 void pvr2_render_buffer_copy_to_sh4( render_buffer_t buffer )
   737 {
   738     int line_size = buffer->width * colour_formats[buffer->colour_format].bpp;
   739     int src_stride = line_size;
   740     unsigned char target[buffer->size];
   742     display_driver->read_render_buffer( target, buffer, line_size, buffer->colour_format );
   744     if( (buffer->scale & 0xFFFF) == 0x0800 )
   745         src_stride <<= 1;
   747     if( (buffer->address & 0xFF000000) == 0x04000000 ) {
   748         pvr2_vram64_write_invert( buffer->address, target, buffer->size, line_size, 
   749                                   buffer->rowstride, src_stride );
   750     } else {
   751         /* Regular buffer */
   752         if( buffer->scale & SCALER_HSCALE ) {
   753             pvr2_vram_write_invert_hscale( buffer->address, target, buffer->size, line_size, buffer->rowstride,
   754                                            src_stride, colour_formats[buffer->colour_format].bpp );
   755         } else {
   756             pvr2_vram_write_invert( buffer->address, target, buffer->size, line_size, buffer->rowstride,
   757                                     src_stride );
   758         }
   759     }
   760     buffer->flushed = TRUE;
   761 }
.