4 * PVR2 (Video) VRAM handling routines (mainly for the 64-bit region)
6 * Copyright (c) 2005 Nathan Keynes.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
/* Base pointer that every VRAM access in this file is computed from.
 * Declared extern here, so the storage is defined in another translation
 * unit. */
25 extern unsigned char *video_base;
/*
 * pvr2_dma_write: dispatch a DMA write of count bytes from src according to
 * the destination address bits selected by the mask 0x13800000. Depending on
 * the case taken, the data goes to pvr2_ta_write(), to pvr2_yuv_write(), or
 * to VRAM - either through pvr2_vram64_write() or by a direct memcpy() into
 * video_base - after reading ASIC register PVRDMARGN1 or PVRDMARGN2.
 *
 * NOTE(review): the embedded line numbers show gaps (28-30, 32-33, 35-37, 39,
 * 41, ...). The case labels and the conditions that pick between the
 * pvr2_vram64_write() path and the memcpy() path are not visible in this
 * extract - confirm them against the complete file.
 */
27 void pvr2_dma_write( sh4addr_t destaddr, unsigned char *src, uint32_t count )
31 switch( destaddr & 0x13800000 ) {
34 pvr2_ta_write( src, count );
/* First VRAM window: mode is taken from PVRDMARGN1. */
38 region = MMIO_READ( ASIC, PVRDMARGN1 );
40 pvr2_vram64_write( destaddr, src, count );
/* Direct-copy path: reduce the address with PVR2_RAM_MASK, then clamp count
 * so the memcpy cannot run past PVR2_RAM_SIZE. */
42 destaddr &= PVR2_RAM_MASK;
43 unsigned char *dest = video_base + destaddr;
44 if( PVR2_RAM_SIZE - destaddr < count ) {
45 count = PVR2_RAM_SIZE - destaddr;
47 memcpy( dest, src, count );
52 pvr2_yuv_write( src, count );
/* Second VRAM window: same handling, but the mode comes from PVRDMARGN2. */
56 region = MMIO_READ( ASIC, PVRDMARGN2 );
58 pvr2_vram64_write( destaddr, src, count );
60 destaddr &= PVR2_RAM_MASK;
61 unsigned char *dest = video_base + destaddr;
62 if( PVR2_RAM_SIZE - destaddr < count ) {
63 count = PVR2_RAM_SIZE - destaddr;
65 memcpy( dest, src, count );
70 void pvr2_vram64_write( sh4addr_t destaddr, unsigned char *src, uint32_t length )
72 int bank_flag = (destaddr & 0x04) >> 2;
77 destaddr = destaddr & 0x7FFFFF;
78 if( destaddr + length > 0x800000 ) {
79 length = 0x800000 - destaddr;
82 for( i=destaddr & 0xFFFFF000; i < destaddr + length; i+= LXDREAM_PAGE_SIZE ) {
83 texcache_invalidate_page( i );
86 banks[0] = ((uint32_t *)(video_base + ((destaddr & 0x007FFFF8) >>1)));
87 banks[1] = banks[0] + 0x100000;
91 /* Handle non-aligned start of source */
92 if( destaddr & 0x03 ) {
93 unsigned char *dest = ((unsigned char *)banks[bank_flag]) + (destaddr & 0x03);
94 for( i= destaddr & 0x03; i < 4 && length > 0; i++, length-- ) {
97 bank_flag = !bank_flag;
100 dwsrc = (uint32_t *)src;
101 while( length >= 4 ) {
102 *banks[bank_flag]++ = *dwsrc++;
103 bank_flag = !bank_flag;
107 /* Handle non-aligned end of source */
109 src = (unsigned char *)dwsrc;
110 unsigned char *dest = (unsigned char *)banks[bank_flag];
111 while( length-- > 0 ) {
118 * Write an image to 64-bit vram, with a line-stride different from the line-size.
119 * The destaddr must be 64-bit aligned, and both line_bytes and line_stride_bytes
120 * must be multiples of 8.
122 void pvr2_vram64_write_stride( sh4addr_t destaddr, unsigned char *src, uint32_t line_bytes,
123 uint32_t line_stride_bytes, uint32_t line_count )
127 uint32_t *dwsrc = (uint32_t *)src;
128 uint32_t line_gap = (line_stride_bytes - line_bytes) >> 3;
130 destaddr = destaddr & 0x7FFFF8;
133 for( i=destaddr; i < destaddr + line_stride_bytes*line_count; i+= LXDREAM_PAGE_SIZE ) {
134 texcache_invalidate_page( i );
137 banks[0] = (uint32_t *)(video_base + (destaddr >>1));
138 banks[1] = banks[0] + 0x100000;
140 for( i=0; i<line_count; i++ ) {
141 for( j=0; j<line_bytes; j++ ) {
142 *banks[0]++ = *dwsrc++;
143 *banks[1]++ = *dwsrc++;
145 banks[0] += line_gap;
146 banks[1] += line_gap;
/*
 * pvr2_vram64_read_stride: copies line_count lines of 32-bit words out of the
 * two VRAM banks into dest, taking words alternately from banks[0] and
 * banks[1]. The number of words copied per line is the smaller of the source
 * and destination line sizes (in bytes, shifted right by 2). When the
 * destination line is the longer one, the remainder is skipped in dest via
 * dest_line_gap; when the source line is longer, both bank pointers skip
 * src_line_gap words, with additional handling under src_line_gap_flag.
 *
 * NOTE(review): the embedded numbering shows missing lines (declarations,
 * braces, the else between the two size cases, and the statement at line 196
 * inside the src_line_gap_flag branch) - confirm against the complete file.
 */
151 * Read an image from 64-bit vram, with a destination line-stride different from the line-size.
152 * The srcaddr must be 32-bit aligned, and both line_bytes and line_stride_bytes
153 * must be multiples of 4. line_stride_bytes must be >= line_bytes.
154 * This method is used to extract a "stride" texture from vram.
156 void pvr2_vram64_read_stride( unsigned char *dest, uint32_t dest_line_bytes, sh4addr_t srcaddr,
157 uint32_t src_line_bytes, uint32_t line_count )
/* Bit 2 of the address selects which bank supplies the first word. */
159 int bank_flag = (srcaddr & 0x04) >> 2;
162 uint32_t dest_line_gap = 0;
163 uint32_t src_line_gap = 0;
165 int src_line_gap_flag;
168 srcaddr = srcaddr & 0x7FFFF8;
/* Source line fits in the destination line: copy all of it, pad dest. */
169 if( src_line_bytes <= dest_line_bytes ) {
170 dest_line_gap = (dest_line_bytes - src_line_bytes) >> 2;
172 src_line_gap_flag = 0;
173 line_bytes = src_line_bytes >> 2;
/* Source line is longer: i is the excess in bytes. Whole 8-byte pairs become
 * src_line_gap; bit 2 of the excess (an odd word) is kept in the flag. */
175 i = (src_line_bytes - dest_line_bytes);
176 src_line_gap_flag = i & 0x04;
177 src_line_gap = i >> 3;
178 line_bytes = dest_line_bytes >> 2;
181 banks[0] = (uint32_t *)(video_base + (srcaddr>>1));
182 banks[1] = banks[0] + 0x100000;
186 dwdest = (uint32_t *)dest;
187 for( i=0; i<line_count; i++ ) {
188 for( j=0; j<line_bytes; j++ ) {
189 *dwdest++ = *banks[bank_flag]++;
190 bank_flag = !bank_flag;
192 dwdest += dest_line_gap;
193 banks[0] += src_line_gap;
194 banks[1] += src_line_gap;
195 if( src_line_gap_flag ) {
197 bank_flag = !bank_flag;
/*
 * pvr2_vram64_detwiddle_4: recursive de-twiddle of one square block of
 * 4-bit pixels.
 *  - Leaf: reads one byte from each of two bank pointers chosen from offset
 *    (t1, t2). The low nibbles of t1 and t2 form the destination byte on row
 *    y1, the high nibbles form the byte on row y1+1.
 *  - width == 4: four width-2 leaves, with offset advanced by 2 for each, in
 *    the order (x1,y1), (x1,y1+2), (x1+2,y1), (x1+2,y1+2).
 *  - Otherwise: four half-width sub-blocks in the same quadrant order, all
 *    with the unchanged offset.
 * The bank pointers are advanced through the shared banks[] array, so the
 * order of the recursive calls determines which source bytes each leaf sees.
 *
 * NOTE(review): the leaf condition (embedded line 216) and the statement at
 * line 217 are not visible in this extract - confirm against the full file.
 */
204 * @param dest Destination image buffer
205 * @param banks Source data expressed as two bank pointers
206 * @param offset Offset into banks[0] specifying where the next byte
208 * @param x1,y1 Destination coordinates
209 * @param width Width of current destination block
210 * @param stride Total width of image (ie stride) in bytes
213 static void pvr2_vram64_detwiddle_4( uint8_t *dest, uint8_t *banks[2], int offset,
214 int x1, int y1, int width, int stride )
218 uint8_t t1 = *banks[offset<4?0:1]++;
219 uint8_t t2 = *banks[offset<3?0:1]++;
220 dest[y1*stride + x1] = (t1 & 0x0F) | (t2<<4);
221 dest[(y1+1)*stride + x1] = (t1>>4) | (t2&0xF0);
222 } else if( width == 4 ) {
223 pvr2_vram64_detwiddle_4( dest, banks, offset, x1, y1, 2, stride );
224 pvr2_vram64_detwiddle_4( dest, banks, offset+2, x1, y1+2, 2, stride );
225 pvr2_vram64_detwiddle_4( dest, banks, offset+4, x1+2, y1, 2, stride );
226 pvr2_vram64_detwiddle_4( dest, banks, offset+6, x1+2, y1+2, 2, stride );
229 int subdivide = width >> 1;
230 pvr2_vram64_detwiddle_4( dest, banks, offset, x1, y1, subdivide, stride );
231 pvr2_vram64_detwiddle_4( dest, banks, offset, x1, y1+subdivide, subdivide, stride );
232 pvr2_vram64_detwiddle_4( dest, banks, offset, x1+subdivide, y1, subdivide, stride );
233 pvr2_vram64_detwiddle_4( dest, banks, offset, x1+subdivide, y1+subdivide, subdivide, stride );
/*
 * pvr2_vram64_detwiddle_8: recursive de-twiddle of one square block of
 * 8-bit pixels.
 *  - Leaf: fills a 2x2 destination block in the order (x1,y1), (x1,y1+1),
 *    (x1+1,y1), (x1+1,y1+1). The first byte always comes from banks[0]; the
 *    bank for each of the other three depends on offset. The two bank
 *    pointers are then swapped (only the first line of the swap is visible).
 *  - Otherwise: four half-width sub-blocks in the same quadrant order.
 * Source position is carried entirely by the shared banks[] pointers.
 *
 * NOTE(review): the leaf condition and the rest of the swap (embedded lines
 * 250, 256-257) are not visible in this extract.
 */
238 * @param dest Destination image buffer
239 * @param banks Source data expressed as two bank pointers
240 * @param offset Offset into banks[0] specifying where the next byte
242 * @param x1,y1 Destination coordinates
243 * @param width Width of current destination block
244 * @param stride Total width of image (ie stride)
247 static void pvr2_vram64_detwiddle_8( uint8_t *dest, uint8_t *banks[2], int offset,
248 int x1, int y1, int width, int stride )
251 dest[y1*stride + x1] = *banks[0]++;
252 dest[(y1+1)*stride + x1] = *banks[offset<3?0:1]++;
253 dest[y1*stride + x1 + 1] = *banks[offset<2?0:1]++;
254 dest[(y1+1)*stride + x1 + 1] = *banks[offset==0?0:1]++;
255 uint8_t *tmp = banks[0]; /* swap banks */
259 int subdivide = width >> 1;
260 pvr2_vram64_detwiddle_8( dest, banks, offset, x1, y1, subdivide, stride );
261 pvr2_vram64_detwiddle_8( dest, banks, offset, x1, y1+subdivide, subdivide, stride );
262 pvr2_vram64_detwiddle_8( dest, banks, offset, x1+subdivide, y1, subdivide, stride );
263 pvr2_vram64_detwiddle_8( dest, banks, offset, x1+subdivide, y1+subdivide, subdivide, stride );
/*
 * pvr2_vram64_detwiddle_16: recursive de-twiddle of one square block of
 * 16-bit pixels.
 *  - Leaf: fills a 2x2 destination block. (x1,y1) is read from banks[0] and
 *    (x1+1,y1) from banks[1]; (x1,y1+1) is read from banks[offset] and
 *    (x1+1,y1+1) from banks[offset^1], so offset (0 or 1) decides which bank
 *    supplies the second row's pixels.
 *  - Otherwise: four half-width sub-blocks in the order (x1,y1),
 *    (x1,y1+h), (x1+h,y1), (x1+h,y1+h).
 * dest is indexed in 16-bit elements, so stride is in pixels here.
 *
 * NOTE(review): the leaf condition (embedded line 280) is not visible in
 * this extract.
 */
268 * @param dest Destination image buffer
269 * @param banks Source data expressed as two bank pointers
270 * @param offset Offset into banks[0] specifying where the next word
271 * to read is (0 or 1)
272 * @param x1,y1 Destination coordinates
273 * @param width Width of current destination block
274 * @param stride Total width of image (ie stride)
277 static void pvr2_vram64_detwiddle_16( uint16_t *dest, uint16_t *banks[2], int offset,
278 int x1, int y1, int width, int stride )
281 dest[y1*stride + x1] = *banks[0]++;
282 dest[(y1+1)*stride + x1] = *banks[offset]++;
283 dest[y1*stride + x1 + 1] = *banks[1]++;
284 dest[(y1+1)*stride + x1 + 1] = *banks[offset^1]++;
286 int subdivide = width >> 1;
287 pvr2_vram64_detwiddle_16( dest, banks, offset, x1, y1, subdivide, stride );
288 pvr2_vram64_detwiddle_16( dest, banks, offset, x1, y1+subdivide, subdivide, stride );
289 pvr2_vram64_detwiddle_16( dest, banks, offset, x1+subdivide, y1, subdivide, stride );
290 pvr2_vram64_detwiddle_16( dest, banks, offset, x1+subdivide, y1+subdivide, subdivide, stride );
/*
 * pvr2_vram64_read_twiddled_4: sets up two byte pointers over VRAM (banks[1]
 * is 0x400000 bytes above banks[0]) and de-twiddles the image tile by tile
 * with pvr2_vram64_detwiddle_4(). The destination stride passed down is
 * width/2 bytes (two pixels per byte).
 *  - width > height: square tiles of side height, stepping across x.
 *  - height > width: square tiles of side width, stepping down y.
 *  - width == height: a single tile, except width == 1 which is handled by
 *    a separate statement.
 *
 * NOTE(review): the body of the bank swap (embedded lines 316-318) and the
 * width == 1 statement (line 331) are not visible in this extract.
 */
295 * Read an image from 64-bit vram stored as twiddled 4-bit pixels. The
296 * image is written out to the destination in detwiddled form.
297 * @param dest destination buffer, which must be at least width*height/2 in length
298 * @param srcaddr source address in vram
299 * @param width image width (must be a power of 2)
300 * @param height image height (must be a power of 2)
302 void pvr2_vram64_read_twiddled_4( unsigned char *dest, sh4addr_t srcaddr, uint32_t width, uint32_t height )
/* Low three address bits: byte position inside the 8-byte bank pair. */
304 int offset_flag = (srcaddr & 0x07);
306 uint8_t *wdest = (uint8_t*)dest;
307 uint32_t stride = width >> 1;
310 srcaddr = srcaddr & 0x7FFFF8;
312 banks[0] = (uint8_t *)(video_base + (srcaddr>>1));
313 banks[1] = banks[0] + 0x400000;
314 if( offset_flag & 0x04 ) { // If source is not 64-bit aligned, swap the banks
315 uint8_t *tmp = banks[0];
320 banks[0] += offset_flag;
322 if( width > height ) {
323 for( i=0; i<width; i+=height ) {
324 pvr2_vram64_detwiddle_4( wdest, banks, offset_flag, i, 0, height, stride );
326 } else if( height > width ) {
327 for( i=0; i<height; i+=width ) {
328 pvr2_vram64_detwiddle_4( wdest, banks, offset_flag, 0, i, width, stride );
330 } else if( width == 1 ) {
333 pvr2_vram64_detwiddle_4( wdest, banks, offset_flag, 0, 0, width, stride );
/*
 * pvr2_vram64_read_twiddled_8: sets up two byte pointers over VRAM (banks[1]
 * is 0x400000 bytes above banks[0]) and de-twiddles the image tile by tile
 * with pvr2_vram64_detwiddle_8(), passing width as the destination stride.
 *  - width > height: square tiles of side height, stepping across x.
 *  - height > width: square tiles of side width, stepping down y.
 *  - width == height: a single tile, except width == 1 which is handled by
 *    a separate statement.
 *
 * NOTE(review): the body of the bank swap (embedded lines 358-360) and the
 * width == 1 statement (line 373) are not visible in this extract.
 */
338 * Read an image from 64-bit vram stored as twiddled 8-bit pixels. The
339 * image is written out to the destination in detwiddled form.
340 * @param dest destination buffer, which must be at least width*height in length
341 * @param srcaddr source address in vram
342 * @param width image width (must be a power of 2)
343 * @param height image height (must be a power of 2)
345 void pvr2_vram64_read_twiddled_8( unsigned char *dest, sh4addr_t srcaddr, uint32_t width, uint32_t height )
/* Low three address bits: byte position inside the 8-byte bank pair. */
347 int offset_flag = (srcaddr & 0x07);
349 uint8_t *wdest = (uint8_t*)dest;
352 srcaddr = srcaddr & 0x7FFFF8;
354 banks[0] = (uint8_t *)(video_base + (srcaddr>>1));
355 banks[1] = banks[0] + 0x400000;
356 if( offset_flag & 0x04 ) { // If source is not 64-bit aligned, swap the banks
357 uint8_t *tmp = banks[0];
362 banks[0] += offset_flag;
364 if( width > height ) {
365 for( i=0; i<width; i+=height ) {
366 pvr2_vram64_detwiddle_8( wdest, banks, offset_flag, i, 0, height, width );
368 } else if( height > width ) {
369 for( i=0; i<height; i+=width ) {
370 pvr2_vram64_detwiddle_8( wdest, banks, offset_flag, 0, i, width, width );
372 } else if( width == 1 ) {
375 pvr2_vram64_detwiddle_8( wdest, banks, offset_flag, 0, 0, width, width );
/*
 * pvr2_vram64_read_twiddled_16: sets up two 16-bit pointers over VRAM
 * (banks[1] is 0x200000 elements above banks[0]) and de-twiddles the image
 * tile by tile with pvr2_vram64_detwiddle_16(), passing width as the
 * destination stride.
 *  - width > height: square tiles of side height, stepping across x.
 *  - height > width: square tiles of side width, stepping down y.
 *  - width == height: a single tile, except width == 1 which is handled by
 *    a separate statement.
 *
 * NOTE(review): the body of the bank swap (embedded lines 399-401) and the
 * width == 1 statement (line 415) are not visible in this extract.
 */
380 * Read an image from 64-bit vram stored as twiddled 16-bit pixels. The
381 * image is written out to the destination in detwiddled form.
382 * @param dest destination buffer, which must be at least width*height*2 in length
383 * @param srcaddr source address in vram (must be 16-bit aligned)
384 * @param width image width (must be a power of 2)
385 * @param height image height (must be a power of 2)
387 void pvr2_vram64_read_twiddled_16( unsigned char *dest, sh4addr_t srcaddr, uint32_t width, uint32_t height ) {
/* Address bits 1-2 expressed as a 16-bit element index within the pair. */
388 int offset_flag = (srcaddr & 0x06) >> 1;
390 uint16_t *wdest = (uint16_t*)dest;
393 srcaddr = srcaddr & 0x7FFFF8;
395 banks[0] = (uint16_t *)(video_base + (srcaddr>>1));
396 banks[1] = banks[0] + 0x200000;
397 if( offset_flag & 0x02 ) { // If source is not 64-bit aligned, swap the banks
398 uint16_t *tmp = banks[0];
403 banks[0] += offset_flag;
406 if( width > height ) {
407 for( i=0; i<width; i+=height ) {
408 pvr2_vram64_detwiddle_16( wdest, banks, offset_flag, i, 0, height, width );
410 } else if( height > width ) {
411 for( i=0; i<height; i+=width ) {
412 pvr2_vram64_detwiddle_16( wdest, banks, offset_flag, 0, i, width, width );
414 } else if( width == 1 ) {
417 pvr2_vram64_detwiddle_16( wdest, banks, offset_flag, 0, 0, width, width );
/*
 * pvr2_vram_write_invert: copies lines of line_size bytes from src into VRAM
 * at video_base + (destaddr & 0x007FFFFF) using memcpy. The source pointer p
 * starts at the last line of the source buffer (src + src_size - src_stride).
 *
 * NOTE(review): the loop around the memcpy and the pointer updates (embedded
 * lines 427, 429-431) are not visible in this extract; presumably p walks
 * backwards by src_stride while dest advances by dest_stride - confirm
 * against the complete file.
 */
421 static void pvr2_vram_write_invert( sh4addr_t destaddr, unsigned char *src, uint32_t src_size,
422 uint32_t line_size, uint32_t dest_stride,
423 uint32_t src_stride )
425 unsigned char *dest = video_base + (destaddr & 0x007FFFFF);
426 unsigned char *p = src + src_size - src_stride;
428 memcpy( dest, p, line_size );
/*
 * pvr2_vram64_write_invert: writes a source image into the two interleaved
 * VRAM banks with the line order reversed. Reading starts at the last source
 * line and continues while the read pointer is still at or after src. Each
 * 8 bytes of a line supply one 32-bit word to banks[0] and one to banks[1].
 * Pages in the destination range are passed to texcache_invalidate_page()
 * beforehand.
 *
 * NOTE(review): declarations and closing braces are missing from this
 * extract (see the gaps in the embedded numbering).
 */
434 static void pvr2_vram64_write_invert( sh4addr_t destaddr, unsigned char *src,
435 uint32_t src_size, uint32_t line_size,
436 uint32_t dest_stride, uint32_t src_stride )
/* Start reading at the last line of the source image. */
440 uint32_t *dwsrc = (uint32_t *)(src + src_size - src_stride);
/* After a line is copied dwsrc sits line_size bytes past that line's start;
 * stepping back by (src_stride + line_size) bytes, counted in 32-bit words,
 * puts it at the start of the previous source line. */
441 int32_t src_line_gap = ((int32_t)src_stride + line_size) >> 2;
/* Unused remainder of each destination line, in words per bank. */
442 int32_t dest_line_gap = ((int32_t)dest_stride - (int32_t)line_size) >> 3;
444 destaddr = destaddr & 0x7FFFF8;
/* src_size/src_stride is the number of source lines. */
446 for( i=destaddr; i < destaddr + dest_stride*(src_size/src_stride); i+= LXDREAM_PAGE_SIZE ) {
447 texcache_invalidate_page( i );
450 banks[0] = (uint32_t *)(video_base + (destaddr >>1));
451 banks[1] = banks[0] + 0x100000;
453 while( dwsrc >= (uint32_t *)src ) {
454 for( j=0; j<line_size; j+=8 ) {
455 *banks[0]++ = *dwsrc++;
456 *banks[1]++ = *dwsrc++;
458 banks[0] += dest_line_gap;
459 banks[1] += dest_line_gap;
460 dwsrc -= src_line_gap;
/*
 * pvr2_vram_write_invert_hscale: variant of the inverted VRAM copy that
 * walks each source line with a pair of byte pointers (s over the source
 * line, d over the destination) and an inner loop over the bpp bytes of a
 * pixel. The source pointer p starts at the last line of the source buffer.
 *
 * NOTE(review): most of the body (the outer loop, the per-byte statements
 * and the pointer updates, embedded lines 474, 476, 479-486) is not visible
 * in this extract, so the exact scaling rule cannot be stated from here.
 */
465 * Copy a pixel buffer to vram, flipping and scaling at the same time. This
466 * is not massively efficient, but it's used pretty rarely.
468 static void pvr2_vram_write_invert_hscale( sh4addr_t destaddr, unsigned char *src, uint32_t src_size,
469 uint32_t line_size, uint32_t dest_stride,
470 uint32_t src_stride, int bpp )
472 unsigned char *dest = video_base + (destaddr & 0x007FFFFF);
473 unsigned char *p = src + src_size - src_stride;
475 unsigned char *s = p, *d = dest;
477 while( s < p+line_size ) {
478 for( i=0; i<bpp; i++ ) {
488 void pvr2_vram64_read( unsigned char *dest, sh4addr_t srcaddr, uint32_t length )
490 int bank_flag = (srcaddr & 0x04) >> 2;
495 srcaddr = srcaddr & 0x7FFFFF;
496 if( srcaddr + length > 0x800000 )
497 length = 0x800000 - srcaddr;
499 banks[0] = ((uint32_t *)(video_base + ((srcaddr&0x007FFFF8)>>1)));
500 banks[1] = banks[0] + 0x100000;
504 /* Handle non-aligned start of source */
505 if( srcaddr & 0x03 ) {
506 char *src = ((char *)banks[bank_flag]) + (srcaddr & 0x03);
507 for( i= srcaddr & 0x03; i < 4 && length > 0; i++, length-- ) {
510 bank_flag = !bank_flag;
513 dwdest = (uint32_t *)dest;
514 while( length >= 4 ) {
515 *dwdest++ = *banks[bank_flag]++;
516 bank_flag = !bank_flag;
520 /* Handle non-aligned end of source */
522 dest = (unsigned char *)dwdest;
523 unsigned char *src = (unsigned char *)banks[bank_flag];
524 while( length-- > 0 ) {
530 void pvr2_vram64_dump_file( sh4addr_t addr, uint32_t length, gchar *filename )
532 uint32_t tmp[length>>2];
533 FILE *f = fopen(filename, "wo");
537 ERROR( "Unable to write to dump file '%s' (%s)", filename, strerror(errno) );
540 pvr2_vram64_read( (unsigned char *)tmp, addr, length );
541 fprintf( f, "%08X\n", addr );
542 for( i =0; i<length>>2; i+=8 ) {
543 for( j=i; j<i+8; j++ ) {
545 fprintf( f, " %08X", tmp[j] );
554 void pvr2_vram64_dump( sh4addr_t addr, uint32_t length, FILE *f )
556 unsigned char tmp[length];
557 pvr2_vram64_read( tmp, addr, length );
558 fwrite_dump( tmp, length, f );
/*
 * pvr2_render_buffer_copy_to_sh4: reads the render buffer's pixels into a
 * local array via display_driver->read_render_buffer(), then writes them to
 * VRAM with one of the *_write_invert helpers:
 *  - address top byte 0x04: pvr2_vram64_write_invert();
 *  - otherwise, with SCALER_HSCALE set in buffer->scale:
 *    pvr2_vram_write_invert_hscale();
 *  - otherwise: pvr2_vram_write_invert().
 * Finally marks the buffer as flushed.
 *
 * NOTE(review): target is a stack array of buffer->size bytes - worth
 * confirming that size is bounded. The statement controlled by the
 * (scale & 0xFFFF) == 0x0800 test (embedded line 581) is not visible in
 * this extract.
 */
564 * Flush the indicated render buffer back to PVR. Caller is responsible for
565 * tracking whether there is actually anything in the buffer.
567 * FIXME: Handle horizontal scaler
569 * @param buffer A render buffer indicating the address to store to, and the
570 * format the data needs to be in.
572 void pvr2_render_buffer_copy_to_sh4( render_buffer_t buffer )
/* Bytes per line as produced by the display driver. */
574 int line_size = buffer->width * colour_formats[buffer->colour_format].bpp;
575 int src_stride = line_size;
576 unsigned char target[buffer->size];
578 display_driver->read_render_buffer( target, buffer, line_size, buffer->colour_format );
580 if( (buffer->scale & 0xFFFF) == 0x0800 )
583 if( (buffer->address & 0xFF000000) == 0x04000000 ) {
584 pvr2_vram64_write_invert( buffer->address, target, buffer->size, line_size,
585 buffer->rowstride, src_stride );
588 if( buffer->scale & SCALER_HSCALE ) {
589 pvr2_vram_write_invert_hscale( buffer->address, target, buffer->size, line_size, buffer->rowstride,
590 src_stride, colour_formats[buffer->colour_format].bpp );
592 pvr2_vram_write_invert( buffer->address, target, buffer->size, line_size, buffer->rowstride,
596 buffer->flushed = TRUE;
.