filename | src/pvr2/pvr2mem.c |
changeset | 857:3d8944884eaa |
prev | 856:02ac5f37bfc9 |
next | 869:b6f38c7ee7a3 |
author | nkeynes |
date | Sun Sep 28 01:09:51 2008 +0000 (15 years ago) |
permissions | -rw-r--r-- |
last change | Initial shadow volume implementation for opaque polygons (stencil isn't quite right, but we get some kind of shadows now) |
view | annotate | diff | log | raw |
1 /**
2 * $Id$
3 *
4 * PVR2 (Video) VRAM handling routines (mainly for the 64-bit region)
5 *
6 * Copyright (c) 2005 Nathan Keynes.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 */
18 #include <string.h>
19 #include <stdio.h>
20 #include <errno.h>
21 #include "pvr2.h"
22 #include "asic.h"
23 #include "dream.h"
25 extern unsigned char *video_base;
27 void pvr2_dma_write( sh4addr_t destaddr, unsigned char *src, uint32_t count )
28 {
29 int region;
31 switch( destaddr & 0x13800000 ) {
32 case 0x10000000:
33 case 0x12000000:
34 pvr2_ta_write( src, count );
35 break;
36 case 0x11000000:
37 case 0x11800000:
38 region = MMIO_READ( ASIC, PVRDMARGN1 );
39 if( region == 0 ) {
40 pvr2_vram64_write( destaddr, src, count );
41 } else {
42 destaddr &= PVR2_RAM_MASK;
43 unsigned char *dest = video_base + destaddr;
44 if( PVR2_RAM_SIZE - destaddr < count ) {
45 count = PVR2_RAM_SIZE - destaddr;
46 }
47 memcpy( dest, src, count );
48 }
49 break;
50 case 0x10800000:
51 case 0x12800000:
52 pvr2_yuv_write( src, count );
53 break;
54 case 0x13000000:
55 case 0x13800000:
56 region = MMIO_READ( ASIC, PVRDMARGN2 );
57 if( region == 0 ) {
58 pvr2_vram64_write( destaddr, src, count );
59 } else {
60 destaddr &= PVR2_RAM_MASK;
61 unsigned char *dest = video_base + destaddr;
62 if( PVR2_RAM_SIZE - destaddr < count ) {
63 count = PVR2_RAM_SIZE - destaddr;
64 }
65 memcpy( dest, src, count );
66 }
67 }
68 }
70 void pvr2_vram64_write( sh4addr_t destaddr, unsigned char *src, uint32_t length )
71 {
72 int bank_flag = (destaddr & 0x04) >> 2;
73 uint32_t *banks[2];
74 uint32_t *dwsrc;
75 int i;
77 destaddr = destaddr & 0x7FFFFF;
78 if( destaddr + length > 0x800000 ) {
79 length = 0x800000 - destaddr;
80 }
82 for( i=destaddr & 0xFFFFF000; i < destaddr + length; i+= LXDREAM_PAGE_SIZE ) {
83 texcache_invalidate_page( i );
84 }
86 banks[0] = ((uint32_t *)(video_base + ((destaddr & 0x007FFFF8) >>1)));
87 banks[1] = banks[0] + 0x100000;
88 if( bank_flag )
89 banks[0]++;
91 /* Handle non-aligned start of source */
92 if( destaddr & 0x03 ) {
93 unsigned char *dest = ((unsigned char *)banks[bank_flag]) + (destaddr & 0x03);
94 for( i= destaddr & 0x03; i < 4 && length > 0; i++, length-- ) {
95 *dest++ = *src++;
96 }
97 bank_flag = !bank_flag;
98 }
100 dwsrc = (uint32_t *)src;
101 while( length >= 4 ) {
102 *banks[bank_flag]++ = *dwsrc++;
103 bank_flag = !bank_flag;
104 length -= 4;
105 }
107 /* Handle non-aligned end of source */
108 if( length ) {
109 src = (unsigned char *)dwsrc;
110 unsigned char *dest = (unsigned char *)banks[bank_flag];
111 while( length-- > 0 ) {
112 *dest++ = *src++;
113 }
114 }
115 }
117 /**
118 * Write an image to 64-bit vram, with a line-stride different from the line-size.
119 * The destaddr must be 32-bit aligned, and both line_bytes and line_stride_bytes
120 * must be multiples of 4.
121 */
122 void pvr2_vram64_write_stride( sh4addr_t destaddr, unsigned char *src, uint32_t line_bytes,
123 uint32_t line_stride_bytes, uint32_t line_count )
124 {
125 int bank_flag = (destaddr & 0x04) >> 2;
126 uint32_t *banks[2];
127 uint32_t *dwsrc;
128 uint32_t line_gap;
129 int line_gap_flag;
130 int i,j;
132 destaddr = destaddr & 0x7FFFF8;
133 i = line_stride_bytes - line_bytes;
134 line_gap_flag = i & 0x04;
135 line_gap = i >> 3;
136 line_bytes >>= 2;
138 for( i=destaddr & 0xFFFFF000; i < destaddr + line_stride_bytes*line_count; i+= LXDREAM_PAGE_SIZE ) {
139 texcache_invalidate_page( i );
140 }
142 banks[0] = (uint32_t *)(video_base + (destaddr >>1));
143 banks[1] = banks[0] + 0x100000;
144 if( bank_flag )
145 banks[0]++;
147 dwsrc = (uint32_t *)src;
148 for( i=0; i<line_count; i++ ) {
149 for( j=0; j<line_bytes; j++ ) {
150 *banks[bank_flag]++ = *dwsrc++;
151 bank_flag = !bank_flag;
152 }
153 banks[0] += line_gap;
154 banks[1] += line_gap;
155 if( line_gap_flag ) {
156 banks[bank_flag]++;
157 bank_flag = !bank_flag;
158 }
159 }
160 }
162 /**
163 * Read an image from 64-bit vram, with a destination line-stride different from the line-size.
164 * The srcaddr must be 32-bit aligned, and both line_bytes and line_stride_bytes
165 * must be multiples of 4. line_stride_bytes must be >= line_bytes.
166 * This method is used to extract a "stride" texture from vram.
167 */
168 void pvr2_vram64_read_stride( unsigned char *dest, uint32_t dest_line_bytes, sh4addr_t srcaddr,
169 uint32_t src_line_bytes, uint32_t line_count )
170 {
171 int bank_flag = (srcaddr & 0x04) >> 2;
172 uint32_t *banks[2];
173 uint32_t *dwdest;
174 uint32_t dest_line_gap = 0;
175 uint32_t src_line_gap = 0;
176 uint32_t line_bytes;
177 int src_line_gap_flag;
178 int i,j;
180 srcaddr = srcaddr & 0x7FFFF8;
181 if( src_line_bytes <= dest_line_bytes ) {
182 dest_line_gap = (dest_line_bytes - src_line_bytes) >> 2;
183 src_line_gap = 0;
184 src_line_gap_flag = 0;
185 line_bytes = src_line_bytes >> 2;
186 } else {
187 i = (src_line_bytes - dest_line_bytes);
188 src_line_gap_flag = i & 0x04;
189 src_line_gap = i >> 3;
190 line_bytes = dest_line_bytes >> 2;
191 }
193 banks[0] = (uint32_t *)(video_base + (srcaddr>>1));
194 banks[1] = banks[0] + 0x100000;
195 if( bank_flag )
196 banks[0]++;
198 dwdest = (uint32_t *)dest;
199 for( i=0; i<line_count; i++ ) {
200 for( j=0; j<line_bytes; j++ ) {
201 *dwdest++ = *banks[bank_flag]++;
202 bank_flag = !bank_flag;
203 }
204 dwdest += dest_line_gap;
205 banks[0] += src_line_gap;
206 banks[1] += src_line_gap;
207 if( src_line_gap_flag ) {
208 banks[bank_flag]++;
209 bank_flag = !bank_flag;
210 }
211 }
212 }
/**
 * Recursively detwiddle a square block of 4-bit pixels.
 *
 * A block wider than 2 is split into four quadrants which are processed in
 * the order top-left, bottom-left, top-right, bottom-right. When splitting a
 * 4-wide block the offset passed down grows by 2 per quadrant; for larger
 * blocks it is passed through unchanged. A 2-wide block consumes two source
 * bytes and produces one byte on each of two destination rows.
 *
 * @param dest Destination image buffer
 * @param banks Source data expressed as two bank pointers; the pointers are
 *        advanced as bytes are consumed
 * @param offset Selects which bank each source byte is taken from
 * @param x1,y1 Destination coordinates in pixels
 * @param width Width of current destination block in pixels
 * @param stride Total width of image (ie stride) in bytes
 */
static void pvr2_vram64_detwiddle_4( uint8_t *dest, uint8_t *banks[2], int offset,
                                     int x1, int y1, int width, int stride )
{
    if( width != 2 ) {
        const int half = width >> 1;
        const int offset_step = (width == 4) ? 2 : 0;
        int quadrant;

        for( quadrant = 0; quadrant < 4; quadrant++ ) {
            const int qx = (quadrant & 2) ? x1 + half : x1;
            const int qy = (quadrant & 1) ? y1 + half : y1;
            pvr2_vram64_detwiddle_4( dest, banks, offset + quadrant*offset_step,
                                     qx, qy, half, stride );
        }
        return;
    }

    /* Leaf: two pixels share a byte, so the byte column is half the pixel column */
    const int column = x1 >> 1;
    uint8_t first = *banks[(offset < 4) ? 0 : 1]++;
    uint8_t second = *banks[(offset < 3) ? 0 : 1]++;

    /* Low nibbles form the upper row, high nibbles the lower row */
    dest[y1*stride + column] = (first & 0x0F) | (second << 4);
    dest[(y1+1)*stride + column] = (first >> 4) | (second & 0xF0);
}
/**
 * Recursively detwiddle a square block of 8-bit pixels.
 *
 * Blocks wider than 2 are split into four quadrants, visited in the order
 * top-left, bottom-left, top-right, bottom-right. A 2x2 block consumes four
 * source bytes (written top-left, bottom-left, top-right, bottom-right) and
 * then exchanges the two bank pointers so that the next block reads from
 * the other bank.
 *
 * @param dest Destination image buffer
 * @param banks Source data expressed as two bank pointers; advanced and
 *        swapped as data is consumed
 * @param offset Offset into banks[0] specifying where the next byte
 *        to read is (0..3); bytes beyond the end of the current word are
 *        taken from banks[1]
 * @param x1,y1 Destination coordinates
 * @param width Width of current destination block
 * @param stride Total width of image (ie stride)
 */
static void pvr2_vram64_detwiddle_8( uint8_t *dest, uint8_t *banks[2], int offset,
                                     int x1, int y1, int width, int stride )
{
    if( width != 2 ) {
        const int half = width >> 1;
        int quadrant;

        for( quadrant = 0; quadrant < 4; quadrant++ ) {
            const int qx = (quadrant & 2) ? x1 + half : x1;
            const int qy = (quadrant & 1) ? y1 + half : y1;
            pvr2_vram64_detwiddle_8( dest, banks, offset, qx, qy, half, stride );
        }
        return;
    }

    uint8_t *upper = dest + y1*stride + x1;
    uint8_t *lower = dest + (y1+1)*stride + x1;
    uint8_t *swap;

    upper[0] = *banks[0]++;
    lower[0] = *banks[(offset < 3) ? 0 : 1]++;
    upper[1] = *banks[(offset < 2) ? 0 : 1]++;
    lower[1] = *banks[(offset == 0) ? 0 : 1]++;

    /* Exchange the banks for the next 2x2 block */
    swap = banks[0];
    banks[0] = banks[1];
    banks[1] = swap;
}
/**
 * Recursively detwiddle a square block of 16-bit pixels.
 *
 * Blocks wider than 2 are split into four quadrants, visited in the order
 * top-left, bottom-left, top-right, bottom-right. A 2x2 block consumes four
 * source words, written top-left, bottom-left, top-right, bottom-right.
 *
 * @param dest Destination image buffer
 * @param banks Source data expressed as two bank pointers; advanced as
 *        words are consumed
 * @param offset Offset into banks[0] specifying where the next word
 *        to read is (0 or 1)
 * @param x1,y1 Destination coordinates
 * @param width Width of current destination block
 * @param stride Total width of image (ie stride)
 */
static void pvr2_vram64_detwiddle_16( uint16_t *dest, uint16_t *banks[2], int offset,
                                      int x1, int y1, int width, int stride )
{
    if( width != 2 ) {
        const int half = width >> 1;
        int quadrant;

        for( quadrant = 0; quadrant < 4; quadrant++ ) {
            const int qx = (quadrant & 2) ? x1 + half : x1;
            const int qy = (quadrant & 1) ? y1 + half : y1;
            pvr2_vram64_detwiddle_16( dest, banks, offset, qx, qy, half, stride );
        }
        return;
    }

    uint16_t *upper = dest + y1*stride + x1;
    uint16_t *lower = dest + (y1+1)*stride + x1;

    upper[0] = *banks[0]++;
    lower[0] = *banks[offset]++;
    upper[1] = *banks[1]++;
    lower[1] = *banks[offset ^ 1]++;
}
306 /**
307 * Read an image from 64-bit vram stored as twiddled 4-bit pixels. The
308 * image is written out to the destination in detwiddled form.
309 * @param dest destination buffer, which must be at least width*height/2 in length
310 * @param srcaddr source address in vram
311 * @param width image width (must be a power of 2)
312 * @param height image height (must be a power of 2)
313 */
314 void pvr2_vram64_read_twiddled_4( unsigned char *dest, sh4addr_t srcaddr, uint32_t width, uint32_t height )
315 {
316 int offset_flag = (srcaddr & 0x07);
317 uint8_t *banks[2];
318 uint8_t *wdest = (uint8_t*)dest;
319 uint32_t stride = width >> 1;
320 int i;
322 srcaddr = srcaddr & 0x7FFFF8;
324 banks[0] = (uint8_t *)(video_base + (srcaddr>>1));
325 banks[1] = banks[0] + 0x400000;
326 if( offset_flag & 0x04 ) { // If source is not 64-bit aligned, swap the banks
327 uint8_t *tmp = banks[0];
328 banks[0] = banks[1];
329 banks[1] = tmp + 4;
330 offset_flag &= 0x03;
331 }
332 banks[0] += offset_flag;
334 if( width > height ) {
335 for( i=0; i<width; i+=height ) {
336 pvr2_vram64_detwiddle_4( wdest, banks, offset_flag, i, 0, height, stride );
337 }
338 } else if( height > width ) {
339 for( i=0; i<height; i+=width ) {
340 pvr2_vram64_detwiddle_4( wdest, banks, offset_flag, 0, i, width, stride );
341 }
342 } else if( width == 1 ) {
343 *wdest = *banks[0];
344 } else {
345 pvr2_vram64_detwiddle_4( wdest, banks, offset_flag, 0, 0, width, stride );
346 }
347 }
349 /**
350 * Read an image from 64-bit vram stored as twiddled 8-bit pixels. The
351 * image is written out to the destination in detwiddled form.
352 * @param dest destination buffer, which must be at least width*height in length
353 * @param srcaddr source address in vram
354 * @param width image width (must be a power of 2)
355 * @param height image height (must be a power of 2)
356 */
357 void pvr2_vram64_read_twiddled_8( unsigned char *dest, sh4addr_t srcaddr, uint32_t width, uint32_t height )
358 {
359 int offset_flag = (srcaddr & 0x07);
360 uint8_t *banks[2];
361 uint8_t *wdest = (uint8_t*)dest;
362 int i;
364 srcaddr = srcaddr & 0x7FFFF8;
366 banks[0] = (uint8_t *)(video_base + (srcaddr>>1));
367 banks[1] = banks[0] + 0x400000;
368 if( offset_flag & 0x04 ) { // If source is not 64-bit aligned, swap the banks
369 uint8_t *tmp = banks[0];
370 banks[0] = banks[1];
371 banks[1] = tmp + 4;
372 offset_flag &= 0x03;
373 }
374 banks[0] += offset_flag;
376 if( width > height ) {
377 for( i=0; i<width; i+=height ) {
378 pvr2_vram64_detwiddle_8( wdest, banks, offset_flag, i, 0, height, width );
379 }
380 } else if( height > width ) {
381 for( i=0; i<height; i+=width ) {
382 pvr2_vram64_detwiddle_8( wdest, banks, offset_flag, 0, i, width, width );
383 }
384 } else if( width == 1 ) {
385 *wdest = *banks[0];
386 } else {
387 pvr2_vram64_detwiddle_8( wdest, banks, offset_flag, 0, 0, width, width );
388 }
389 }
391 /**
392 * Read an image from 64-bit vram stored as twiddled 16-bit pixels. The
393 * image is written out to the destination in detwiddled form.
394 * @param dest destination buffer, which must be at least width*height*2 in length
395 * @param srcaddr source address in vram (must be 16-bit aligned)
396 * @param width image width (must be a power of 2)
397 * @param height image height (must be a power of 2)
398 */
399 void pvr2_vram64_read_twiddled_16( unsigned char *dest, sh4addr_t srcaddr, uint32_t width, uint32_t height ) {
400 int offset_flag = (srcaddr & 0x06) >> 1;
401 uint16_t *banks[2];
402 uint16_t *wdest = (uint16_t*)dest;
403 int i;
405 srcaddr = srcaddr & 0x7FFFF8;
407 banks[0] = (uint16_t *)(video_base + (srcaddr>>1));
408 banks[1] = banks[0] + 0x200000;
409 if( offset_flag & 0x02 ) { // If source is not 64-bit aligned, swap the banks
410 uint16_t *tmp = banks[0];
411 banks[0] = banks[1];
412 banks[1] = tmp + 2;
413 offset_flag &= 0x01;
414 }
415 banks[0] += offset_flag;
418 if( width > height ) {
419 for( i=0; i<width; i+=height ) {
420 pvr2_vram64_detwiddle_16( wdest, banks, offset_flag, i, 0, height, width );
421 }
422 } else if( height > width ) {
423 for( i=0; i<height; i+=width ) {
424 pvr2_vram64_detwiddle_16( wdest, banks, offset_flag, 0, i, width, width );
425 }
426 } else if( width == 1 ) {
427 *wdest = *banks[0];
428 } else {
429 pvr2_vram64_detwiddle_16( wdest, banks, offset_flag, 0, 0, width, width );
430 }
431 }
433 static void pvr2_vram_write_invert( sh4addr_t destaddr, unsigned char *src, uint32_t src_size,
434 uint32_t line_size, uint32_t dest_stride,
435 uint32_t src_stride )
436 {
437 unsigned char *dest = video_base + (destaddr & 0x007FFFFF);
438 unsigned char *p = src + src_size - src_stride;
439 while( p >= src ) {
440 memcpy( dest, p, line_size );
441 p -= src_stride;
442 dest += dest_stride;
443 }
444 }
446 /**
447 * Copy a pixel buffer to vram, flipping and scaling at the same time. This
448 * is not massively efficient, but it's used pretty rarely.
449 */
450 static void pvr2_vram_write_invert_hscale( sh4addr_t destaddr, unsigned char *src, uint32_t src_size,
451 uint32_t line_size, uint32_t dest_stride,
452 uint32_t src_stride, int bpp )
453 {
454 unsigned char *dest = video_base + (destaddr & 0x007FFFFF);
455 unsigned char *p = src + src_size - src_stride;
456 while( p >= src ) {
457 unsigned char *s = p, *d = dest;
458 int i;
459 while( s < p+line_size ) {
460 for( i=0; i<bpp; i++ ) {
461 *d++ = *s++;
462 }
463 s+= bpp;
464 }
465 p -= src_stride;
466 dest += dest_stride;
467 }
468 }
470 void pvr2_vram64_read( unsigned char *dest, sh4addr_t srcaddr, uint32_t length )
471 {
472 int bank_flag = (srcaddr & 0x04) >> 2;
473 uint32_t *banks[2];
474 uint32_t *dwdest;
475 int i;
477 srcaddr = srcaddr & 0x7FFFFF;
478 if( srcaddr + length > 0x800000 )
479 length = 0x800000 - srcaddr;
481 banks[0] = ((uint32_t *)(video_base + ((srcaddr&0x007FFFF8)>>1)));
482 banks[1] = banks[0] + 0x100000;
483 if( bank_flag )
484 banks[0]++;
486 /* Handle non-aligned start of source */
487 if( srcaddr & 0x03 ) {
488 char *src = ((char *)banks[bank_flag]) + (srcaddr & 0x03);
489 for( i= srcaddr & 0x03; i < 4 && length > 0; i++, length-- ) {
490 *dest++ = *src++;
491 }
492 bank_flag = !bank_flag;
493 }
495 dwdest = (uint32_t *)dest;
496 while( length >= 4 ) {
497 *dwdest++ = *banks[bank_flag]++;
498 bank_flag = !bank_flag;
499 length -= 4;
500 }
502 /* Handle non-aligned end of source */
503 if( length ) {
504 dest = (unsigned char *)dwdest;
505 unsigned char *src = (unsigned char *)banks[bank_flag];
506 while( length-- > 0 ) {
507 *dest++ = *src++;
508 }
509 }
510 }
512 void pvr2_vram64_dump_file( sh4addr_t addr, uint32_t length, gchar *filename )
513 {
514 uint32_t tmp[length>>2];
515 FILE *f = fopen(filename, "wo");
516 unsigned int i, j;
518 if( f == NULL ) {
519 ERROR( "Unable to write to dump file '%s' (%s)", filename, strerror(errno) );
520 return;
521 }
522 pvr2_vram64_read( (unsigned char *)tmp, addr, length );
523 fprintf( f, "%08X\n", addr );
524 for( i =0; i<length>>2; i+=8 ) {
525 for( j=i; j<i+8; j++ ) {
526 if( j < length )
527 fprintf( f, " %08X", tmp[j] );
528 else
529 fprintf( f, " " );
530 }
531 fprintf( f, "\n" );
532 }
533 fclose(f);
534 }
536 void pvr2_vram64_dump( sh4addr_t addr, uint32_t length, FILE *f )
537 {
538 unsigned char tmp[length];
539 pvr2_vram64_read( tmp, addr, length );
540 fwrite_dump( tmp, length, f );
541 }
545 /**
546 * Flush the indicated render buffer back to PVR. Caller is responsible for
547 * tracking whether there is actually anything in the buffer.
548 *
549 * FIXME: Handle horizontal scaler
550 *
551 * @param buffer A render buffer indicating the address to store to, and the
552 * format the data needs to be in.
553 */
554 void pvr2_render_buffer_copy_to_sh4( render_buffer_t buffer )
555 {
556 if( (buffer->address & 0xFF000000) == 0x04000000 ) {
557 /* Interlaced buffer. Go the double copy... :( */
558 unsigned char target[buffer->size];
559 display_driver->read_render_buffer( target, buffer, buffer->rowstride, buffer->colour_format );
560 pvr2_vram64_write( buffer->address, target, buffer->size );
561 } else {
562 /* Regular buffer */
563 int line_size = buffer->width * colour_formats[buffer->colour_format].bpp;
564 int src_stride = line_size;
565 if( (buffer->scale & 0xFFFF) == 0x0800 )
566 src_stride <<= 1;
568 if( buffer->scale & SCALER_HSCALE ) {
569 unsigned char target[buffer->size];
570 display_driver->read_render_buffer( target, buffer, line_size, buffer->colour_format );
571 pvr2_vram_write_invert_hscale( buffer->address, target, buffer->size, line_size, buffer->rowstride,
572 src_stride, colour_formats[buffer->colour_format].bpp );
573 } else {
574 unsigned char target[buffer->size];
575 display_driver->read_render_buffer( target, buffer, line_size, buffer->colour_format );
576 pvr2_vram_write_invert( buffer->address, target, buffer->size, line_size, buffer->rowstride,
577 src_stride );
578 }
579 }
580 buffer->flushed = TRUE;
581 }
.