2 * $Id: rendcore.c,v 1.14 2007-01-26 01:37:39 nkeynes Exp $
6 * Copyright (c) 2005 Nathan Keynes.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
19 #include "pvr2/pvr2.h"
/* Lookup tables of GL and colour-format constants. Nothing in this listing
 * indexes them directly; presumably the POLY1_ and POLY2_ accessor macros
 * do - TODO confirm against pvr2.h. */
/* Depth comparison functions. Seven of the eight entries are visible; the
 * line carrying the last entry and the closing brace is missing here. */
22 int pvr2_poly_depthmode[8] = { GL_NEVER, GL_LESS, GL_EQUAL, GL_LEQUAL,
23 GL_GREATER, GL_NOTEQUAL, GL_GEQUAL,
/* Source blend factors. Entries 2 and 3 refer to the destination colour. */
25 int pvr2_poly_srcblend[8] = {
26 GL_ZERO, GL_ONE, GL_DST_COLOR, GL_ONE_MINUS_DST_COLOR,
27 GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_DST_ALPHA,
28 GL_ONE_MINUS_DST_ALPHA };
/* Destination blend factors. Same layout as the source table, except that
 * entries 2 and 3 refer to the source colour. */
29 int pvr2_poly_dstblend[8] = {
30 GL_ZERO, GL_ONE, GL_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR,
31 GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_DST_ALPHA,
32 GL_ONE_MINUS_DST_ALPHA };
/* Texture blend modes; its four entries are on lines missing from this listing. */
33 int pvr2_poly_texblend[4] = {
/* Colour format codes, going by the name for render output - TODO confirm.
 * ARGB1555, ARGB4444 and ARGB8888 each appear twice. */
39 int pvr2_render_colour_format[8] = {
40 COLFMT_ARGB1555, COLFMT_RGB565, COLFMT_ARGB4444, COLFMT_ARGB1555,
41 COLFMT_RGB888, COLFMT_ARGB8888, COLFMT_ARGB8888, COLFMT_ARGB4444 };
/* Flag bits and field extractors for the control word of a tile segment. */
/* Marks the last segment; both segment loops in this file stop once it is seen. */
49 #define SEGMENT_END 0x80000000
/* Not referenced on any line visible in this listing. */
50 #define SEGMENT_ZCLEAR 0x40000000
/* Consulted when choosing between render_autosort_tile and render_tile for
 * the translucent list. */
51 #define SEGMENT_SORT_TRANS 0x20000000
/* Not referenced on any line visible in this listing. */
52 #define SEGMENT_START 0x10000000
/* Tile X and Y indexes: 6-bit fields at bits 2-7 and bits 8-13 of the control word. */
53 #define SEGMENT_X(c) (((c) >> 2) & 0x3F)
54 #define SEGMENT_Y(c) (((c) >> 8) & 0x3F)
/* A list pointer with this bit set is treated as absent and is skipped. */
55 #define NO_POINTER 0x80000000
/* Host base pointer; PVR addresses and list offsets are added to it to form
 * host pointers throughout this file. Defined elsewhere. */
57 extern char *video_base;
/* When TRUE the vertex emitters send an alpha of 1.0 (0xFF) instead of the
 * vertex alpha. Written by render_set_context. */
59 gboolean pvr2_force_fragment_alpha = FALSE;
/* Three of the list-pointer fields that this file reads through struct
 * tile_segment pointers. The struct header and the remaining fields
 * (control, opaque_ptr and trans_ptr are also used in this file) are on
 * lines missing from this listing. */
64 pvraddr_t opaquemod_ptr;
66 pvraddr_t transmod_ptr;
67 pvraddr_t punchout_ptr;
/* NOTE(review): the header lines below describe a half float with a 5-bit
 * exponent and 10-bit mantissa, which is what the disabled code converts.
 * The active statement instead places the 16 input bits in the upper half
 * of temp.i, i.e. it treats the input as the top 16 bits of a 32-bit
 * pattern. Confirm which format the callers really supply. */
71 * Convert a half-float (16-bit) FP number to a regular 32-bit float.
72 * Source is 1-bit sign, 5-bit exponent, 10-bit mantissa.
73 * TODO: Check the correctness of this.
75 float halftofloat( uint16_t half )
/* Disabled conversion: rebias the exponent from 15 to 127 and move the
 * 10-bit mantissa up by 13 bits. */
81 /* int e = ((half & 0x7C00) >> 10) - 15 + 127;
83 temp.i = ((half & 0x8000) << 16) | (e << 23) |
84 ((half & 0x03FF) << 13); */
/* Active conversion: shift the raw bits into the upper 16 bits. The
 * declaration of temp and the return statement are on lines missing from
 * this listing. */
85 temp.i = ((uint32_t)half)<<16;
91 * Setup the GL context for the supplied polygon context.
92 * @param context pointer to 3 or 5 words of polygon context
93 * @param modified boolean flag indicating that the modified
94 * version should be used, rather than the normal version.
96 void render_set_context( uint32_t *context, int render_mode )
98 uint32_t poly1 = context[0], poly2, texture;
99 if( render_mode == RENDER_FULLMOD ) {
101 texture = context[4];
104 texture = context[2];
107 if( POLY1_DEPTH_ENABLE(poly1) ) {
108 glEnable( GL_DEPTH_TEST );
109 glDepthFunc( POLY1_DEPTH_MODE(poly1) );
111 glDisable( GL_DEPTH_TEST );
114 switch( POLY1_CULL_MODE(poly1) ) {
117 glDisable( GL_CULL_FACE );
120 glEnable( GL_CULL_FACE );
121 glFrontFace( GL_CW );
124 glEnable( GL_CULL_FACE );
125 glFrontFace( GL_CCW );
129 if( POLY1_SPECULAR(poly1) ) {
130 glEnable(GL_COLOR_SUM);
132 glDisable(GL_COLOR_SUM);
135 if( POLY1_TEXTURED(poly1) ) {
136 int width = POLY2_TEX_WIDTH(poly2);
137 int height = POLY2_TEX_HEIGHT(poly2);
138 glEnable(GL_TEXTURE_2D);
139 texcache_get_texture( (texture&0x000FFFFF)<<3, width, height, texture );
140 glTexEnvi( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, POLY2_TEX_BLEND(poly2) );
141 if( POLY2_TEX_CLAMP_U(poly2) ) {
142 glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP );
144 glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT );
146 if( POLY2_TEX_CLAMP_V(poly2) ) {
147 glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP );
149 glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT );
152 glDisable( GL_TEXTURE_2D );
155 glShadeModel( POLY1_SHADE_MODEL(poly1) );
157 int srcblend = POLY2_SRC_BLEND(poly2);
158 int destblend = POLY2_DEST_BLEND(poly2);
159 glBlendFunc( srcblend, destblend );
161 if( POLY2_SRC_BLEND_TARGET(poly2) || POLY2_DEST_BLEND_TARGET(poly2) ) {
162 ERROR( "Accumulation buffer not supported" );
165 pvr2_force_fragment_alpha = POLY2_ALPHA_ENABLE(poly2) ? FALSE : TRUE;
/* Extract one 8-bit channel of a packed ARGB word (A in bits 24-31, R in
 * 16-23, G in 8-15, B in 0-7) and scale it to a float as (value+1)/256,
 * so 255 maps to 1.0 and 0 maps to 1/256 rather than 0. */
169 #define FARGB_A(x) (((float)(((x)>>24)+1))/256.0)
170 #define FARGB_R(x) (((float)((((x)>>16)&0xFF)+1))/256.0)
171 #define FARGB_G(x) (((float)((((x)>>8)&0xFF)+1))/256.0)
172 #define FARGB_B(x) (((float)(((x)&0xFF)+1))/256.0)
/* Unpacks num_vertexes raw vertexes into out[]: position, optional texture
 * coordinates, base colour and optional offset (specular) colour.
 * out: destination array with at least num_vertexes entries.
 * poly1: first polygon context word; selects textured, 16-bit UV and
 *        specular handling.
 * vertexes: raw vertex data, vertex_size 32-bit words per vertex.
 * render_mode: for RENDER_FULLMOD an offset m is derived from vertex_size;
 *        how m is applied is on lines missing from this listing.
 * Fix: the offset colour is now unpacked from the offset word that was just
 * read. It previously reused argb, so offset_rgba was always a copy of the
 * base colour and the word read into offset was ignored. */
174 void render_unpack_vertexes( struct vertex_unpacked *out, uint32_t poly1,
175 uint32_t *vertexes, int num_vertexes,
176 int vertex_size, int render_mode )
179 if( render_mode == RENDER_FULLMOD ) {
180 m = (vertex_size - 3)/2;
183 for( i=0; i<num_vertexes; i++ ) {
/* The same words are also viewed as floats for position and 32-bit UV. */
184 float *vertexf = (float *)vertexes;
186 out[i].x = vertexf[0];
187 out[i].y = vertexf[1];
188 out[i].z = vertexf[2];
189 if( POLY1_TEXTURED(poly1) ) {
/* 16-bit UV: u in the high half and v in the low half of a single word. */
190 if( POLY1_UV16(poly1) ) {
191 out[i].u = halftofloat(vertexes[k]>>16);
192 out[i].v = halftofloat(vertexes[k]);
/* 32-bit UV: two consecutive float words. */
195 out[i].u = vertexf[k];
196 out[i].v = vertexf[k+1];
/* Base colour, stored in out[i].rgba in R, G, B, A order. */
203 uint32_t argb = vertexes[k++];
204 out[i].rgba[0] = FARGB_R(argb);
205 out[i].rgba[1] = FARGB_G(argb);
206 out[i].rgba[2] = FARGB_B(argb);
207 out[i].rgba[3] = FARGB_A(argb);
/* Offset colour follows the base colour for specular polygons. */
208 if( POLY1_SPECULAR(poly1) ) {
209 uint32_t offset = vertexes[k++];
210 out[i].offset_rgba[0] = FARGB_R(offset);
211 out[i].offset_rgba[1] = FARGB_G(offset);
212 out[i].offset_rgba[2] = FARGB_B(offset);
213 out[i].offset_rgba[3] = FARGB_A(offset);
/* Advance to the next raw vertex. */
215 vertexes += vertex_size;
/* Unpacks a quad into unpacked[0..3]. The first three vertexes are unpacked
 * normally; for the fourth only x and y are read from the raw data, and its
 * remaining attributes are derived from the first three. */
220 * Unpack the vertexes for a quad, calculating the values for the last
222 * FIXME: Integrate this with rendbkg somehow
224 void render_unpack_quad( struct vertex_unpacked *unpacked, uint32_t poly1,
225 uint32_t *vertexes, int vertex_size,
229 struct vertex_unpacked diff0, diff1;
/* Unpack vertexes 0..2. */
231 render_unpack_vertexes( unpacked, poly1, vertexes, 3, vertex_size, render_mode );
/* Edge vectors in x/y, both measured from vertex 1. */
233 diff0.x = unpacked[0].x - unpacked[1].x;
234 diff0.y = unpacked[0].y - unpacked[1].y;
235 diff1.x = unpacked[2].x - unpacked[1].x;
236 diff1.y = unpacked[2].y - unpacked[1].y;
/* Determinant of the two edge vectors; it is the divisor for t and s below. */
238 float detxy = ((diff1.y) * (diff0.x)) - ((diff0.y) * (diff1.x));
/* Raw data of the fourth vertex, three vertex strides into the input. */
239 float *vertexf = (float *)(vertexes+(vertex_size*3));
/* First path: start from a copy of vertex 2 and overwrite only x and y.
 * The condition selecting this path is on a line missing from this listing
 * (presumably a zero determinant - TODO confirm). */
241 memcpy( &unpacked[3], &unpacked[2], sizeof(struct vertex_unpacked) );
242 unpacked[3].x = vertexf[0];
243 unpacked[3].y = vertexf[1];
/* Second path: take x and y from the raw data, then solve
 * vertex3 = vertex1 + t*diff0 + s*diff1 for t and s. */
247 unpacked[3].x = vertexf[0];
248 unpacked[3].y = vertexf[1];
249 float t = ((unpacked[3].x - unpacked[1].x) * diff1.y -
250 (unpacked[3].y - unpacked[1].y) * diff1.x) / detxy;
251 float s = ((unpacked[3].y - unpacked[1].y) * diff0.x -
252 (unpacked[3].x - unpacked[1].x) * diff0.y) / detxy;
/* z is interpolated through its reciprocal and then inverted back. */
253 diff0.z = (1/unpacked[0].z) - (1/unpacked[1].z);
254 diff1.z = (1/unpacked[2].z) - (1/unpacked[1].z);
255 unpacked[3].z = 1/((1/unpacked[1].z) + (t*diff0.z) + (s*diff1.z));
/* Texture coordinates are interpolated linearly with the same t and s. */
257 diff0.u = unpacked[0].u - unpacked[1].u;
258 diff0.v = unpacked[0].v - unpacked[1].v;
259 diff1.u = unpacked[2].u - unpacked[1].u;
260 diff1.v = unpacked[2].v - unpacked[1].v;
261 unpacked[3].u = unpacked[1].u + (t*diff0.u) + (s*diff1.u);
262 unpacked[3].v = unpacked[1].v + (t*diff0.v) + (s*diff1.v);
/* Without gouraud shading the fourth vertex reuses both colours of vertex 2. */
264 if( !POLY1_GOURAUD_SHADED(poly1) ) {
265 memcpy( unpacked[3].rgba, unpacked[2].rgba, sizeof(unpacked[2].rgba) );
266 memcpy( unpacked[3].offset_rgba, unpacked[2].offset_rgba, sizeof(unpacked[2].offset_rgba) );
/* Otherwise each of the four channels of both colours is interpolated. */
268 for( i=0; i<4; i++ ) {
269 float d0 = unpacked[0].rgba[i] - unpacked[1].rgba[i];
270 float d1 = unpacked[2].rgba[i] - unpacked[1].rgba[i];
271 unpacked[3].rgba[i] = unpacked[1].rgba[i] + (t*d0) + (s*d1);
272 d0 = unpacked[0].offset_rgba[i] - unpacked[1].offset_rgba[i];
273 d1 = unpacked[2].offset_rgba[i] - unpacked[1].offset_rgba[i];
274 unpacked[3].offset_rgba[i] = unpacked[1].offset_rgba[i] + (t*d0) + (s*d1);
/* Emits already-unpacked vertexes as one GL triangle strip.
 * poly1: first polygon context word; selects texture coordinate and
 *        secondary colour output.
 * vertexes: array of pointers to unpacked vertexes, in strip order.
 * The rest of the parameter list and the matching glEnd are on lines
 * missing from this listing. */
279 void render_unpacked_vertex_array( uint32_t poly1, struct vertex_unpacked *vertexes[],
283 glBegin( GL_TRIANGLE_STRIP );
285 for( i=0; i<num_vertexes; i++ ) {
286 if( POLY1_TEXTURED(poly1) ) {
287 glTexCoord2f( vertexes[i]->u, vertexes[i]->v );
/* Alpha is replaced by 1.0 when pvr2_force_fragment_alpha is set. */
290 if( pvr2_force_fragment_alpha ) {
291 glColor4f( vertexes[i]->rgba[0], vertexes[i]->rgba[1], vertexes[i]->rgba[2], 1.0 );
293 glColor4f( vertexes[i]->rgba[0], vertexes[i]->rgba[1], vertexes[i]->rgba[2],
294 vertexes[i]->rgba[3] );
/* The offset colour goes out as the GL secondary colour; its alpha is not sent. */
296 if( POLY1_SPECULAR(poly1) ) {
297 glSecondaryColor3fEXT( vertexes[i]->offset_rgba[0],
298 vertexes[i]->offset_rgba[1],
299 vertexes[i]->offset_rgba[2] );
/* The stored z is passed to GL as its reciprocal. */
301 glVertex3f( vertexes[i]->x, vertexes[i]->y, 1/vertexes[i]->z );
/* Renders one quad: unpacks its four vertexes with render_unpack_quad and
 * emits them through render_unpacked_vertex_array. */
307 void render_quad_vertexes( uint32_t poly1, uint32_t *vertexes, int vertex_size, int render_mode )
309 struct vertex_unpacked unpacked[4];
/* Emission order is 0, 1, 3, 2 because the emitter draws a triangle strip. */
310 struct vertex_unpacked *pt[4] = {&unpacked[0], &unpacked[1], &unpacked[3], &unpacked[2]};
311 render_unpack_quad( unpacked, poly1, vertexes, vertex_size, render_mode );
312 render_unpacked_vertex_array( poly1, pt, 4 );
/* Emits raw (still packed) vertexes as one GL triangle strip.
 * poly1: first polygon context word; selects textured, 16-bit UV and
 *        specular handling.
 * vert_array: one pointer per vertex to its raw data.
 * render_mode: for RENDER_FULLMOD an offset m is derived from vertex_size;
 *        how m is applied is on lines missing from this listing.
 * The end of the parameter list and the matching glEnd are also on
 * missing lines. */
315 void render_vertex_array( uint32_t poly1, uint32_t *vert_array[], int num_vertexes, int vertex_size,
320 if( render_mode == RENDER_FULLMOD ) {
321 m = (vertex_size - 3)/2;
324 glBegin( GL_TRIANGLE_STRIP );
326 for( i=0; i<num_vertexes; i++ ) {
/* Integer and float views of the same vertex. */
327 uint32_t *vertexes = vert_array[i];
328 float *vertexf = (float *)vert_array[i];
331 if( POLY1_TEXTURED(poly1) ) {
/* 16-bit UV: u in the high half and v in the low half of a single word. */
332 if( POLY1_UV16(poly1) ) {
333 glTexCoord2f( halftofloat(vertexes[k]>>16),
334 halftofloat(vertexes[k]) );
337 glTexCoord2f( vertexf[k], vertexf[k+1] );
/* Packed ARGB colour; alpha is replaced by 0xFF when
 * pvr2_force_fragment_alpha is set. */
342 argb = vertexes[k++];
343 if( pvr2_force_fragment_alpha ) {
344 glColor4ub( (GLubyte)(argb >> 16), (GLubyte)(argb >> 8),
345 (GLubyte)argb, 0xFF );
347 glColor4ub( (GLubyte)(argb >> 16), (GLubyte)(argb >> 8),
348 (GLubyte)argb, (GLubyte)(argb >> 24) );
/* The call below continues on a line missing from this listing. */
351 if( POLY1_SPECULAR(poly1) ) {
352 uint32_t spec = vertexes[k++];
353 glSecondaryColor3ubEXT( (GLubyte)(spec >> 16), (GLubyte)(spec >> 8),
/* The stored z is passed to GL as its reciprocal. */
356 glVertex3f( vertexf[0], vertexf[1], 1/vertexf[2] );
/* NOTE(review): this increment has no lasting effect, since vertexes is
 * reloaded from vert_array[i] at the top of every iteration. */
357 vertexes += vertex_size;
/* Renders num_vertexes vertexes stored contiguously, vertex_size words
 * apart, by building a pointer array and calling render_vertex_array.
 * The end of the parameter list is on a line missing from this listing. */
363 void render_vertexes( uint32_t poly1, uint32_t *vertexes, int num_vertexes, int vertex_size,
/* Variable-length array on the stack, one pointer per vertex. */
366 uint32_t *vert_array[num_vertexes];
368 for( i=0; i<num_vertexes; i++ ) {
369 vert_array[i] = vertexes;
370 vertexes += vertex_size;
372 render_vertex_array( poly1, vert_array, num_vertexes, vertex_size, render_mode );
/* Walks one tile object list and renders every entry in list order.
 * tile_entry: offset of the list relative to video_base.
 * render_mode: passed through to the context and vertex routines.
 * cheap_modifier_mode: when FALSE, entries with the modifier bit take the
 *        extra branch below, whose body is on lines missing from this listing.
 * The loop header and the list-end handling are also on missing lines. */
376 * Render a simple (not auto-sorted) tile
378 void render_tile( pvraddr_t tile_entry, int render_mode, gboolean cheap_modifier_mode ) {
379 uint32_t poly_bank = MMIO_READ(PVR2,RENDER_POLYBASE);
380 uint32_t *tile_list = (uint32_t *)(video_base+tile_entry);
382 uint32_t entry = *tile_list++;
/* Top nibble 0xF gets special handling on a missing line (presumably the
 * end of the list - TODO confirm); 0xE redirects the walk to another list
 * address. */
383 if( entry >> 28 == 0x0F ) {
385 } else if( entry >> 28 == 0x0E ) {
386 tile_list = (uint32_t *)(video_base + (entry&0x007FFFFF));
/* Any other entry references polygon data: a word offset in bits 0-19,
 * relative to the polygon base register. */
388 uint32_t *polygon = (uint32_t *)(video_base + poly_bank + ((entry & 0x000FFFFF) << 2));
/* Bit 24 is the modifier flag; bits 21-23 give the vertex length in words. */
389 int is_modified = entry & 0x01000000;
390 int vertex_length = (entry >> 21) & 0x07;
391 int context_length = 3;
392 if( is_modified && !cheap_modifier_mode ) {
/* Top three bits 100: an array of triangles. Bits 25-28 hold the count
 * minus one; each triangle carries its own context followed by 3 vertexes. */
398 if( (entry & 0xE0000000) == 0x80000000 ) {
400 int strip_count = ((entry >> 25) & 0x0F)+1;
401 int polygon_length = 3 * vertex_length + context_length;
403 for( i=0; i<strip_count; i++ ) {
404 render_set_context( polygon, render_mode );
405 render_vertexes( *polygon, polygon+context_length, 3, vertex_length,
407 polygon += polygon_length;
/* Top three bits 101: an array of quads, laid out the same way with 4
 * vertexes each. */
409 } else if( (entry & 0xE0000000) == 0xA0000000 ) {
411 int strip_count = ((entry >> 25) & 0x0F)+1;
412 int polygon_length = 4 * vertex_length + context_length;
414 for( i=0; i<strip_count; i++ ) {
415 render_set_context( polygon, render_mode );
416 render_quad_vertexes( *polygon, polygon+context_length, vertex_length,
418 polygon += polygon_length;
/* Otherwise a triangle strip: six flag bits, starting at bit 30 and going
 * down, are scanned to find the first and last flagged positions. */
422 int i, first=-1, last = -1;
423 for( i=0; i<6; i++ ) {
424 if( entry & (0x40000000>>i) ) {
425 if( first == -1 ) first = i;
/* Render from position first onward; the vertex count is last-first+3. */
431 render_set_context(polygon, render_mode);
432 render_vertexes( *polygon, polygon+context_length + (first*vertex_length),
433 (last-first+3), vertex_length, render_mode );
/* Renders every tile segment in the list at RENDER_TILEBASE, using the GL
 * scissor test to restrict drawing to the visible part of each tile.
 * width, height: height flips the scissor origin vertically; width is not
 *        used on any line visible in this listing.
 * clipx1, clipy1, clipx2, clipy2: clip rectangle; tiles completely outside
 *        it are skipped. */
440 void pvr2_render_tilebuffer( int width, int height, int clipx1, int clipy1,
441 int clipx2, int clipy2 ) {
443 pvraddr_t segmentbase = MMIO_READ( PVR2, RENDER_TILEBASE );
445 gboolean cheap_shadow;
447 int obj_config = MMIO_READ( PVR2, RENDER_OBJCFG );
448 int isp_config = MMIO_READ( PVR2, RENDER_ISPCFG );
449 int shadow_cfg = MMIO_READ( PVR2, RENDER_SHADOW );
/* These two tests feed the choice of tile_sort; the assignments are on
 * lines missing from this listing. */
451 if( (obj_config & 0x00200000) == 0 ) {
452 if( isp_config & 1 ) {
/* Bit 0x100 of the shadow register selects cheap shadow mode. */
461 cheap_shadow = shadow_cfg & 0x100 ? TRUE : FALSE;
463 struct tile_segment *segment = (struct tile_segment *)(video_base + segmentbase);
/* The render pass is timed with gettimeofday; the elapsed time ends up in
 * tv_start. */
465 struct timeval tv_start, tv_end;
466 gettimeofday(&tv_start, NULL);
467 glEnable( GL_SCISSOR_TEST );
469 // fwrite_dump32v( (uint32_t *)segment, sizeof(struct tile_segment), 6, stderr );
470 int tilex = SEGMENT_X(segment->control);
471 int tiley = SEGMENT_Y(segment->control);
/* x1 and y1 are set on lines missing from this listing; the tests below
 * treat a tile as 32 pixels on each side. */
475 if( x1 + 32 <= clipx1 ||
479 /* Tile completely clipped, skip */
483 /* Set a scissor on the visible part of the tile */
484 int w = MIN(x1+32, clipx2) - x1;
485 int h = MIN(y1+32, clipy2) - y1;
/* The y origin is measured from the opposite edge, hence height-y1-h. */
488 glScissor( x1, height-y1-h, w, h );
/* Opaque list. The body of the modifier-list check is not visible here. */
490 if( (segment->opaque_ptr & NO_POINTER) == 0 ) {
491 if( (segment->opaquemod_ptr & NO_POINTER) == 0 ) {
494 render_tile( segment->opaque_ptr, RENDER_NORMAL, cheap_shadow );
/* Translucent list: autosorted when tile_sort is 2, or when it is 1 and
 * the segment does not set SEGMENT_SORT_TRANS; otherwise drawn in list order. */
497 if( (segment->trans_ptr & NO_POINTER) == 0 ) {
498 if( (segment->transmod_ptr & NO_POINTER) == 0 ) {
501 if( tile_sort == 2 ||
502 (tile_sort == 1 && ((segment->control & SEGMENT_SORT_TRANS)==0)) ) {
503 render_autosort_tile( segment->trans_ptr, RENDER_NORMAL, cheap_shadow );
505 render_tile( segment->trans_ptr, RENDER_NORMAL, cheap_shadow );
/* Punch-out list. */
509 if( (segment->punchout_ptr & NO_POINTER) == 0 ) {
510 render_tile( segment->punchout_ptr, RENDER_NORMAL, cheap_shadow );
/* The segment carrying SEGMENT_END is processed before the loop stops. */
512 } while( ((segment++)->control & SEGMENT_END) == 0 );
513 glDisable( GL_SCISSOR_TEST );
515 gettimeofday(&tv_end, NULL);
516 timersub(&tv_end,&tv_start, &tv_start);
/* Scans one tile object list looking for vertex z values larger than
 * inputz.
 * tile_entry: offset of the list relative to video_base.
 * inputz: starting value for the comparison.
 * The loop header, the z updates and the return statement are on lines
 * missing from this listing; presumably the largest z seen is returned. */
519 static float render_find_maximum_tile_z( pvraddr_t tile_entry, float inputz )
521 uint32_t poly_bank = MMIO_READ(PVR2,RENDER_POLYBASE);
522 uint32_t *tile_list = (uint32_t *)(video_base+tile_entry);
/* Non-zero when bit 0x100 of the shadow register is set. */
523 int shadow_cfg = MMIO_READ( PVR2, RENDER_SHADOW ) & 0x100;
527 uint32_t entry = *tile_list++;
/* Top nibble 0xF gets special handling on a missing line; 0xE redirects
 * the walk to another list address. */
528 if( entry >> 28 == 0x0F ) {
530 } else if( entry >> 28 == 0x0E ) {
531 tile_list = (uint32_t *)(video_base + (entry&0x007FFFFF));
533 uint32_t *polygon = (uint32_t *)(video_base + poly_bank + ((entry & 0x000FFFFF) << 2));
/* is_modified is not read on any line visible here; the test below
 * repeats the bit mask instead. */
534 int is_modified = entry & 0x01000000;
535 int vertex_length = (entry >> 21) & 0x07;
536 int context_length = 3;
/* NOTE(review): this branch is taken when the 0x100 shadow bit is SET,
 * whereas render_tile takes its corresponding branch when
 * cheap_modifier_mode, derived from the same bit, is FALSE. The polarity
 * looks inverted - confirm which is intended. */
537 if( (entry & 0x01000000) && shadow_cfg ) {
/* Triangle array: vertexz starts at the third word of the first vertex. */
542 if( (entry & 0xE0000000) == 0x80000000 ) {
544 int strip_count = ((entry >> 25) & 0x0F)+1;
545 float *vertexz = (float *)(polygon+context_length+2);
546 for( i=0; i<strip_count; i++ ) {
547 for( j=0; j<3; j++ ) {
551 vertexz += vertex_length;
/* Step over the context words of the next triangle. */
553 vertexz += context_length;
/* Quad array: same walk with four vertexes per polygon. */
555 } else if( (entry & 0xE0000000) == 0xA0000000 ) {
557 int strip_count = ((entry >> 25) & 0x0F)+1;
558 int polygon_length = 4 * vertex_length + context_length;
560 float *vertexz = (float *)(polygon+context_length+2);
561 for( i=0; i<strip_count; i++ ) {
562 for( j=0; j<4; j++ ) {
566 vertexz += vertex_length;
568 vertexz+=context_length;
/* Triangle strip: positions 0..5 are examined, each only when its flag
 * bit is set in the entry. */
572 int i, first=-1, last = -1;
573 float *vertexz = (float *)polygon+context_length+2;
574 for( i=0; i<6; i++ ) {
575 if( (entry & (0x40000000>>i)) && *vertexz > z ) {
578 vertexz += vertex_length;
/* Starts from the value of the RENDER_FARCLIP register and passes it
 * through render_find_maximum_tile_z for every list that is present in
 * every tile segment. The loop header and the return statement are on
 * lines missing from this listing. */
587 * Scan through the scene to determine the largest z value (in order to set up
588 * an appropriate near clip plane).
590 float pvr2_render_find_maximum_z( )
592 pvraddr_t segmentbase = MMIO_READ( PVR2, RENDER_TILEBASE );
593 float maximumz = MMIO_READF( PVR2, RENDER_FARCLIP ); /* Initialize to the far clip plane */
595 struct tile_segment *segment = (struct tile_segment *)(video_base + segmentbase);
/* Each of the five lists is scanned only when its pointer does not carry
 * the NO_POINTER bit. */
598 if( (segment->opaque_ptr & NO_POINTER) == 0 ) {
599 maximumz = render_find_maximum_tile_z(segment->opaque_ptr, maximumz);
601 if( (segment->opaquemod_ptr & NO_POINTER) == 0 ) {
602 maximumz = render_find_maximum_tile_z(segment->opaquemod_ptr, maximumz);
604 if( (segment->trans_ptr & NO_POINTER) == 0 ) {
605 maximumz = render_find_maximum_tile_z(segment->trans_ptr, maximumz);
607 if( (segment->transmod_ptr & NO_POINTER) == 0 ) {
608 maximumz = render_find_maximum_tile_z(segment->transmod_ptr, maximumz);
610 if( (segment->punchout_ptr & NO_POINTER) == 0 ) {
611 maximumz = render_find_maximum_tile_z(segment->punchout_ptr, maximumz);
/* The segment carrying SEGMENT_END is scanned before the loop stops. */
614 } while( ((segment++)->control & SEGMENT_END) == 0 );
.