revision 1133:f3da7d810d5c
summary |
tree |
shortlog |
changelog |
graph |
changeset |
raw | bz2 | zip | gz changeset | 1133:f3da7d810d5c |
parent | 1132:1e074a98317c |
child | 1134:f502f3d32f90 |
author | nkeynes |
date | Wed Oct 20 17:56:59 2010 +1000 (13 years ago) |
Perform backface culling in scene preparation rather than leaving it to the
GL - this is a huge performance win, at least on the 9400M - changing cull
state appears to be very expensive, whereas the CPU needed to do the same
job is only just barely measurable.
src/pvr2/glrender.c | view | annotate | diff | log | ||
src/pvr2/rendsort.c | view | annotate | diff | log | ||
src/pvr2/scene.c | view | annotate | diff | log | ||
src/pvr2/scene.h | view | annotate | diff | log |
1.1 --- a/src/pvr2/glrender.c Tue Oct 19 22:43:10 2010 +10001.2 +++ b/src/pvr2/glrender.c Wed Oct 20 17:56:59 2010 +10001.3 @@ -101,7 +101,7 @@1.4 }1.6 texcache_gl_init(); // Allocate texture IDs1.7 - glCullFace( GL_BACK );1.8 + glDisable( GL_CULL_FACE );1.9 glEnable( GL_BLEND );1.10 glEnable( GL_DEPTH_TEST );1.11 glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_NICEST);1.12 @@ -131,24 +131,6 @@1.13 glFogf(GL_FOG_END, 1.0);1.14 }1.16 -static void render_set_cull( uint32_t poly1 )1.17 -{1.18 - switch( POLY1_CULL_MODE(poly1) ) {1.19 - case CULL_NONE:1.20 - case CULL_SMALL:1.21 - glDisable( GL_CULL_FACE );1.22 - break;1.23 - case CULL_CCW:1.24 - glEnable( GL_CULL_FACE );1.25 - glFrontFace( GL_CW );1.26 - break;1.27 - case CULL_CW:1.28 - glEnable( GL_CULL_FACE );1.29 - glFrontFace( GL_CCW );1.30 - break;1.31 - }1.32 -}1.33 -1.34 /**1.35 * Setup the basic context that's shared between normal and modified modes -1.36 * depth, culling1.37 @@ -162,7 +144,6 @@1.38 }1.40 glDepthMask( POLY1_DEPTH_WRITE(poly1) ? 
GL_TRUE : GL_FALSE );1.41 - render_set_cull( poly1 );1.42 }1.44 /**1.45 @@ -223,22 +204,40 @@1.46 render_set_tsp_context(context[0],context[1],context[2]);1.47 }1.49 +static inline void gl_draw_vertexes( struct polygon_struct *poly )1.50 +{1.51 + do {1.52 + glDrawArrays(GL_TRIANGLE_STRIP, poly->vertex_index, poly->vertex_count);1.53 + poly = poly->sub_next;1.54 + } while( poly != NULL );1.55 +}1.56 +1.57 +static inline void gl_draw_mod_vertexes( struct polygon_struct *poly )1.58 +{1.59 + do {1.60 + glDrawArrays(GL_TRIANGLE_STRIP, poly->mod_vertex_index, poly->vertex_count);1.61 + poly = poly->sub_next;1.62 + } while( poly != NULL );1.63 +}1.65 static void gl_render_poly( struct polygon_struct *poly, GLint depth_mode )1.66 {1.67 + if( poly->vertex_count == 0 )1.68 + return; /* Culled */1.69 +1.70 if( poly->tex_id != -1 ) {1.71 glBindTexture(GL_TEXTURE_2D, poly->tex_id);1.72 }1.73 if( poly->mod_vertex_index == -1 ) {1.74 glDisable( GL_STENCIL_TEST );1.75 render_set_context( poly->context, depth_mode );1.76 - glDrawArrays(GL_TRIANGLE_STRIP, poly->vertex_index, poly->vertex_count );1.77 + gl_draw_vertexes(poly);1.78 } else {1.79 glEnable( GL_STENCIL_TEST );1.80 render_set_base_context( poly->context[0], depth_mode );1.81 render_set_tsp_context( poly->context[0], poly->context[1], poly->context[2] );1.82 glStencilFunc(GL_EQUAL, 0, 2);1.83 - glDrawArrays(GL_TRIANGLE_STRIP, poly->vertex_index, poly->vertex_count );1.84 + gl_draw_vertexes(poly);1.86 if( pvr2_scene.shadow_mode == SHADOW_FULL ) {1.87 if( poly->mod_tex_id != -1 ) {1.88 @@ -247,7 +246,7 @@1.89 render_set_tsp_context( poly->context[0], poly->context[3], poly->context[4] );1.90 }1.91 glStencilFunc(GL_EQUAL, 2, 2);1.92 - glDrawArrays(GL_TRIANGLE_STRIP, poly->mod_vertex_index, poly->vertex_count );1.93 + gl_draw_mod_vertexes(poly);1.94 }1.95 }1.97 @@ -258,10 +257,8 @@1.98 }1.99 render_set_context( poly->context, 0 );1.100 glDisable( GL_DEPTH_TEST );1.101 - glDisable( GL_CULL_FACE );1.102 glBlendFunc( GL_ONE, 
GL_ZERO );1.103 - glDrawArrays(GL_TRIANGLE_STRIP, poly->vertex_index, poly->vertex_count );1.104 - glEnable( GL_CULL_FACE );1.105 + gl_draw_vertexes(poly);1.106 glEnable( GL_DEPTH_TEST );1.107 }1.109 @@ -327,8 +324,10 @@1.110 strip_count = ((entry >> 25) & 0x0F)+1;1.111 poly = pvr2_scene.buf_to_poly_map[entry&0x000FFFFF];1.112 while( strip_count > 0 ) {1.113 - render_set_base_context(poly->context[0],0);1.114 - glDrawArrays(GL_TRIANGLE_STRIP, poly->vertex_index, poly->vertex_count );1.115 + if( poly->vertex_count != 0 ) {1.116 + render_set_base_context(poly->context[0],0);1.117 + gl_draw_vertexes(poly);1.118 + }1.119 poly = poly->next;1.120 strip_count--;1.121 }1.122 @@ -336,8 +335,10 @@1.123 default:1.124 if( entry & 0x7E000000 ) {1.125 poly = pvr2_scene.buf_to_poly_map[entry&0x000FFFFF];1.126 - render_set_base_context(poly->context[0],0);1.127 - glDrawArrays(GL_TRIANGLE_STRIP, poly->vertex_index, poly->vertex_count );1.128 + if( poly->vertex_count != 0 ) {1.129 + render_set_base_context(poly->context[0],0);1.130 + gl_draw_vertexes(poly);1.131 + }1.132 }1.133 }1.134 }1.135 @@ -368,8 +369,12 @@1.136 * now :)1.137 */1.139 - render_set_cull(poly->context[0]);1.140 - glDrawArrays(GL_TRIANGLE_STRIP, poly->vertex_index, poly->vertex_count );1.141 + if( poly->vertex_count == 0 )1.142 + return; /* Culled */1.143 +1.144 + gl_draw_vertexes(poly);1.145 +1.146 +1.148 int poly_type = POLY1_VOLUME_MODE(poly->context[0]);1.149 if( poly_type == PVR2_VOLUME_REGION0 ) {1.150 @@ -381,7 +386,6 @@1.151 glStencilMask( 0x03 );1.152 glStencilFunc(GL_EQUAL, 0x02, 0x03);1.153 glStencilOp(GL_ZERO, GL_KEEP, GL_KEEP);1.154 - glDisable( GL_CULL_FACE );1.155 glDisable( GL_DEPTH_TEST );1.157 drawrect2d( tile_bounds, pvr2_scene.bounds[4] );1.158 @@ -399,7 +403,6 @@1.159 */1.160 glStencilMask( 0x02 );1.161 glStencilOp( GL_INVERT, GL_INVERT, GL_INVERT );1.162 - glDisable( GL_CULL_FACE );1.163 glDisable( GL_DEPTH_TEST );1.165 drawrect2d( tile_bounds, pvr2_scene.bounds[4] );1.166 @@ -427,7 +430,6 
@@1.167 return;1.169 glDisable( GL_TEXTURE_2D );1.170 - glDisable( GL_CULL_FACE );1.171 glEnable( GL_STENCIL_TEST );1.172 glEnable( GL_DEPTH_TEST );1.173 glDepthFunc( GL_LEQUAL );1.174 @@ -552,10 +554,7 @@1.175 gl_render_tilelist(segment->punchout_ptr, GL_GEQUAL );1.176 glDisable(GL_ALPHA_TEST );1.177 }1.178 - glDisable( GL_STENCIL_TEST );1.179 - glStencilMask(0x03);1.180 - glClear( GL_STENCIL_BUFFER_BIT );1.181 -1.182 +1.183 if( IS_TILE_PTR(segment->trans_ptr) ) {1.184 if( pvr2_scene.sort_mode == SORT_NEVER ||1.185 (pvr2_scene.sort_mode == SORT_TILEFLAG && (segment->control&SEGMENT_SORT_TRANS))) {
2.1 --- a/src/pvr2/rendsort.c Tue Oct 19 22:43:10 2010 +10002.2 +++ b/src/pvr2/rendsort.c Wed Oct 20 17:56:59 2010 +10002.3 @@ -38,7 +38,8 @@2.5 /**2.6 * Count the number of triangles in the list starting at the given2.7 - * pvr memory address.2.8 + * pvr memory address. This is an upper bound as it includes2.9 + * triangles that have been culled out.2.10 */2.11 static int sort_count_triangles( pvraddr_t tile_entry ) {2.12 uint32_t *tile_list = (uint32_t *)(pvr2_main_ram+tile_entry);2.13 @@ -54,11 +55,11 @@2.14 } else if( entry >> 29 == 0x05 ) { /* Quad array */2.15 count += ((((entry >> 25) & 0x0F)+1)<<1);2.16 } else { /* Polygon */2.17 - int i;2.18 - for( i=0; i<6; i++ ) {2.19 - if( entry & (0x40000000>>i) ) {2.20 - count++;2.21 - }2.22 + struct polygon_struct *poly = pvr2_scene.buf_to_poly_map[entry&0x000FFFFF];2.23 + while( poly != NULL ) {2.24 + if( poly->vertex_count != 0 )2.25 + count += poly->vertex_count-2;2.26 + poly = poly->sub_next;2.27 }2.28 }2.29 }2.30 @@ -94,6 +95,8 @@2.31 vertexes[0].z*triangle->mz;2.32 }2.34 +2.35 +2.36 /**2.37 * Extract a triangle list from the tile (basically indexes into the polygon list, plus2.38 * computing maxz while we go through it2.39 @@ -118,7 +121,8 @@2.40 poly = pvr2_scene.buf_to_poly_map[entry&0x000FFFFF];2.41 while( strip_count > 0 ) {2.42 assert( poly != NULL );2.43 - for( i=0; i<poly->vertex_count-2; i++ ) {2.44 + for( i=0; i+2<poly->vertex_count; i++ ) {2.45 + /* Note: tris + quads can't have sub-polys */2.46 sort_add_triangle( &triangles[count], poly, i );2.47 count++;2.48 }2.49 @@ -129,11 +133,17 @@2.50 default:2.51 if( entry & 0x7E000000 ) {2.52 poly = pvr2_scene.buf_to_poly_map[entry&0x000FFFFF];2.53 - for( i=0; i<6; i++ ) {2.54 - if( entry & (0x40000000>>i) ) {2.55 + /* FIXME: This could end up including a triangle that was2.56 + * excluded from the tile, if it is part of a strip that2.57 + * still has some other triangles in the tile.2.58 + * (This couldn't happen with TA output though).2.59 + */2.60 + while( 
poly != NULL ) {2.61 + for( i=0; i+2<poly->vertex_count; i++ ) {2.62 sort_add_triangle( &triangles[count], poly, i );2.63 count++;2.64 }2.65 + poly = poly->sub_next;2.66 }2.67 }2.68 }2.69 @@ -151,13 +161,6 @@2.70 }2.71 render_set_context( poly->context, GL_GEQUAL );2.72 glDepthMask(GL_FALSE);2.73 - /* Fix cull direction */2.74 - if( triangles[i]->triangle_num & 1 ) {2.75 - glCullFace(GL_FRONT);2.76 - } else {2.77 - glCullFace(GL_BACK);2.78 - }2.79 -2.80 glDrawArrays(GL_TRIANGLE_STRIP, poly->vertex_index + triangles[i]->triangle_num, 3 );2.81 }2.82 }2.83 @@ -263,9 +266,9 @@2.84 triangle_order[i] = &triangles[i];2.85 }2.86 int extracted_triangles = sort_extract_triangles(tile_entry, triangles);2.87 - assert( extracted_triangles == num_triangles );2.88 - sort_triangles( triangle_order, num_triangles, triangle_order );2.89 - sort_render_triangles(triangle_order, num_triangles);2.90 + assert( extracted_triangles <= num_triangles );2.91 + sort_triangles( triangle_order, extracted_triangles, triangle_order );2.92 + sort_render_triangles(triangle_order, extracted_triangles);2.93 glCullFace(GL_BACK);2.94 assert( triangles[num_triangles].poly == (void *)SENTINEL );2.95 }
3.1 --- a/src/pvr2/scene.c Tue Oct 19 22:43:10 2010 +10003.2 +++ b/src/pvr2/scene.c Wed Oct 20 17:56:59 2010 +10003.3 @@ -192,6 +192,7 @@3.4 poly->vertex_index = -1;3.5 poly->mod_vertex_index = -1;3.6 poly->next = NULL;3.7 + poly->sub_next = NULL;3.8 pvr2_scene.buf_to_poly_map[poly_idx] = poly;3.9 pvr2_scene.vertex_count += (vertex_count * vert_mul);3.10 return poly;3.11 @@ -199,6 +200,33 @@3.12 }3.14 /**3.15 + * Given a starting polygon, break it at the specified triangle so that the3.16 + * preceding triangles are retained, and the remainder are contained in a3.17 + * new sub-polygon. Does not preserve winding.3.18 + */3.19 +static struct polygon_struct *scene_split_subpolygon( struct polygon_struct *parent, int split_offset )3.20 +{3.21 + assert( split_offset > 0 && split_offset < (parent->vertex_count-2) );3.22 + assert( pvr2_scene.poly_count < MAX_POLYGONS );3.23 + struct polygon_struct *poly = &pvr2_scene.poly_array[pvr2_scene.poly_count++];3.24 + poly->vertex_count = parent->vertex_count - split_offset;3.25 + poly->vertex_index = parent->vertex_index + split_offset;3.26 + if( parent->mod_vertex_index == -1 ) {3.27 + poly->mod_vertex_index = -1;3.28 + } else {3.29 + poly->mod_vertex_index = parent->mod_vertex_index + split_offset;3.30 + }3.31 + poly->context = parent->context;3.32 + poly->next = NULL;3.33 + poly->sub_next = parent->sub_next;3.34 +3.35 + parent->sub_next = poly;3.36 + parent->vertex_count = split_offset + 2;3.37 +3.38 + return poly;3.39 +}3.40 +3.41 +/**3.42 * Decode a single PVR2 renderable vertex (opaque/trans/punch-out, but not shadow3.43 * volume)3.44 * @param vert Pointer to output vertex structure3.45 @@ -395,6 +423,64 @@3.46 }3.47 }3.49 +/**3.50 + * Manually cull back-facing polygons where we can - this actually saves3.51 + * us a lot of time vs passing everything to GL to do it.3.52 + */3.53 +static void scene_backface_cull()3.54 +{3.55 + unsigned poly_idx;3.56 + unsigned poly_count = pvr2_scene.poly_count; /* Note: we don't want to 
process any sub-polygons created here */3.57 + for( poly_idx = 0; poly_idx<poly_count; poly_idx++ ) {3.58 + uint32_t poly1 = pvr2_scene.poly_array[poly_idx].context[0];3.59 + if( POLY1_CULL_ENABLE(poly1) ) {3.60 + struct polygon_struct *poly = &pvr2_scene.poly_array[poly_idx];3.61 + unsigned vert_idx = poly->vertex_index;3.62 + unsigned tri_count = poly->vertex_count-2;3.63 + struct vertex_struct *vert = &pvr2_scene.vertex_array[vert_idx];3.64 + unsigned i;3.65 + gboolean ccw = (POLY1_CULL_MODE(poly1) == CULL_CCW);3.66 + int first_visible = -1, last_visible = -1;3.67 + for( i=0; i<tri_count; i++ ) {3.68 + float ux = vert[i+1].x - vert[i].x;3.69 + float uy = vert[i+1].y - vert[i].y;3.70 + float vx = vert[i+2].x - vert[i].x;3.71 + float vy = vert[i+2].y - vert[i].y;3.72 + float nz = (ux*vy) - (uy*vx);3.73 + if( ccw ? nz > 0 : nz < 0 ) {3.74 + /* Surface is visible */3.75 + if( first_visible == -1 ) {3.76 + first_visible = i;3.77 + /* Elide the initial hidden triangles (note we don't3.78 + * need to care about winding anymore here) */3.79 + poly->vertex_index += i;3.80 + poly->vertex_count -= i;3.81 + if( poly->mod_vertex_index != -1 )3.82 + poly->mod_vertex_index += i;3.83 + } else if( last_visible != i-1 ) {3.84 + /* And... here we have to split the polygon. 
Allocate a new3.85 + * sub-polygon to hold the vertex references */3.86 + struct polygon_struct *sub = scene_split_subpolygon(poly, (i-first_visible));3.87 + poly->vertex_count -= (i-first_visible-1) - last_visible;3.88 + first_visible = i;3.89 + poly = sub;3.90 + }3.91 + last_visible = i;3.92 + } /* Else culled */3.93 + /* Invert ccw flag for triangle strip processing */3.94 + ccw = !ccw;3.95 + }3.96 + if( last_visible == -1 ) {3.97 + /* No visible surfaces, so we can mark the whole polygon as being vertex-less */3.98 + poly->vertex_count = 0;3.99 + } else if( last_visible != tri_count-1 ) {3.100 + /* Remove final hidden tris */3.101 + poly->vertex_count -= (tri_count - 1 - last_visible);3.102 + }3.103 + }3.104 + }3.105 +}3.106 +3.107 static void scene_add_cheap_shadow_vertexes( struct vertex_struct *src, struct vertex_struct *dest, int count )3.108 {3.109 unsigned int i, j;3.110 @@ -655,6 +741,7 @@3.111 context_length += (bgplane & 0x07) * vertex_length;3.113 poly->next = NULL;3.114 + poly->sub_next = NULL;3.115 pvr2_scene.bkgnd_poly = poly;3.117 struct vertex_struct base_vertexes[3];3.118 @@ -804,6 +891,7 @@3.120 scene_extract_background();3.121 scene_compute_lut_fog();3.122 + scene_backface_cull();3.124 vertex_buffer_unmap();3.125 }
4.1 --- a/src/pvr2/scene.h Tue Oct 19 22:43:10 2010 +10004.2 +++ b/src/pvr2/scene.h Wed Oct 20 17:56:59 2010 +10004.3 @@ -49,6 +49,7 @@4.4 uint32_t mod_tex_id;4.5 int32_t mod_vertex_index; // index of first modified vertex in vertex buffer4.6 struct polygon_struct *next; // chain for tri/quad arrays4.7 + struct polygon_struct *sub_next; // chain for internal sub-polygons4.8 };4.10 void pvr2_scene_init(void);4.11 @@ -74,10 +75,11 @@4.13 /**4.14 * Maximum polygons - smallest is 1 polygon in 48 bytes, giving4.15 - * 87381, plus 1 for the background4.16 + * 87381, plus 1 for the background. Allow the same amount again4.17 + * for split polygons (worst case)4.18 *4.19 */4.20 -#define MAX_POLYGONS 873824.21 +#define MAX_POLYGONS (87382*2)4.22 #define MAX_POLY_BUFFER_SIZE (MAX_POLYGONS*sizeof(struct polygon_struct))4.23 #define BUF_POLY_MAP_SIZE (4 MB)
.