Search
lxdream.org :: lxdream :: r1133:f3da7d810d5c
lxdream 0.9.1
released Jun 29
Download Now
changeset: 1133:f3da7d810d5c
parent: 1132:1e074a98317c
child: 1134:f502f3d32f90
author: nkeynes
date: Wed Oct 20 17:56:59 2010 +1000 (9 years ago)
Perform backface culling in scene preparation rather than leaving it to the
GL - this is a huge performance win, at least on the 9400M - changing cull
state appears to be very expensive, whereas the CPU needed to do the same
job is only just barely measurable.
src/pvr2/glrender.c
src/pvr2/rendsort.c
src/pvr2/scene.c
src/pvr2/scene.h
1.1 --- a/src/pvr2/glrender.c Tue Oct 19 22:43:10 2010 +1000
1.2 +++ b/src/pvr2/glrender.c Wed Oct 20 17:56:59 2010 +1000
1.3 @@ -101,7 +101,7 @@
1.4 }
1.5
1.6 texcache_gl_init(); // Allocate texture IDs
1.7 - glCullFace( GL_BACK );
1.8 + glDisable( GL_CULL_FACE );
1.9 glEnable( GL_BLEND );
1.10 glEnable( GL_DEPTH_TEST );
1.11 glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_NICEST);
1.12 @@ -131,24 +131,6 @@
1.13 glFogf(GL_FOG_END, 1.0);
1.14 }
1.15
1.16 -static void render_set_cull( uint32_t poly1 )
1.17 -{
1.18 - switch( POLY1_CULL_MODE(poly1) ) {
1.19 - case CULL_NONE:
1.20 - case CULL_SMALL:
1.21 - glDisable( GL_CULL_FACE );
1.22 - break;
1.23 - case CULL_CCW:
1.24 - glEnable( GL_CULL_FACE );
1.25 - glFrontFace( GL_CW );
1.26 - break;
1.27 - case CULL_CW:
1.28 - glEnable( GL_CULL_FACE );
1.29 - glFrontFace( GL_CCW );
1.30 - break;
1.31 - }
1.32 -}
1.33 -
1.34 /**
1.35 * Setup the basic context that's shared between normal and modified modes -
1.36 * depth, culling
1.37 @@ -162,7 +144,6 @@
1.38 }
1.39
1.40 glDepthMask( POLY1_DEPTH_WRITE(poly1) ? GL_TRUE : GL_FALSE );
1.41 - render_set_cull( poly1 );
1.42 }
1.43
1.44 /**
1.45 @@ -223,22 +204,40 @@
1.46 render_set_tsp_context(context[0],context[1],context[2]);
1.47 }
1.48
1.49 +static inline void gl_draw_vertexes( struct polygon_struct *poly ) /* Draw poly, then each polygon chained from it via sub_next, as triangle strips */
1.50 +{
1.51 + do { /* poly is dereferenced before any NULL test, so it must be non-NULL on entry */
1.52 + glDrawArrays(GL_TRIANGLE_STRIP, poly->vertex_index, poly->vertex_count);
1.53 + poly = poly->sub_next; /* advance along the sub-polygon chain */
1.54 + } while( poly != NULL );
1.55 +}
1.56 +
1.57 +static inline void gl_draw_mod_vertexes( struct polygon_struct *poly ) /* As gl_draw_vertexes, but each strip starts at mod_vertex_index instead of vertex_index */
1.58 +{
1.59 + do { /* poly is dereferenced before any NULL test, so it must be non-NULL on entry */
1.60 + glDrawArrays(GL_TRIANGLE_STRIP, poly->mod_vertex_index, poly->vertex_count); /* same vertex_count as the unmodified strip */
1.61 + poly = poly->sub_next; /* advance along the sub-polygon chain */
1.62 + } while( poly != NULL );
1.63 +}
1.64
1.65 static void gl_render_poly( struct polygon_struct *poly, GLint depth_mode )
1.66 {
1.67 + if( poly->vertex_count == 0 )
1.68 + return; /* Culled */
1.69 +
1.70 if( poly->tex_id != -1 ) {
1.71 glBindTexture(GL_TEXTURE_2D, poly->tex_id);
1.72 }
1.73 if( poly->mod_vertex_index == -1 ) {
1.74 glDisable( GL_STENCIL_TEST );
1.75 render_set_context( poly->context, depth_mode );
1.76 - glDrawArrays(GL_TRIANGLE_STRIP, poly->vertex_index, poly->vertex_count );
1.77 + gl_draw_vertexes(poly);
1.78 } else {
1.79 glEnable( GL_STENCIL_TEST );
1.80 render_set_base_context( poly->context[0], depth_mode );
1.81 render_set_tsp_context( poly->context[0], poly->context[1], poly->context[2] );
1.82 glStencilFunc(GL_EQUAL, 0, 2);
1.83 - glDrawArrays(GL_TRIANGLE_STRIP, poly->vertex_index, poly->vertex_count );
1.84 + gl_draw_vertexes(poly);
1.85
1.86 if( pvr2_scene.shadow_mode == SHADOW_FULL ) {
1.87 if( poly->mod_tex_id != -1 ) {
1.88 @@ -247,7 +246,7 @@
1.89 render_set_tsp_context( poly->context[0], poly->context[3], poly->context[4] );
1.90 }
1.91 glStencilFunc(GL_EQUAL, 2, 2);
1.92 - glDrawArrays(GL_TRIANGLE_STRIP, poly->mod_vertex_index, poly->vertex_count );
1.93 + gl_draw_mod_vertexes(poly);
1.94 }
1.95 }
1.96
1.97 @@ -258,10 +257,8 @@
1.98 }
1.99 render_set_context( poly->context, 0 );
1.100 glDisable( GL_DEPTH_TEST );
1.101 - glDisable( GL_CULL_FACE );
1.102 glBlendFunc( GL_ONE, GL_ZERO );
1.103 - glDrawArrays(GL_TRIANGLE_STRIP, poly->vertex_index, poly->vertex_count );
1.104 - glEnable( GL_CULL_FACE );
1.105 + gl_draw_vertexes(poly);
1.106 glEnable( GL_DEPTH_TEST );
1.107 }
1.108
1.109 @@ -327,8 +324,10 @@
1.110 strip_count = ((entry >> 25) & 0x0F)+1;
1.111 poly = pvr2_scene.buf_to_poly_map[entry&0x000FFFFF];
1.112 while( strip_count > 0 ) {
1.113 - render_set_base_context(poly->context[0],0);
1.114 - glDrawArrays(GL_TRIANGLE_STRIP, poly->vertex_index, poly->vertex_count );
1.115 + if( poly->vertex_count != 0 ) {
1.116 + render_set_base_context(poly->context[0],0);
1.117 + gl_draw_vertexes(poly);
1.118 + }
1.119 poly = poly->next;
1.120 strip_count--;
1.121 }
1.122 @@ -336,8 +335,10 @@
1.123 default:
1.124 if( entry & 0x7E000000 ) {
1.125 poly = pvr2_scene.buf_to_poly_map[entry&0x000FFFFF];
1.126 - render_set_base_context(poly->context[0],0);
1.127 - glDrawArrays(GL_TRIANGLE_STRIP, poly->vertex_index, poly->vertex_count );
1.128 + if( poly->vertex_count != 0 ) {
1.129 + render_set_base_context(poly->context[0],0);
1.130 + gl_draw_vertexes(poly);
1.131 + }
1.132 }
1.133 }
1.134 }
1.135 @@ -368,8 +369,12 @@
1.136 * now :)
1.137 */
1.138
1.139 - render_set_cull(poly->context[0]);
1.140 - glDrawArrays(GL_TRIANGLE_STRIP, poly->vertex_index, poly->vertex_count );
1.141 + if( poly->vertex_count == 0 )
1.142 + return; /* Culled */
1.143 +
1.144 + gl_draw_vertexes(poly);
1.145 +
1.146 +
1.147
1.148 int poly_type = POLY1_VOLUME_MODE(poly->context[0]);
1.149 if( poly_type == PVR2_VOLUME_REGION0 ) {
1.150 @@ -381,7 +386,6 @@
1.151 glStencilMask( 0x03 );
1.152 glStencilFunc(GL_EQUAL, 0x02, 0x03);
1.153 glStencilOp(GL_ZERO, GL_KEEP, GL_KEEP);
1.154 - glDisable( GL_CULL_FACE );
1.155 glDisable( GL_DEPTH_TEST );
1.156
1.157 drawrect2d( tile_bounds, pvr2_scene.bounds[4] );
1.158 @@ -399,7 +403,6 @@
1.159 */
1.160 glStencilMask( 0x02 );
1.161 glStencilOp( GL_INVERT, GL_INVERT, GL_INVERT );
1.162 - glDisable( GL_CULL_FACE );
1.163 glDisable( GL_DEPTH_TEST );
1.164
1.165 drawrect2d( tile_bounds, pvr2_scene.bounds[4] );
1.166 @@ -427,7 +430,6 @@
1.167 return;
1.168
1.169 glDisable( GL_TEXTURE_2D );
1.170 - glDisable( GL_CULL_FACE );
1.171 glEnable( GL_STENCIL_TEST );
1.172 glEnable( GL_DEPTH_TEST );
1.173 glDepthFunc( GL_LEQUAL );
1.174 @@ -552,10 +554,7 @@
1.175 gl_render_tilelist(segment->punchout_ptr, GL_GEQUAL );
1.176 glDisable(GL_ALPHA_TEST );
1.177 }
1.178 - glDisable( GL_STENCIL_TEST );
1.179 - glStencilMask(0x03);
1.180 - glClear( GL_STENCIL_BUFFER_BIT );
1.181 -
1.182 +
1.183 if( IS_TILE_PTR(segment->trans_ptr) ) {
1.184 if( pvr2_scene.sort_mode == SORT_NEVER ||
1.185 (pvr2_scene.sort_mode == SORT_TILEFLAG && (segment->control&SEGMENT_SORT_TRANS))) {
2.1 --- a/src/pvr2/rendsort.c Tue Oct 19 22:43:10 2010 +1000
2.2 +++ b/src/pvr2/rendsort.c Wed Oct 20 17:56:59 2010 +1000
2.3 @@ -38,7 +38,8 @@
2.4
2.5 /**
2.6 * Count the number of triangles in the list starting at the given
2.7 - * pvr memory address.
2.8 + * pvr memory address. This is an upper bound as it includes
2.9 + * triangles that have been culled out.
2.10 */
2.11 static int sort_count_triangles( pvraddr_t tile_entry ) {
2.12 uint32_t *tile_list = (uint32_t *)(pvr2_main_ram+tile_entry);
2.13 @@ -54,11 +55,11 @@
2.14 } else if( entry >> 29 == 0x05 ) { /* Quad array */
2.15 count += ((((entry >> 25) & 0x0F)+1)<<1);
2.16 } else { /* Polygon */
2.17 - int i;
2.18 - for( i=0; i<6; i++ ) {
2.19 - if( entry & (0x40000000>>i) ) {
2.20 - count++;
2.21 - }
2.22 + struct polygon_struct *poly = pvr2_scene.buf_to_poly_map[entry&0x000FFFFF];
2.23 + while( poly != NULL ) {
2.24 + if( poly->vertex_count != 0 )
2.25 + count += poly->vertex_count-2;
2.26 + poly = poly->sub_next;
2.27 }
2.28 }
2.29 }
2.30 @@ -94,6 +95,8 @@
2.31 vertexes[0].z*triangle->mz;
2.32 }
2.33
2.34 +
2.35 +
2.36 /**
2.37 * Extract a triangle list from the tile (basically indexes into the polygon list, plus
2.38 * computing maxz while we go through it
2.39 @@ -118,7 +121,8 @@
2.40 poly = pvr2_scene.buf_to_poly_map[entry&0x000FFFFF];
2.41 while( strip_count > 0 ) {
2.42 assert( poly != NULL );
2.43 - for( i=0; i<poly->vertex_count-2; i++ ) {
2.44 + for( i=0; i+2<poly->vertex_count; i++ ) {
2.45 + /* Note: tris + quads can't have sub-polys */
2.46 sort_add_triangle( &triangles[count], poly, i );
2.47 count++;
2.48 }
2.49 @@ -129,11 +133,17 @@
2.50 default:
2.51 if( entry & 0x7E000000 ) {
2.52 poly = pvr2_scene.buf_to_poly_map[entry&0x000FFFFF];
2.53 - for( i=0; i<6; i++ ) {
2.54 - if( entry & (0x40000000>>i) ) {
2.55 + /* FIXME: This could end up including a triangle that was
2.56 + * excluded from the tile, if it is part of a strip that
2.57 + * still has some other triangles in the tile.
2.58 + * (This couldn't happen with TA output though).
2.59 + */
2.60 + while( poly != NULL ) {
2.61 + for( i=0; i+2<poly->vertex_count; i++ ) {
2.62 sort_add_triangle( &triangles[count], poly, i );
2.63 count++;
2.64 }
2.65 + poly = poly->sub_next;
2.66 }
2.67 }
2.68 }
2.69 @@ -151,13 +161,6 @@
2.70 }
2.71 render_set_context( poly->context, GL_GEQUAL );
2.72 glDepthMask(GL_FALSE);
2.73 - /* Fix cull direction */
2.74 - if( triangles[i]->triangle_num & 1 ) {
2.75 - glCullFace(GL_FRONT);
2.76 - } else {
2.77 - glCullFace(GL_BACK);
2.78 - }
2.79 -
2.80 glDrawArrays(GL_TRIANGLE_STRIP, poly->vertex_index + triangles[i]->triangle_num, 3 );
2.81 }
2.82 }
2.83 @@ -263,9 +266,9 @@
2.84 triangle_order[i] = &triangles[i];
2.85 }
2.86 int extracted_triangles = sort_extract_triangles(tile_entry, triangles);
2.87 - assert( extracted_triangles == num_triangles );
2.88 - sort_triangles( triangle_order, num_triangles, triangle_order );
2.89 - sort_render_triangles(triangle_order, num_triangles);
2.90 + assert( extracted_triangles <= num_triangles );
2.91 + sort_triangles( triangle_order, extracted_triangles, triangle_order );
2.92 + sort_render_triangles(triangle_order, extracted_triangles);
2.93 glCullFace(GL_BACK);
2.94 assert( triangles[num_triangles].poly == (void *)SENTINEL );
2.95 }
3.1 --- a/src/pvr2/scene.c Tue Oct 19 22:43:10 2010 +1000
3.2 +++ b/src/pvr2/scene.c Wed Oct 20 17:56:59 2010 +1000
3.3 @@ -192,6 +192,7 @@
3.4 poly->vertex_index = -1;
3.5 poly->mod_vertex_index = -1;
3.6 poly->next = NULL;
3.7 + poly->sub_next = NULL;
3.8 pvr2_scene.buf_to_poly_map[poly_idx] = poly;
3.9 pvr2_scene.vertex_count += (vertex_count * vert_mul);
3.10 return poly;
3.11 @@ -199,6 +200,33 @@
3.12 }
3.13
3.14 /**
3.15 + * Given a starting polygon, break it at the specified triangle so that the
3.16 + * preceding triangles are retained, and the remainder are contained in a
3.17 + * new sub-polygon. Does not preserve winding.
3.18 + */
3.19 +static struct polygon_struct *scene_split_subpolygon( struct polygon_struct *parent, int split_offset ) /* returns the newly created sub-polygon */
3.20 +{
3.21 + assert( split_offset > 0 && split_offset < (parent->vertex_count-2) ); /* split point must lie strictly inside the parent strip */
3.22 + assert( pvr2_scene.poly_count < MAX_POLYGONS ); /* a free slot must remain in the polygon array */
3.23 + struct polygon_struct *poly = &pvr2_scene.poly_array[pvr2_scene.poly_count++]; /* claim the next slot of the scene polygon array */
3.24 + poly->vertex_count = parent->vertex_count - split_offset; /* new strip runs from the split triangle to the parent's end */
3.25 + poly->vertex_index = parent->vertex_index + split_offset;
3.26 + if( parent->mod_vertex_index == -1 ) { /* -1 is propagated unchanged rather than offset */
3.27 + poly->mod_vertex_index = -1;
3.28 + } else {
3.29 + poly->mod_vertex_index = parent->mod_vertex_index + split_offset;
3.30 + }
3.31 + poly->context = parent->context; /* NOTE(review): tex_id / mod_tex_id are not set here - confirm nothing reads them from a sub-polygon */
3.32 + poly->next = NULL;
3.33 + poly->sub_next = parent->sub_next; /* link the new polygon into the sub_next chain directly after parent */
3.34 +
3.35 + parent->sub_next = poly;
3.36 + parent->vertex_count = split_offset + 2; /* parent keeps the split_offset triangles before the split point */
3.37 +
3.38 + return poly;
3.39 +}
3.40 +
3.41 +/**
3.42 * Decode a single PVR2 renderable vertex (opaque/trans/punch-out, but not shadow
3.43 * volume)
3.44 * @param vert Pointer to output vertex structure
3.45 @@ -395,6 +423,64 @@
3.46 }
3.47 }
3.48
3.49 +/**
3.50 + * Manually cull back-facing triangles (cheaper than changing GL cull state):
3.51 + * hidden runs are trimmed from each strip, splitting it at interior gaps.
3.52 + */
3.53 +static void scene_backface_cull()
3.54 +{
3.55 + unsigned poly_idx;
3.56 + unsigned poly_count = pvr2_scene.poly_count; /* Note: we don't want to process any sub-polygons created here */
3.57 + for( poly_idx = 0; poly_idx<poly_count; poly_idx++ ) {
3.58 + uint32_t poly1 = pvr2_scene.poly_array[poly_idx].context[0];
3.59 + if( POLY1_CULL_ENABLE(poly1) ) {
3.60 + struct polygon_struct *poly = &pvr2_scene.poly_array[poly_idx];
3.61 + unsigned vert_idx = poly->vertex_index;
3.62 + unsigned tri_count = poly->vertex_count-2; /* triangles in the original, unsplit strip */
3.63 + struct vertex_struct *vert = &pvr2_scene.vertex_array[vert_idx]; /* fixed base: i, first_visible and last_visible all index the original strip */
3.64 + unsigned i;
3.65 + gboolean ccw = (POLY1_CULL_MODE(poly1) == CULL_CCW);
3.66 + int first_visible = -1, last_visible = -1; /* first_visible: first triangle of the current (sub-)polygon; -1 = none seen yet */
3.67 + for( i=0; i<tri_count; i++ ) {
3.68 + float ux = vert[i+1].x - vert[i].x;
3.69 + float uy = vert[i+1].y - vert[i].y;
3.70 + float vx = vert[i+2].x - vert[i].x;
3.71 + float vy = vert[i+2].y - vert[i].y;
3.72 + float nz = (ux*vy) - (uy*vx); /* z component of the cross product of the two edge vectors */
3.73 + if( ccw ? nz > 0 : nz < 0 ) {
3.74 + /* Surface is visible */
3.75 + if( first_visible == -1 ) {
3.76 + first_visible = i;
3.77 + /* Elide the initial hidden triangles (note we don't
3.78 + * need to care about winding anymore here) */
3.79 + poly->vertex_index += i;
3.80 + poly->vertex_count -= i;
3.81 + if( poly->mod_vertex_index != -1 )
3.82 + poly->mod_vertex_index += i;
3.83 + } else if( last_visible != i-1 ) {
3.84 + /* Gap of hidden triangles: split so the new sub-polygon starts at i.
3.85 + * The parent is left spanning first_visible..i-1, then trimmed below */
3.86 + struct polygon_struct *sub = scene_split_subpolygon(poly, (i-first_visible));
3.87 + poly->vertex_count -= (i-1) - last_visible; /* drop hidden tris last_visible+1..i-1, whatever first_visible is */
3.88 + first_visible = i;
3.89 + poly = sub;
3.90 + }
3.91 + last_visible = i;
3.92 + } /* Else culled */
3.93 + /* Invert ccw flag for triangle strip processing */
3.94 + ccw = !ccw;
3.95 + }
3.96 + if( last_visible == -1 ) {
3.97 + /* No visible surfaces, so we can mark the whole polygon as being vertex-less */
3.98 + poly->vertex_count = 0;
3.99 + } else if( last_visible != tri_count-1 ) {
3.100 + /* Remove final hidden tris (poly is the last sub-polygon, which still runs to the strip's end) */
3.101 + poly->vertex_count -= (tri_count - 1 - last_visible);
3.102 + }
3.103 + }
3.104 + }
3.105 +}
3.106 +
3.107 static void scene_add_cheap_shadow_vertexes( struct vertex_struct *src, struct vertex_struct *dest, int count )
3.108 {
3.109 unsigned int i, j;
3.110 @@ -655,6 +741,7 @@
3.111 context_length += (bgplane & 0x07) * vertex_length;
3.112
3.113 poly->next = NULL;
3.114 + poly->sub_next = NULL;
3.115 pvr2_scene.bkgnd_poly = poly;
3.116
3.117 struct vertex_struct base_vertexes[3];
3.118 @@ -804,6 +891,7 @@
3.119
3.120 scene_extract_background();
3.121 scene_compute_lut_fog();
3.122 + scene_backface_cull();
3.123
3.124 vertex_buffer_unmap();
3.125 }
4.1 --- a/src/pvr2/scene.h Tue Oct 19 22:43:10 2010 +1000
4.2 +++ b/src/pvr2/scene.h Wed Oct 20 17:56:59 2010 +1000
4.3 @@ -49,6 +49,7 @@
4.4 uint32_t mod_tex_id;
4.5 int32_t mod_vertex_index; // index of first modified vertex in vertex buffer
4.6 struct polygon_struct *next; // chain for tri/quad arrays
4.7 + struct polygon_struct *sub_next; // chain for internal sub-polygons
4.8 };
4.9
4.10 void pvr2_scene_init(void);
4.11 @@ -74,10 +75,11 @@
4.12
4.13 /**
4.14 * Maximum polygons - smallest is 1 polygon in 48 bytes, giving
4.15 - * 87381, plus 1 for the background
4.16 + * 87381, plus 1 for the background. Allow the same amount again
4.17 + * for split polygons (worst case)
4.18 *
4.19 */
4.20 -#define MAX_POLYGONS 87382
4.21 +#define MAX_POLYGONS (87382*2)
4.22 #define MAX_POLY_BUFFER_SIZE (MAX_POLYGONS*sizeof(struct polygon_struct))
4.23 #define BUF_POLY_MAP_SIZE (4 MB)
4.24
.