4 * Texture cache. Responsible for maintaining a working set of OpenGL
8 * Copyright (c) 2005 Nathan Keynes.
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
23 #include "pvr2/pvr2.h"
24 #include "pvr2/pvr2mmio.h"
26 /** Specifies the maximum number of OpenGL
27 * textures we're willing to have open at a time. If more are
28 * needed, textures will be evicted in LRU order.
// Upper bound on the number of cache slots. It sizes texcache_free_list here
// and texcache_active_list further down.
30 #define MAX_TEXTURES 256
36 * find entry by texture_addr
38 * move entry to tail of lru list
// Index of a cache slot. EMPTY_ENTRY (-1) is the no-slot marker used in the
// page lookup table and in the per-entry next links.
42 typedef signed short texcache_entry_index;
43 #define EMPTY_ENTRY -1
// Free-slot bookkeeping. The allocator takes
// texcache_free_list[texcache_free_ptr++] while texcache_free_ptr is below
// MAX_TEXTURES, so a value of MAX_TEXTURES means no free slot is left.
45 static texcache_entry_index texcache_free_ptr = 0;
46 static GLuint texcache_free_list[MAX_TEXTURES];
// One cache slot.
// NOTE(review): only part of this definition is visible in this view. Other
// code in the file reads texture_id and lru_count members and uses a
// texcache_entry_t pointer type, so further members and the end of the
// typedef are presumably on lines not shown here. TODO confirm.
48 typedef struct texcache_entry {
// Address of the cached texture; -1 marks an unused slot.
49 uint32_t texture_addr;
// Dimensions and texture control word the entry was created with.
50 int width, height, mode;
// Render buffer attached to this texture, or NULL when there is none.
52 render_buffer_t buffer;
// Next slot on the same page chain, or EMPTY_ENTRY at the end of the chain.
53 texcache_entry_index next;
// Head slot of the entry chain for each page (page index is
// texture_addr >> 12), or EMPTY_ENTRY when the page has no cached texture.
57 static texcache_entry_index texcache_page_lookup[PVR2_RAM_PAGES];
// Counter copied into an entry's lru_count whenever the entry is found or
// allocated, then incremented.
58 static uint32_t texcache_ref_counter;
// Slot storage, indexed by texcache_entry_index.
59 static struct texcache_entry texcache_active_list[MAX_TEXTURES];
// Palette mode and stride width most recently given to texcache_set_config().
60 static uint32_t texcache_palette_mode;
61 static uint32_t texcache_stride_width;
64 * Initialize the texture cache.
// NOTE(review): the signature and opening lines of this function are not
// visible in this view. It is presumably texcache_init(), which the
// texcache_shutdown() comment refers to. TODO confirm.
// Mark every page chain as empty.
69 for( i=0; i<PVR2_RAM_PAGES; i++ ) {
70 texcache_page_lookup[i] = EMPTY_ENTRY;
// Put every slot on the free list in index order and reset it to unused:
// texture_addr -1, no render buffer, no chain link.
72 for( i=0; i<MAX_TEXTURES; i++ ) {
73 texcache_free_list[i] = i;
74 texcache_active_list[i].texture_addr = -1;
75 texcache_active_list[i].buffer = NULL;
76 texcache_active_list[i].next = EMPTY_ENTRY;
// A free pointer of 0 means all MAX_TEXTURES slots are available.
78 texcache_free_ptr = 0;
79 texcache_ref_counter = 0;
80 texcache_palette_mode = 0;
81 texcache_stride_width = 0;
85 * Setup the initial texture ids (must be called after the GL context is
// Generates MAX_TEXTURES GL texture names with a single glGenTextures call
// and stores one in the texture_id member of each cache slot.
88 void texcache_gl_init( )
91 GLuint texids[MAX_TEXTURES];
93 glGenTextures( MAX_TEXTURES, texids );
94 for( i=0; i<MAX_TEXTURES; i++ ) {
95 texcache_active_list[i].texture_id = texids[i];
// Disposes of a render buffer held by a cache entry. A buffer whose flushed
// flag is not set is first passed to pvr2_render_buffer_copy_to_sh4(); the
// buffer is then destroyed in every case.
// The caller is expected to clear its own pointer afterwards (the callers
// visible in this file set entry->buffer to NULL).
99 void texcache_release_render_buffer( render_buffer_t buffer )
101 if( !buffer->flushed )
102 pvr2_render_buffer_copy_to_sh4(buffer);
103 pvr2_destroy_render_buffer(buffer);
107 * Flush all textures from the cache, returning them to the free list.
// Resets the cache to empty: clears every page chain, rebuilds the free list
// in index order and marks each slot unused. A slot that holds a render
// buffer has it released through texcache_release_render_buffer() first.
// No visible line here modifies the texture_id of a slot.
109 void texcache_flush( )
112 /* clear structures */
113 for( i=0; i<PVR2_RAM_PAGES; i++ ) {
114 texcache_page_lookup[i] = EMPTY_ENTRY;
116 for( i=0; i<MAX_TEXTURES; i++ ) {
117 texcache_free_list[i] = i;
118 texcache_active_list[i].next = EMPTY_ENTRY;
119 texcache_active_list[i].texture_addr = -1;
120 if( texcache_active_list[i].buffer != NULL ) {
121 texcache_release_render_buffer(texcache_active_list[i].buffer);
122 texcache_active_list[i].buffer = NULL;
// All slots are free again and the LRU clock restarts from zero.
125 texcache_free_ptr = 0;
126 texcache_ref_counter = 0;
130 * Flush all textures and delete. The cache will be non-functional until
131 * the next call to texcache_init(). This would typically be done if
132 * switching GL targets.
// Collects the texture_id of every slot and deletes all of them with a
// single glDeleteTextures call.
// NOTE(review): the description above this function says the textures are
// flushed as well; no flush call is visible in this view, it may be on a
// line not shown. TODO confirm.
134 void texcache_shutdown( )
136 GLuint texids[MAX_TEXTURES];
140 for( i=0; i<MAX_TEXTURES; i++ ) {
141 texids[i] = texcache_active_list[i].texture_id;
143 glDeleteTextures( MAX_TEXTURES, texids );
// Removes one active slot from the cache: marks it unused, releases any
// attached render buffer and unlinks it from the chain of its page.
// No visible line here puts the slot back on texcache_free_list.
// NOTE(review): several lines of the unlink loop are not visible in this
// view (the else branch, the loop header and the test inside the loop), so
// the notes below describe only the visible statements.
146 static void texcache_evict( int slot )
148 /* Remove the selected slot from the lookup table */
149 assert( texcache_active_list[slot].texture_addr != -1 );
// Page index is the texture address shifted right by 12.
150 uint32_t evict_page = texcache_active_list[slot].texture_addr >> 12;
// Successor that has to take the place of this slot in the chain.
151 texcache_entry_index replace_next = texcache_active_list[slot].next;
152 texcache_active_list[slot].texture_addr = -1;
153 texcache_active_list[slot].next = EMPTY_ENTRY; /* Just for safety */
154 if( texcache_active_list[slot].buffer != NULL ) {
155 texcache_release_render_buffer(texcache_active_list[slot].buffer);
156 texcache_active_list[slot].buffer = NULL;
// The slot is the head of its page chain: the page now starts at the successor.
158 if( texcache_page_lookup[evict_page] == slot ) {
159 texcache_page_lookup[evict_page] = replace_next;
// Otherwise the chain is walked from its head and an entry gets its next
// link redirected to the successor; presumably that is the entry whose next
// link was the evicted slot. TODO confirm against the full file.
161 texcache_entry_index idx = texcache_page_lookup[evict_page];
162 texcache_entry_index next;
164 next = texcache_active_list[idx].next;
166 assert( idx != replace_next );
167 texcache_active_list[idx].next = replace_next;
171 } while( next != EMPTY_ENTRY );
176 * Evict a single texture from the cache.
177 * @return the slot of the evicted texture.
// Scans all MAX_TEXTURES slots for the smallest lru_count and evicts a slot
// through texcache_evict().
// NOTE(review): lru_count values are stamped from the uint32_t
// texcache_ref_counter but are compared here through an int, and counter
// wrap-around is not handled (see the FIXME below).
// NOTE(review): the lines that update slot inside the loop and the return
// statement are not visible in this view; the description above says the
// evicted slot is returned.
179 static texcache_entry_index texcache_evict_lru( void )
181 /* Full table scan - take over the entry with the lowest lru value */
182 texcache_entry_index slot = 0;
183 int lru_value = texcache_active_list[0].lru_count;
185 for( i=1; i<MAX_TEXTURES; i++ ) {
186 /* FIXME: account for rollover */
187 if( texcache_active_list[i].lru_count < lru_value ) {
189 lru_value = texcache_active_list[i].lru_count;
192 texcache_evict(slot);
198 * Evict all textures contained in the page identified by a texture address.
// Drops the cached entries chained on the page that contains texture_addr.
// Each visited entry is marked unused, has its render buffer released and
// has its slot index written into texcache_free_list; finally the page
// chain head is cleared.
// NOTE(review): the early return for an empty page, the loop header, the
// adjustment of texcache_free_ptr and the step to the next chain entry are
// not visible in this view. The loop condition implies idx is advanced
// along the chain. TODO confirm.
200 void texcache_invalidate_page( uint32_t texture_addr ) {
201 uint32_t texture_page = texture_addr >> 12;
202 texcache_entry_index idx = texcache_page_lookup[texture_page];
203 if( idx == EMPTY_ENTRY )
205 assert( texcache_free_ptr >= 0 );
207 texcache_entry_t entry = &texcache_active_list[idx];
208 entry->texture_addr = -1;
209 if( entry->buffer != NULL ) {
210 texcache_release_render_buffer(entry->buffer);
211 entry->buffer = NULL;
215 texcache_free_list[texcache_free_ptr] = idx;
217 entry->next = EMPTY_ENTRY;
218 } while( idx != EMPTY_ENTRY );
219 texcache_page_lookup[texture_page] = EMPTY_ENTRY;
223 * Mark all textures that use the palette table as needing a re-read (i.e.
224 * for when the palette is changed. We could track exactly which ones are
225 * affected, but it's not clear that the extra maintenance overhead is
// Visits every slot; for each active one (texture_addr not -1) whose mode
// satisfies PVR2_TEX_IS_PALETTE, the slot index is stored into
// texcache_free_list at texcache_free_ptr.
// NOTE(review): the lines between the test and the free-list store are not
// visible in this view. Presumably the entry is evicted and
// texcache_free_ptr adjusted there. TODO confirm.
228 void texcache_invalidate_palette( )
231 for( i=0; i<MAX_TEXTURES; i++ ) {
232 if( texcache_active_list[i].texture_addr != -1 &&
233 PVR2_TEX_IS_PALETTE(texcache_active_list[i].mode) ) {
236 texcache_free_list[texcache_free_ptr] = i;
241 * Mark all stride textures as needing a re-read (i.e. when the stride width
// Visits every slot; for each active one (texture_addr not -1) whose mode
// satisfies PVR2_TEX_IS_STRIDE, the slot index is stored into
// texcache_free_list at texcache_free_ptr.
// NOTE(review): the lines between the test and the free-list store are not
// visible in this view. Presumably the entry is evicted and
// texcache_free_ptr adjusted there. TODO confirm.
244 void texcache_invalidate_stride( )
247 for( i=0; i<MAX_TEXTURES; i++ ) {
248 if( texcache_active_list[i].texture_addr != -1 &&
249 PVR2_TEX_IS_STRIDE(texcache_active_list[i].mode) ) {
252 texcache_free_list[texcache_free_ptr] = i;
// Records the current palette mode and stride width. When a value differs
// from the stored one, the textures that depend on it are invalidated
// before the new value is stored.
257 void texcache_set_config( uint32_t palette_mode, uint32_t stride )
259 if( palette_mode != texcache_palette_mode )
260 texcache_invalidate_palette();
261 if( stride != texcache_stride_width )
262 texcache_invalidate_stride();
264 texcache_palette_mode = palette_mode;
265 texcache_stride_width = stride;
/**
 * Expand 8-bit palette indices into 32-bit pixels.
 *
 * @param out destination, receives one pixel per input byte
 * @param in source indices, one per byte
 * @param inbytes number of index bytes to convert
 * @param pal palette table, indexed directly by the byte value
 */
static void decode_pal8_to_32( uint32_t *out, uint8_t *in, int inbytes, uint32_t *pal )
{
    int i;
    for( i=0; i<inbytes; i++ ) {
        /* Same lookup as decode_pal8_to_16, without the narrowing cast */
        *out++ = pal[*in++];
    }
}
/**
 * Expand 8-bit palette indices into 16-bit pixels.
 *
 * @param out destination, receives one pixel per input byte
 * @param in source indices, one per byte
 * @param inbytes number of index bytes to convert
 * @param pal palette table, indexed directly by the byte value; only the
 *            low 16 bits of each palette word are kept
 */
static void decode_pal8_to_16( uint16_t *out, uint8_t *in, int inbytes, uint32_t *pal )
{
    int i;
    for( i=0; i<inbytes; i++ ) {
        *out++ = (uint16_t)pal[*in++];
    }
}
/**
 * Expand packed 4-bit palette indices into 32-bit pixels. Each input byte
 * holds two indices and yields two pixels: the low nibble first, then the
 * high nibble.
 *
 * @param out destination, receives 2*inbytes pixels
 * @param in source bytes, two indices per byte
 * @param inbytes number of source bytes to convert
 * @param pal palette table with at least 16 entries
 */
static void decode_pal4_to_32( uint32_t *out, uint8_t *in, int inbytes, uint32_t *pal )
{
    int i;
    for( i=0; i<inbytes; i++ ) {
        *out++ = pal[*in & 0x0F];
        *out++ = pal[(*in >> 4)];
        /* Both nibbles consumed - step to the next source byte */
        in++;
    }
}
/**
 * Expand packed 4-bit palette indices into 16-bit pixels. Each input byte
 * holds two indices and yields two pixels: the low nibble first, then the
 * high nibble.
 *
 * @param out destination, receives 2*inbytes pixels
 * @param in source bytes, two indices per byte
 * @param inbytes number of source bytes to convert
 * @param pal palette table with at least 16 entries; only the low 16 bits
 *            of each palette word are kept
 */
static void decode_pal4_to_16( uint16_t *out, uint8_t *in, int inbytes, uint32_t *pal )
{
    int i;
    for( i=0; i<inbytes; i++ ) {
        *out++ = (uint16_t)pal[*in & 0x0F];
        *out++ = (uint16_t)pal[(*in >> 4)];
        /* Both nibbles consumed - step to the next source byte */
        in++;
    }
}
// Size in bytes of a VQ codebook as it is read from texture memory.
305 #define VQ_CODEBOOK_SIZE 2048 /* 256 entries * 4 pixels per quad * 2 byte pixels */
// Decoded codebook: 256 entries of four 16-bit pixels each.
// NOTE(review): the struct header and closing lines are not visible in this
// view; other code in the file refers to this type as struct vq_codebook.
308 uint16_t quad[256][4];
// Copies 256 four-pixel entries from input into codebook. The second and
// third source pixels of each entry are swapped on the way in, so that
// quad[n][0] and quad[n][1] are the top row and quad[n][2] and quad[n][3]
// the bottom row of the 2x2 block, which is the order vq_decode() uses.
// NOTE(review): the line declaring the input parameter is not visible in
// this view; the body reads it as an array of uint16_t.
311 static void vq_get_codebook( struct vq_codebook *codebook,
314 /* Detwiddle the codebook, for the sake of my own sanity if nothing else */
315 uint16_t *p = (uint16_t *)input;
317 for( i=0; i<256; i++ ) {
318 codebook->quad[i][0] = *p++;
319 codebook->quad[i][2] = *p++;
320 codebook->quad[i][1] = *p++;
321 codebook->quad[i][3] = *p++;
// Expands VQ-compressed data into 16-bit pixels. The image is walked in 2x2
// blocks; for each block the four pixels of codebook entry code are written
// to (i,j), (i+1,j), (i,j+1) and (i+1,j+1) of a row-major output that is
// width pixels wide.
// NOTE(review): the line that fetches code is not visible in this view;
// presumably it reads the next byte through c. TODO confirm.
325 static void vq_decode( uint16_t *output, unsigned char *input, int width, int height,
326 struct vq_codebook *codebook ) {
329 uint8_t *c = (uint8_t *)input;
330 for( j=0; j<height; j+=2 ) {
331 for( i=0; i<width; i+=2 ) {
333 output[i + j*width] = codebook->quad[code][0];
334 output[i + 1 + j*width] = codebook->quad[code][1];
335 output[i + (j+1)*width] = codebook->quad[code][2];
336 output[i + 1 + (j+1)*width] = codebook->quad[code][3];
/**
 * Convert one YUV sample to an opaque 32-bit pixel laid out as
 * 0xAARRGGBB with alpha fixed at 0xFF.
 *
 * @param y luma, 0..255
 * @param u blue-difference chroma as stored, 0..255 (128 = neutral)
 * @param v red-difference chroma as stored, 0..255 (128 = neutral)
 * @return the packed pixel, each channel clamped to 0..255
 */
static inline uint32_t yuv_to_rgb32( float y, float u, float v )
{
    /* Chroma is stored offset by 128; re-centre it around zero first */
    u -= 128;
    v -= 128;
    int r = (int)(y + v*1.375);
    int g = (int)(y - u*0.34375 - v*0.6875);
    int b = (int)(y + u*1.71875);
    if( r > 255 ) { r = 255; } else if( r < 0 ) { r = 0; }
    if( g > 255 ) { g = 255; } else if( g < 0 ) { g = 0; }
    if( b > 255 ) { b = 255; } else if( b < 0 ) { b = 0; }
    return 0xFF000000u | ((uint32_t)r<<16) | ((uint32_t)g<<8) | (uint32_t)b;
}

/**
 * Convert raster YUV texture data into RGB32 data - most GL implementations
 * do not directly support this format. Each 32-bit input word carries two
 * horizontally adjacent pixels as U, Y0, V, Y1 from the least significant
 * byte upwards; both pixels share the same chroma pair.
 *
 * @param output destination, one 32-bit pixel per source pixel. Two pixels
 *               are written per input word, so an odd width writes one
 *               extra pixel per row.
 * @param input source words, consumed sequentially
 * @param width image width in pixels
 * @param height image height in pixels
 */
static void yuv_decode( uint32_t *output, uint32_t *input, int width, int height )
{
    int x, y;
    uint32_t *p = input;
    for( y=0; y<height; y++ ) {
        for( x=0; x<width; x+=2 ) {
            float u = (float)(*p & 0xFF);
            float y0 = (float)( (*p>>8)&0xFF );
            float v = (float)( (*p>>16)&0xFF );
            float y1 = (float)( (*p>>24)&0xFF );
            *output++ = yuv_to_rgb32( y0, u, v );
            *output++ = yuv_to_rgb32( y1, u, v );
            /* One word holds exactly the two pixels just emitted */
            p++;
        }
    }
}
// NOTE(review): only the signature and the loop header of this function are
// visible in this view. Judging by the name and by its use in
// texcache_get_render_buffer(), it presumably reports whether width is not
// a power of two. TODO confirm against the full file.
378 static gboolean is_npot_texture( int width )
380 while( width != 0 ) {
389 * Load texture data from the given address and parameters into the currently
390 * bound OpenGL texture.
// Decodes the texture at texture_addr according to the texture control word
// (mode) and uploads it with glTexImage2D to the GL_TEXTURE_2D target, then
// sets the min and mag filters.
// NOTE(review): many lines of this function are not visible in this view
// (the rest of the parameter list, the format and bpp_shift assignments,
// break statements, else branches and closing braces), so the notes below
// describe only the visible statements.
392 static void texcache_load_texture( uint32_t texture_addr, int width, int height,
394 int bpp_shift = 1; /* bytes per (output) pixel as a power of 2 */
395 GLint intFormat = GL_RGBA, format, type;
396 int tex_format = mode & PVR2_TEX_FORMAT_MASK;
397 struct vq_codebook codebook;
398 GLint filter = GL_LINEAR;
400 glPixelStorei( GL_UNPACK_ROW_LENGTH, 0 );
402 /* Decode the format parameters */
403 switch( tex_format ) {
404 case PVR2_TEX_FORMAT_IDX4:
405 case PVR2_TEX_FORMAT_IDX8:
406 /* For indexed-colour modes, we need to lookup the palette control
407 * word to determine the de-indexed texture format.
409 switch( texcache_palette_mode ) {
410 case 0: /* ARGB1555 */
412 type = GL_UNSIGNED_SHORT_1_5_5_5_REV;
417 type = GL_UNSIGNED_SHORT_5_6_5;
419 case 2: /* ARGB4444 */
421 type = GL_UNSIGNED_SHORT_4_4_4_4_REV;
423 case 3: /* ARGB8888 */
425 type = GL_UNSIGNED_BYTE;
429 return; /* Can't happen, but it makes gcc stop complaining */
434 case PVR2_TEX_FORMAT_ARGB1555:
436 type = GL_UNSIGNED_SHORT_1_5_5_5_REV;
438 case PVR2_TEX_FORMAT_RGB565:
441 type = GL_UNSIGNED_SHORT_5_6_5;
443 case PVR2_TEX_FORMAT_ARGB4444:
445 type = GL_UNSIGNED_SHORT_4_4_4_4_REV;
447 case PVR2_TEX_FORMAT_YUV422:
448 /* YUV422 isn't directly supported by most implementations, so decode
449 * it to a (reasonably) standard ARGB32.
453 type = GL_UNSIGNED_BYTE;
455 case PVR2_TEX_FORMAT_BUMPMAP:
456 WARN( "Bumpmap not supported" );
460 if( PVR2_TEX_IS_STRIDE(mode) && tex_format != PVR2_TEX_FORMAT_IDX4 &&
461 tex_format != PVR2_TEX_FORMAT_IDX8 ) {
462 /* Stride textures cannot be mip-mapped, compressed, indexed or twiddled */
// Stride path: rows are read with the configured stride width as source
// pitch into a stack buffer and uploaded as a single level. YUV422 data is
// read into a 16-bit-per-pixel temporary and then converted by yuv_decode().
463 unsigned char data[(width*height) << bpp_shift];
464 if( tex_format == PVR2_TEX_FORMAT_YUV422 ) {
465 unsigned char tmp[(width*height)<<1];
466 pvr2_vram64_read_stride( tmp, width<<1, texture_addr, texcache_stride_width<<1, height );
467 yuv_decode( (uint32_t *)data, (uint32_t *)tmp, width, height );
469 pvr2_vram64_read_stride( data, width<<bpp_shift, texture_addr, texcache_stride_width<<bpp_shift, height );
471 glTexImage2D( GL_TEXTURE_2D, 0, intFormat, width, height, 0, format, type, data );
472 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter);
473 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
// Compressed textures: VQ_CODEBOOK_SIZE bytes are read from texture_addr and
// decoded into codebook, and texture_addr is stepped past them.
477 if( PVR2_TEX_IS_COMPRESSED(mode) ) {
478 uint16_t tmp[VQ_CODEBOOK_SIZE];
479 pvr2_vram64_read( (unsigned char *)tmp, texture_addr, VQ_CODEBOOK_SIZE );
480 texture_addr += VQ_CODEBOOK_SIZE;
481 vq_get_codebook( &codebook, tmp );
484 int level=0, last_level = 0, mip_width = width, mip_height = height, src_bytes, dest_bytes;
// Mipmapped textures are treated as square (height is forced to width) and
// use a mipmap min filter. src_offset accumulates a per-level pixel count;
// it is scaled by bpp_shift for the non-indexed case and added to
// texture_addr. The adjustments for the other formats are on lines not
// visible here.
485 if( PVR2_TEX_IS_MIPMAPPED(mode) ) {
486 uint32_t src_offset = 0;
487 filter = GL_LINEAR_MIPMAP_LINEAR;
488 mip_height = height = width;
489 while( (1<<last_level) < width ) {
491 src_offset += ((width>>last_level)*(width>>last_level));
496 if( PVR2_TEX_IS_COMPRESSED(mode) ) {
498 } else if( tex_format == PVR2_TEX_FORMAT_IDX4 ) {
500 } else if( tex_format == PVR2_TEX_FORMAT_YUV422 ) {
502 } else if( tex_format != PVR2_TEX_FORMAT_IDX8 ) {
503 src_offset <<= bpp_shift;
505 texture_addr += src_offset;
509 dest_bytes = (mip_width * mip_height) << bpp_shift;
510 src_bytes = dest_bytes; // Modes will change this (below)
// One upload per level from 0 to last_level. Each level is decoded into a
// stack buffer of dest_bytes; src_bytes is the size of the level as stored.
512 for( level=0; level<= last_level; level++ ) {
513 unsigned char data[dest_bytes];
514 /* load data from image, detwiddling/uncompressing as required */
// 8-bit indexed: the palette bank comes from bits 25-26 of mode, 256 entries per bank.
515 if( tex_format == PVR2_TEX_FORMAT_IDX8 ) {
516 src_bytes = (mip_width * mip_height);
517 int bank = (mode >> 25) &0x03;
518 uint32_t *palette = ((uint32_t *)mmio_region_PVR2PAL.mem) + (bank<<8);
519 unsigned char tmp[src_bytes];
520 pvr2_vram64_read_twiddled_8( tmp, texture_addr, mip_width, mip_height );
521 if( bpp_shift == 2 ) {
522 decode_pal8_to_32( (uint32_t *)data, tmp, src_bytes, palette );
524 decode_pal8_to_16( (uint16_t *)data, tmp, src_bytes, palette );
// 4-bit indexed: the palette bank comes from bits 21-26 of mode, 16 entries per bank.
526 } else if( tex_format == PVR2_TEX_FORMAT_IDX4 ) {
527 src_bytes = (mip_width * mip_height) >> 1;
528 int bank = (mode >>21 ) & 0x3F;
529 uint32_t *palette = ((uint32_t *)mmio_region_PVR2PAL.mem) + (bank<<4);
530 unsigned char tmp[src_bytes];
531 pvr2_vram64_read_twiddled_4( tmp, texture_addr, mip_width, mip_height );
532 if( bpp_shift == 2 ) {
533 decode_pal4_to_32( (uint32_t *)data, tmp, src_bytes, palette );
535 decode_pal4_to_16( (uint16_t *)data, tmp, src_bytes, palette );
537 } else if( tex_format == PVR2_TEX_FORMAT_YUV422 ) {
538 src_bytes = ((mip_width*mip_height)<<1);
539 unsigned char tmp[src_bytes];
540 if( PVR2_TEX_IS_TWIDDLED(mode) ) {
541 pvr2_vram64_read_twiddled_16( tmp, texture_addr, mip_width, mip_height );
543 pvr2_vram64_read( tmp, texture_addr, src_bytes );
545 yuv_decode( (uint32_t *)data, (uint32_t *)tmp, mip_width, mip_height );
// Compressed: one code byte per 2x2 block, hence a quarter of the pixel count.
546 } else if( PVR2_TEX_IS_COMPRESSED(mode) ) {
547 src_bytes = ((mip_width*mip_height) >> 2);
548 unsigned char tmp[src_bytes];
549 if( PVR2_TEX_IS_TWIDDLED(mode) ) {
550 pvr2_vram64_read_twiddled_8( tmp, texture_addr, mip_width>>1, mip_height>>1 );
552 pvr2_vram64_read( tmp, texture_addr, src_bytes );
554 vq_decode( (uint16_t *)data, tmp, mip_width, mip_height, &codebook );
555 } else if( PVR2_TEX_IS_TWIDDLED(mode) ) {
556 pvr2_vram64_read_twiddled_16( data, texture_addr, mip_width, mip_height );
558 pvr2_vram64_read( data, texture_addr, src_bytes );
// The last level of a mipmapped texture is uploaded as 1x1, taken from the
// fourth pixel of the decoded data.
562 if( level == last_level && level != 0 ) { /* 1x1 stored within a 2x2 */
563 glTexImage2D( GL_TEXTURE_2D, level, intFormat, 1, 1, 0, format, type,
564 data + (3 << bpp_shift) );
566 glTexImage2D( GL_TEXTURE_2D, level, intFormat, mip_width, mip_height, 0, format, type,
568 if( mip_width > 2 ) {
// NOTE(review): the statements around this adjustment are not visible in
// this view; texture_addr is moved back by src_bytes here, presumably to
// reach the next smaller level. TODO confirm.
574 texture_addr -= src_bytes;
578 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter);
579 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
// Looks for an existing cache entry that matches a texture word and size.
// The texture address is the low 20 bits of texture_word multiplied by 8.
// The chain of the page containing that address is walked, and an entry
// matches only when address, full mode word, width and height are all
// equal. A match has its lru_count refreshed from texcache_ref_counter.
// NOTE(review): the return statements and the step to the next chain entry
// are not visible in this view, so the value returned for a miss cannot be
// stated from here.
582 static int texcache_find_texture_slot( uint32_t texture_word, int width, int height )
584 uint32_t texture_addr = (texture_word & 0x000FFFFF)<<3;
585 uint32_t texture_page = texture_addr >> 12;
586 texcache_entry_index next;
587 texcache_entry_index idx = texcache_page_lookup[texture_page];
588 while( idx != EMPTY_ENTRY ) {
589 texcache_entry_t entry = &texcache_active_list[idx];
590 if( entry->texture_addr == texture_addr &&
591 entry->mode == texture_word &&
592 entry->width == width &&
593 entry->height == height ) {
594 entry->lru_count = texcache_ref_counter++;
// Obtains a slot for a new texture and links it at the head of the chain of
// its page. A slot is taken from the free list while one is available;
// texcache_evict_lru() supplies one otherwise. The slot must be unused
// (texture_addr -1) before it is filled in.
// NOTE(review): the else keyword between the two slot sources, the guard
// around the diagnostic dump and the return statement are not visible in
// this view.
602 static int texcache_alloc_texture_slot( uint32_t texture_word, int width, int height )
604 uint32_t texture_addr = (texture_word & 0x000FFFFF)<<3;
605 uint32_t texture_page = texture_addr >> 12;
606 texcache_entry_index slot = 0;
608 if( texcache_free_ptr < MAX_TEXTURES ) {
609 slot = texcache_free_list[texcache_free_ptr++];
611 slot = texcache_evict_lru();
614 /* Construct new entry */
615 assert( texcache_active_list[slot].texture_addr == -1 );
616 texcache_active_list[slot].texture_addr = texture_addr;
617 texcache_active_list[slot].width = width;
618 texcache_active_list[slot].height = height;
619 texcache_active_list[slot].mode = texture_word;
620 texcache_active_list[slot].lru_count = texcache_ref_counter++;
622 /* Add entry to the lookup table */
623 int next = texcache_page_lookup[texture_page];
// Diagnostic dump of every next link followed by an assertion that the new
// slot is not already the head of the chain. Presumably this only runs when
// next equals slot; the condition is on a line not shown. TODO confirm.
626 fprintf( stderr, "Active list: " );
627 for( i=0; i<MAX_TEXTURES; i++ ) {
628 fprintf( stderr, "%d, ", texcache_active_list[i].next );
630 fprintf( stderr, "\n" );
631 assert( next != slot );
// Push the new slot onto the front of the page chain.
634 texcache_active_list[slot].next = next;
635 texcache_page_lookup[texture_page] = slot;
640 * Return a texture ID for the texture specified at the supplied address
641 * and given parameters (the same sequence of bytes could in theory have
642 * multiple interpretations). We use the texture address as the primary
643 * index, but allow for multiple instances at each address. The texture
644 * will be bound to the GL_TEXTURE_2D target before being returned.
646 * If the texture has already been bound, return the ID to which it was
647 * bound. Otherwise obtain an unused texture ID and set it up appropriately.
// Returns the GL texture id for the given texture word and size. An
// existing entry is looked up first; the visible miss path allocates a
// slot, binds its GL texture and loads the texture data into it.
// NOTE(review): the condition that selects the miss path is not visible in
// this view. The only visible glBindTexture call is on that path, so
// whether a cache hit also binds the texture cannot be confirmed from here.
649 GLuint texcache_get_texture( uint32_t texture_word, int width, int height )
651 int slot = texcache_find_texture_slot( texture_word, width, height );
654 /* Not found - check the free list */
655 slot = texcache_alloc_texture_slot( texture_word, width, height );
657 /* Construct the GL texture */
658 uint32_t texture_addr = (texture_word & 0x000FFFFF)<<3;
659 glBindTexture( GL_TEXTURE_2D, texcache_active_list[slot].texture_id );
660 texcache_load_texture( texture_addr, width, height, texture_word );
663 return texcache_active_list[slot].texture_id;
// Returns the render buffer attached to the cache entry for a render
// target, creating the entry and the buffer when needed.
// A texture word is synthesised from the address (shifted right by 3, low
// 20 bits), the untwiddled flag and the texture format matching the colour
// format. The stride flag is added when is_npot_texture(width) is true.
// An existing buffer whose size differs from the request is released and
// replaced.
// NOTE(review): the switch header, its default label and the conditions
// around the slot lookup are not visible in this view.
666 render_buffer_t texcache_get_render_buffer( uint32_t texture_addr, int mode, int width, int height )
668 uint32_t texture_word = ((texture_addr >> 3) & 0x000FFFFF) | PVR2_TEX_UNTWIDDLED;
670 case COLFMT_BGRA1555: texture_word |= PVR2_TEX_FORMAT_ARGB1555; break;
671 case COLFMT_RGB565: texture_word |= PVR2_TEX_FORMAT_RGB565; break;
672 case COLFMT_BGRA4444: texture_word |= PVR2_TEX_FORMAT_ARGB4444; break;
674 WARN( "Rendering to non-texture colour format" );
676 if( is_npot_texture(width) )
677 texture_word |= PVR2_TEX_STRIDE;
680 int slot = texcache_find_texture_slot( texture_word, width, height );
682 slot = texcache_alloc_texture_slot( texture_word, width, height );
685 texcache_entry_t entry = &texcache_active_list[slot];
// Create the buffer on first use, or rebuild it when the size has changed.
687 if( entry->buffer == NULL ) {
688 entry->buffer = pvr2_create_render_buffer( texture_addr, width, height, entry->texture_id );
689 } else if( entry->buffer->width != width || entry->buffer->height != height ) {
690 texcache_release_render_buffer(entry->buffer);
691 entry->buffer = pvr2_create_render_buffer( texture_addr, width, height, entry->texture_id );
694 return entry->buffer;
698 * Check the integrity of the texcache. Verifies that every cache slot
699 * appears exactly once on either the free list or one page list. For
700 * active slots, the texture address must also match the page it appears on.
// Debug aid that asserts the cache bookkeeping is consistent. slot_found
// records where each slot was seen: 0 = not seen, 1 = on the free list,
// 2 = on a page chain. Seeing a slot twice, or not at all, fails an
// assertion.
703 void texcache_integrity_check()
706 int slot_found[MAX_TEXTURES];
708 memset( slot_found, 0, sizeof(slot_found) );
710 /* Check entries on the free list */
// Free entries are those from texcache_free_ptr to the end of the list, and
// a free slot must not be linked into any chain.
711 for( i= texcache_free_ptr; i< MAX_TEXTURES; i++ ) {
712 int slot = texcache_free_list[i];
713 assert( slot_found[slot] == 0 );
714 assert( texcache_active_list[slot].next == EMPTY_ENTRY );
715 slot_found[slot] = 1;
718 /* Check entries on the active lists */
// Every entry reached from a page chain must have an address on that page.
719 for( i=0; i< PVR2_RAM_PAGES; i++ ) {
720 int slot = texcache_page_lookup[i];
721 while( slot != EMPTY_ENTRY ) {
722 assert( slot_found[slot] == 0 );
723 assert( (texcache_active_list[slot].texture_addr >> 12) == i );
724 slot_found[slot] = 2;
725 slot = texcache_active_list[slot].next;
729 /* Make sure we didn't miss any entries */
730 for( i=0; i<MAX_TEXTURES; i++ ) {
731 assert( slot_found[i] != 0 );
.