nkeynes@103: /** nkeynes@287: * $Id: texcache.c,v 1.12 2007-01-15 10:37:02 nkeynes Exp $ nkeynes@103: * nkeynes@103: * Texture cache. Responsible for maintaining a working set of OpenGL nkeynes@103: * textures. nkeynes@103: * nkeynes@103: * nkeynes@103: * Copyright (c) 2005 Nathan Keynes. nkeynes@103: * nkeynes@103: * This program is free software; you can redistribute it and/or modify nkeynes@103: * it under the terms of the GNU General Public License as published by nkeynes@103: * the Free Software Foundation; either version 2 of the License, or nkeynes@103: * (at your option) any later version. nkeynes@103: * nkeynes@103: * This program is distributed in the hope that it will be useful, nkeynes@103: * but WITHOUT ANY WARRANTY; without even the implied warranty of nkeynes@103: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the nkeynes@103: * GNU General Public License for more details. nkeynes@103: */ nkeynes@103: nkeynes@103: #include nkeynes@103: #include "pvr2/pvr2.h" nkeynes@103: nkeynes@103: /** Specifies the maximum number of OpenGL nkeynes@103: * textures we're willing to have open at a time. If more are nkeynes@103: * needed, textures will be evicted in LRU order. nkeynes@103: */ nkeynes@103: #define MAX_TEXTURES 64 nkeynes@103: nkeynes@103: /** nkeynes@103: * Data structure: nkeynes@103: * nkeynes@103: * Main operations: nkeynes@103: * find entry by texture_addr nkeynes@103: * add new entry nkeynes@103: * move entry to tail of lru list nkeynes@103: * remove entry nkeynes@103: */ nkeynes@103: nkeynes@103: typedef signed short texcache_entry_index; nkeynes@107: #define EMPTY_ENTRY 0xFF nkeynes@103: nkeynes@107: static texcache_entry_index texcache_free_ptr = 0; nkeynes@103: static GLuint texcache_free_list[MAX_TEXTURES]; nkeynes@103: nkeynes@103: typedef struct texcache_entry { nkeynes@103: uint32_t texture_addr; nkeynes@103: int width, height, mode; nkeynes@103: GLuint texture_id; nkeynes@103: texcache_entry_index next; nkeynes@103: uint32_t lru_count; nkeynes@103: } *texcache_entry_t; nkeynes@103: nkeynes@103: static uint8_t texcache_page_lookup[PVR2_RAM_PAGES]; nkeynes@103: static uint32_t texcache_ref_counter; nkeynes@103: static struct texcache_entry texcache_active_list[MAX_TEXTURES]; nkeynes@103: nkeynes@103: /** nkeynes@108: * Initialize the texture cache. nkeynes@103: */ nkeynes@103: void texcache_init( ) nkeynes@103: { nkeynes@103: int i; nkeynes@103: for( i=0; i> 12; nkeynes@103: texcache_entry_index idx = texcache_page_lookup[texture_page]; nkeynes@103: if( idx == EMPTY_ENTRY ) nkeynes@103: return; nkeynes@107: assert( texcache_free_ptr >= 0 ); nkeynes@103: do { nkeynes@103: texcache_entry_t entry = &texcache_active_list[idx]; nkeynes@103: /* release entry */ nkeynes@103: texcache_free_ptr--; nkeynes@103: texcache_free_list[texcache_free_ptr] = idx; nkeynes@103: idx = entry->next; nkeynes@103: entry->next = EMPTY_ENTRY; nkeynes@103: } while( idx != EMPTY_ENTRY ); nkeynes@103: texcache_page_lookup[texture_page] = EMPTY_ENTRY; nkeynes@103: } nkeynes@103: nkeynes@103: /** nkeynes@103: * Evict a single texture from the cache. nkeynes@103: * @return the slot of the evicted texture. nkeynes@103: */ nkeynes@103: static texcache_entry_index texcache_evict( void ) nkeynes@103: { nkeynes@103: /* Full table scan - take over the entry with the lowest lru value */ nkeynes@103: texcache_entry_index slot = 0; nkeynes@103: int lru_value = texcache_active_list[0].lru_count; nkeynes@103: int i; nkeynes@103: for( i=1; i> 12; nkeynes@103: texcache_entry_index replace_next = texcache_active_list[slot].next; nkeynes@103: texcache_active_list[slot].next = EMPTY_ENTRY; /* Just for safety */ nkeynes@103: if( texcache_page_lookup[evict_page] == slot ) { nkeynes@103: texcache_page_lookup[evict_page] = replace_next; nkeynes@103: } else { nkeynes@103: texcache_entry_index idx = texcache_page_lookup[evict_page]; nkeynes@103: texcache_entry_index next; nkeynes@103: do { nkeynes@103: next = texcache_active_list[idx].next; nkeynes@103: if( next == slot ) { nkeynes@103: texcache_active_list[idx].next = replace_next; nkeynes@103: break; nkeynes@103: } nkeynes@103: idx = next; nkeynes@103: } while( next != EMPTY_ENTRY ); nkeynes@103: } nkeynes@103: return slot; nkeynes@103: } nkeynes@103: nkeynes@129: static void detwiddle_pal8_to_32(int x1, int y1, int size, int totsize, nkeynes@113: char **in, uint32_t *out, uint32_t *pal) { nkeynes@113: if (size == 1) { nkeynes@113: out[y1 * totsize + x1] = pal[**in]; nkeynes@113: (*in)++; nkeynes@113: } else { nkeynes@113: int ns = size>>1; nkeynes@129: detwiddle_pal8_to_32(x1, y1, ns, totsize, in, out, pal); nkeynes@129: detwiddle_pal8_to_32(x1, y1+ns, ns, totsize, in, out, pal); nkeynes@129: detwiddle_pal8_to_32(x1+ns, y1, ns, totsize, in, out, pal); nkeynes@129: detwiddle_pal8_to_32(x1+ns, y1+ns, ns, totsize, in, out, pal); nkeynes@129: } nkeynes@129: } nkeynes@129: nkeynes@129: static void detwiddle_pal8_to_16(int x1, int y1, int size, int totsize, nkeynes@129: char **in, uint16_t *out, uint16_t *pal) { nkeynes@129: if (size == 1) { nkeynes@129: out[y1 * totsize + x1] = pal[**in]; nkeynes@129: (*in)++; nkeynes@129: } else { nkeynes@129: int ns = size>>1; nkeynes@129: detwiddle_pal8_to_16(x1, y1, ns, totsize, in, out, pal); nkeynes@129: detwiddle_pal8_to_16(x1, y1+ns, ns, totsize, in, out, pal); nkeynes@129: detwiddle_pal8_to_16(x1+ns, y1, ns, totsize, in, out, pal); nkeynes@129: detwiddle_pal8_to_16(x1+ns, y1+ns, ns, totsize, in, out, pal); nkeynes@113: } nkeynes@113: } nkeynes@113: nkeynes@126: static void detwiddle_16_to_16(int x1, int y1, int size, int totsize, nkeynes@126: uint16_t **in, uint16_t *out ) { nkeynes@126: if (size == 1) { nkeynes@126: out[y1 * totsize + x1] = **in; nkeynes@126: (*in)++; nkeynes@126: } else { nkeynes@126: int ns = size>>1; nkeynes@126: detwiddle_16_to_16(x1, y1, ns, totsize, in, out); nkeynes@126: detwiddle_16_to_16(x1, y1+ns, ns, totsize, in, out); nkeynes@126: detwiddle_16_to_16(x1+ns, y1, ns, totsize, in, out); nkeynes@126: detwiddle_16_to_16(x1+ns, y1+ns, ns, totsize, in, out); nkeynes@126: } nkeynes@126: } nkeynes@126: nkeynes@224: #define VQ_CODEBOOK_SIZE 2048 /* 256 entries * 4 pixels per quad * 2 byte pixels */ nkeynes@224: nkeynes@224: struct vq_codebook { nkeynes@224: uint16_t quad[256][4]; nkeynes@224: }; nkeynes@224: nkeynes@224: static void detwiddle_vq_to_16(int x1, int y1, int size, int totsize, nkeynes@224: uint8_t **in, uint16_t *out, struct vq_codebook *codebook ) { nkeynes@224: if( size == 2 ) { nkeynes@224: uint8_t code = **in; nkeynes@224: (*in)++; nkeynes@224: out[y1 * totsize + x1] = codebook->quad[code][0]; nkeynes@224: out[y1 * totsize + x1 + 1] = codebook->quad[code][1]; nkeynes@224: out[(y1+1) * totsize + x1] = codebook->quad[code][2]; nkeynes@224: out[(y1+1) * totsize + x1 + 1] = codebook->quad[code][3]; nkeynes@224: } else { nkeynes@224: int ns = size>>1; nkeynes@224: detwiddle_vq_to_16(x1, y1, ns, totsize, in, out, codebook); nkeynes@224: detwiddle_vq_to_16(x1, y1+ns, ns, totsize, in, out, codebook); nkeynes@224: detwiddle_vq_to_16(x1+ns, y1, ns, totsize, in, out, codebook); nkeynes@224: detwiddle_vq_to_16(x1+ns, y1+ns, ns, totsize, in, out, codebook); nkeynes@224: } nkeynes@224: } nkeynes@224: nkeynes@270: static void vq_get_codebook( struct vq_codebook *codebook, nkeynes@270: uint16_t *input ) nkeynes@270: { nkeynes@270: /* Detwiddle the codebook, for the sake of my own sanity if nothing else */ nkeynes@270: uint16_t *p = (uint16_t *)input; nkeynes@270: int i; nkeynes@270: for( i=0; i<256; i++ ) { nkeynes@270: codebook->quad[i][0] = *p++; nkeynes@270: codebook->quad[i][2] = *p++; nkeynes@270: codebook->quad[i][1] = *p++; nkeynes@270: codebook->quad[i][3] = *p++; nkeynes@270: } nkeynes@270: } nkeynes@270: nkeynes@270: nkeynes@224: static void vq_decode( int width, int height, char *input, uint16_t *output, nkeynes@270: struct vq_codebook *codebook, int twiddled ) { nkeynes@224: int i,j; nkeynes@224: nkeynes@270: uint8_t *c = (uint8_t *)input; nkeynes@224: if( twiddled ) { nkeynes@270: detwiddle_vq_to_16( 0, 0, width, width, &c, output, codebook ); nkeynes@224: } else { nkeynes@224: for( j=0; jquad[code][0]; nkeynes@270: output[i + 1 + j*width] = codebook->quad[code][1]; nkeynes@270: output[i + (j+1)*width] = codebook->quad[code][2]; nkeynes@270: output[i + 1 + (j+1)*width] = codebook->quad[code][3]; nkeynes@224: } nkeynes@224: } nkeynes@224: } nkeynes@224: } nkeynes@113: nkeynes@282: static inline uint32_t yuv_to_rgb32( float y, float u, float v ) nkeynes@282: { nkeynes@282: u -= 128; nkeynes@282: v -= 128; nkeynes@282: int r = (int)(y + v*1.375); nkeynes@282: int g = (int)(y - u*0.34375 - v*0.6875); nkeynes@282: int b = (int)(y + u*1.71875); nkeynes@282: if( r > 255 ) { r = 255; } else if( r < 0 ) { r = 0; } nkeynes@282: if( g > 255 ) { g = 255; } else if( g < 0 ) { g = 0; } nkeynes@282: if( b > 255 ) { b = 255; } else if( b < 0 ) { b = 0; } nkeynes@282: return 0xFF000000 | (r<<24) | (g<<16) | (b<<16); nkeynes@282: } nkeynes@282: nkeynes@282: nkeynes@282: /** nkeynes@282: * Convert non-twiddled YUV texture data into RGB32 data - most GL implementations don't nkeynes@282: * directly support this format unfortunately. The input data is formatted as nkeynes@282: * 32 bits = 2 horizontal pixels, UYVY. This is currently done rather inefficiently nkeynes@282: * in floating point. nkeynes@282: */ nkeynes@282: static void yuv_decode( int width, int height, uint32_t *input, uint32_t *output ) nkeynes@282: { nkeynes@282: int x, y; nkeynes@282: uint32_t *p = input; nkeynes@282: for( y=0; y>8)&0xFF ); nkeynes@282: float v = (float)( (*p>>16)&0xFF ); nkeynes@282: float y1 = (float)( (*p>>24)&0xFF ); nkeynes@282: *output++ = yuv_to_rgb32( y0, u, v ); nkeynes@282: *output++ = yuv_to_rgb32( y1, u, v ); nkeynes@287: p++; nkeynes@282: } nkeynes@282: } nkeynes@282: } nkeynes@282: nkeynes@103: /** nkeynes@103: * Load texture data from the given address and parameters into the currently nkeynes@103: * bound OpenGL texture. nkeynes@103: */ nkeynes@103: static texcache_load_texture( uint32_t texture_addr, int width, int height, nkeynes@103: int mode ) { nkeynes@284: int bpp_shift = 1; /* bytes per (output) pixel as a power of 2 */ nkeynes@103: GLint intFormat, format, type; nkeynes@108: int tex_format = mode & PVR2_TEX_FORMAT_MASK; nkeynes@270: struct vq_codebook codebook; nkeynes@270: GLint filter = GL_LINEAR; nkeynes@108: nkeynes@270: /* Decode the format parameters */ nkeynes@270: switch( tex_format ) { nkeynes@270: case PVR2_TEX_FORMAT_IDX4: nkeynes@270: ERROR( "4-bit indexed textures not supported" ); nkeynes@270: case PVR2_TEX_FORMAT_IDX8: nkeynes@284: /* For indexed-colour modes, we need to lookup the palette control nkeynes@284: * word to determine the de-indexed texture format. nkeynes@284: */ nkeynes@191: switch( MMIO_READ( PVR2, RENDER_PALETTE ) & 0x03 ) { nkeynes@108: case 0: /* ARGB1555 */ nkeynes@108: intFormat = GL_RGB5_A1; nkeynes@108: format = GL_RGBA; nkeynes@129: type = GL_UNSIGNED_SHORT_1_5_5_5_REV; nkeynes@108: break; nkeynes@284: case 1: /* RGB565 */ nkeynes@108: intFormat = GL_RGB; nkeynes@108: format = GL_RGB; nkeynes@129: type = GL_UNSIGNED_SHORT_5_6_5_REV; nkeynes@108: break; nkeynes@284: case 2: /* ARGB4444 */ nkeynes@108: intFormat = GL_RGBA4; nkeynes@129: format = GL_BGRA; nkeynes@129: type = GL_UNSIGNED_SHORT_4_4_4_4_REV; nkeynes@108: break; nkeynes@284: case 3: /* ARGB8888 */ nkeynes@108: intFormat = GL_RGBA8; nkeynes@113: format = GL_BGRA; nkeynes@113: type = GL_UNSIGNED_INT_8_8_8_8_REV; nkeynes@284: bpp_shift = 2; nkeynes@108: break; nkeynes@108: } nkeynes@270: break; nkeynes@270: nkeynes@270: case PVR2_TEX_FORMAT_ARGB1555: nkeynes@270: intFormat = GL_RGB5_A1; nkeynes@270: format = GL_RGBA; nkeynes@270: type = GL_UNSIGNED_SHORT_1_5_5_5_REV; nkeynes@270: break; nkeynes@270: case PVR2_TEX_FORMAT_RGB565: nkeynes@270: intFormat = GL_RGB; nkeynes@270: format = GL_RGB; nkeynes@270: type = GL_UNSIGNED_SHORT_5_6_5_REV; nkeynes@270: break; nkeynes@270: case PVR2_TEX_FORMAT_ARGB4444: nkeynes@270: intFormat = GL_RGBA4; nkeynes@270: format = GL_BGRA; nkeynes@270: type = GL_UNSIGNED_SHORT_4_4_4_4_REV; nkeynes@270: break; nkeynes@270: case PVR2_TEX_FORMAT_YUV422: nkeynes@284: /* YUV422 isn't directly supported by most implementations, so decode nkeynes@284: * it to a (reasonably) standard ARGB32. nkeynes@284: */ nkeynes@284: bpp_shift = 2; nkeynes@282: intFormat = GL_RGBA8; nkeynes@282: format = GL_BGRA; nkeynes@282: type = GL_UNSIGNED_INT_8_8_8_8_REV; nkeynes@270: break; nkeynes@270: case PVR2_TEX_FORMAT_BUMPMAP: nkeynes@270: ERROR( "Bumpmap not supported" ); nkeynes@270: break; nkeynes@270: } nkeynes@270: nkeynes@284: if( PVR2_TEX_IS_STRIDE(mode) ) { nkeynes@284: /* Stride textures cannot be mip-mapped, compressed, indexed or twiddled */ nkeynes@284: uint32_t stride = (MMIO_READ( PVR2, RENDER_TEXSIZE ) & 0x003F) << 5; nkeynes@284: char data[(width*height) << bpp_shift]; nkeynes@284: if( tex_format == PVR2_TEX_FORMAT_YUV422 ) { nkeynes@284: char tmp[(width*height)<<1]; nkeynes@284: pvr2_vram64_read_stride( &tmp, width<<1, texture_addr, stride<<1, height ); nkeynes@284: yuv_decode(width, height, &tmp, &data ); nkeynes@284: } else { nkeynes@284: pvr2_vram64_read_stride( &data, width<> i; nkeynes@270: mip_height= height >> i; nkeynes@270: filter = GL_LINEAR_MIPMAP_LINEAR; nkeynes@270: } nkeynes@284: mip_bytes = (mip_width * mip_width) << bpp_shift; nkeynes@108: nkeynes@270: if( PVR2_TEX_IS_COMPRESSED(mode) ) { nkeynes@270: uint16_t tmp[VQ_CODEBOOK_SIZE]; nkeynes@270: pvr2_vram64_read( (char *)tmp, texture_addr, VQ_CODEBOOK_SIZE ); nkeynes@270: texture_addr += VQ_CODEBOOK_SIZE; nkeynes@270: vq_get_codebook( &codebook, tmp ); nkeynes@270: } nkeynes@270: nkeynes@270: for( level=last_level; level>= 0; level-- ) { nkeynes@270: char data[mip_bytes]; nkeynes@270: /* load data from image, detwiddling/uncompressing as required */ nkeynes@108: if( tex_format == PVR2_TEX_FORMAT_IDX8 ) { nkeynes@284: int inputlength = mip_bytes >> bpp_shift; nkeynes@108: int bank = (mode >> 25) &0x03; nkeynes@284: char *palette = mmio_region_PVR2PAL.mem + (bank * (256 << bpp_shift)); nkeynes@270: char tmp[inputlength]; nkeynes@270: char *p = tmp; nkeynes@270: pvr2_vram64_read( tmp, texture_addr, inputlength ); nkeynes@284: if( bpp_shift == 2 ) { nkeynes@270: detwiddle_pal8_to_32( 0, 0, mip_width, mip_width, &p, nkeynes@113: (uint32_t *)data, (uint32_t *)palette ); nkeynes@113: } else { nkeynes@270: detwiddle_pal8_to_16( 0, 0, mip_width, mip_width, &p, nkeynes@129: (uint16_t *)data, (uint16_t *)palette ); nkeynes@108: } nkeynes@282: } else if( tex_format == PVR2_TEX_FORMAT_YUV422 ) { nkeynes@282: int inputlength = ((mip_width*mip_height)<<1); nkeynes@282: char tmp[inputlength]; nkeynes@282: pvr2_vram64_read( tmp, texture_addr, inputlength ); nkeynes@282: yuv_decode( mip_width, mip_height, tmp, (uint32_t *)&data ); nkeynes@270: } else if( PVR2_TEX_IS_COMPRESSED(mode) ) { nkeynes@270: int inputlength = ((mip_width*mip_height) >> 2); nkeynes@270: char tmp[inputlength]; nkeynes@270: pvr2_vram64_read( tmp, texture_addr, inputlength ); nkeynes@270: vq_decode( mip_width, mip_height, tmp, (uint16_t *)&data, &codebook, nkeynes@270: PVR2_TEX_IS_TWIDDLED(mode) ); nkeynes@270: } else if( PVR2_TEX_IS_TWIDDLED(mode) ) { nkeynes@270: char tmp[mip_bytes]; nkeynes@270: uint16_t *p = (uint16_t *)tmp; nkeynes@270: pvr2_vram64_read( tmp, texture_addr, mip_bytes ); nkeynes@270: /* Untwiddle */ nkeynes@270: detwiddle_16_to_16( 0, 0, mip_width, mip_width, &p, (uint16_t *)&data ); nkeynes@270: } else { nkeynes@270: pvr2_vram64_read( data, texture_addr, mip_bytes ); nkeynes@108: } nkeynes@270: nkeynes@270: if( PVR2_TEX_IS_MIPMAPPED(mode) && mip_width == 2 ) { nkeynes@270: /* Opengl requires a 1x1 texture, but the PVR2 doesn't. This should nkeynes@270: * strictly speaking be the average of the 2x2 texture, but we're nkeynes@270: * lazy at the moment */ nkeynes@270: glTexImage2D( GL_TEXTURE_2D, level+1, intFormat, 1, 1, 0, format, type, data ); nkeynes@108: } nkeynes@108: nkeynes@108: /* Pass to GL */ nkeynes@270: glTexImage2D( GL_TEXTURE_2D, level, intFormat, mip_width, mip_height, 0, format, type, nkeynes@108: data ); nkeynes@270: texture_addr += mip_bytes; nkeynes@270: mip_width <<= 1; nkeynes@270: mip_height <<= 1; nkeynes@270: mip_bytes <<= 2; nkeynes@103: } nkeynes@270: nkeynes@270: glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter); nkeynes@108: glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); nkeynes@103: } nkeynes@103: nkeynes@103: /** nkeynes@103: * Return a texture ID for the texture specified at the supplied address nkeynes@103: * and given parameters (the same sequence of bytes could in theory have nkeynes@103: * multiple interpretations). We use the texture address as the primary nkeynes@103: * index, but allow for multiple instances at each address. The texture nkeynes@103: * will be bound to the GL_TEXTURE_2D target before being returned. nkeynes@103: * nkeynes@103: * If the texture has already been bound, return the ID to which it was nkeynes@103: * bound. Otherwise obtain an unused texture ID and set it up appropriately. nkeynes@103: */ nkeynes@103: GLuint texcache_get_texture( uint32_t texture_addr, int width, int height, nkeynes@103: int mode ) nkeynes@103: { nkeynes@103: uint32_t texture_page = texture_addr >> 12; nkeynes@103: texcache_entry_index idx = texcache_page_lookup[texture_page]; nkeynes@103: while( idx != EMPTY_ENTRY ) { nkeynes@103: texcache_entry_t entry = &texcache_active_list[idx]; nkeynes@103: if( entry->texture_addr == texture_addr && nkeynes@103: entry->mode == mode && nkeynes@103: entry->width == width && nkeynes@103: entry->height == height ) { nkeynes@103: entry->lru_count = texcache_ref_counter++; nkeynes@103: glBindTexture( GL_TEXTURE_2D, entry->texture_id ); nkeynes@103: return entry->texture_id; nkeynes@103: } nkeynes@103: idx = entry->next; nkeynes@103: } nkeynes@103: nkeynes@103: /* Not found - check the free list */ nkeynes@103: int slot = 0; nkeynes@103: nkeynes@103: if( texcache_free_ptr < MAX_TEXTURES ) { nkeynes@103: slot = texcache_free_list[texcache_free_ptr++]; nkeynes@103: } else { nkeynes@103: slot = texcache_evict(); nkeynes@103: } nkeynes@103: nkeynes@103: /* Construct new entry */ nkeynes@103: texcache_active_list[slot].texture_addr = texture_addr; nkeynes@103: texcache_active_list[slot].width = width; nkeynes@103: texcache_active_list[slot].height = height; nkeynes@103: texcache_active_list[slot].mode = mode; nkeynes@103: texcache_active_list[slot].lru_count = texcache_ref_counter++; nkeynes@103: nkeynes@103: /* Add entry to the lookup table */ nkeynes@103: texcache_active_list[slot].next = texcache_page_lookup[texture_page]; nkeynes@103: texcache_page_lookup[texture_page] = slot; nkeynes@103: nkeynes@103: /* Construct the GL texture */ nkeynes@108: glBindTexture( GL_TEXTURE_2D, texcache_active_list[slot].texture_id ); nkeynes@103: texcache_load_texture( texture_addr, width, height, mode ); nkeynes@103: nkeynes@103: return texcache_active_list[slot].texture_id; nkeynes@103: }