revision 1182:b38a327ad8fa
summary |
tree |
shortlog |
changelog |
graph |
changeset |
raw | bz2 | zip | gz changeset | 1182:b38a327ad8fa |
parent | 1181:3dc028106b08 |
child | 1183:425d9de21c78 |
author | nkeynes |
date | Sun Nov 27 18:20:21 2011 +1000 (12 years ago) |
Add block profiling option to count the number of executions of each block,
and dump them out from most-to-least used.
1.1 --- a/src/sh4/sh4.c Fri Nov 25 09:02:13 2011 +10001.2 +++ b/src/sh4/sh4.c Sun Nov 27 18:20:21 2011 +10001.3 @@ -235,6 +235,9 @@1.4 /* If we were running with the translator, update new_pc and in_delay_slot */1.5 sh4r.new_pc = sh4r.pc+2;1.6 sh4r.in_delay_slot = FALSE;1.7 + if( sh4_translate_get_profile_blocks() ) {1.8 + xlat_dump_cache_by_activity(30);1.9 + }1.10 }1.12 }
2.1 --- a/src/sh4/sh4trans.h Fri Nov 25 09:02:13 2011 +10002.2 +++ b/src/sh4/sh4trans.h Sun Nov 27 18:20:21 2011 +10002.3 @@ -103,6 +103,16 @@2.4 void sh4_translate_set_fastmem( gboolean flag );2.6 /**2.7 + * Enable/disable basic block profiling2.8 + */2.9 +void sh4_translate_set_profile_blocks( gboolean flag );2.10 +2.11 +/**2.12 + * Get the boolean flag indicating whether block profiling is on.2.13 + */2.14 +gboolean sh4_translate_get_profile_blocks();2.15 +2.16 +/**2.17 * Set the address spaces for the translated code.2.18 */2.19 void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user );
3.1 --- a/src/sh4/sh4x86.in Fri Nov 25 09:02:13 2011 +10003.2 +++ b/src/sh4/sh4x86.in Sun Nov 27 18:20:21 2011 +10003.3 @@ -106,6 +106,7 @@3.4 xlat_block_begin_callback_t begin_callback;3.5 xlat_block_end_callback_t end_callback;3.6 gboolean fastmem;3.7 + gboolean profile_blocks;3.9 /* Allocated memory for the (block-wide) back-patch list */3.10 struct backpatch_record *backpatch_list;3.11 @@ -166,6 +167,7 @@3.12 sh4_x86.end_callback = NULL;3.13 sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );3.14 sh4_x86.fastmem = TRUE;3.15 + sh4_x86.profile_blocks = FALSE;3.16 sh4_x86.sse3_enabled = is_sse3_supported();3.17 x86_disasm_init();3.18 x86_set_symtab( x86_symbol_table, sizeof(x86_symbol_table)/sizeof(struct x86_symbol) );3.19 @@ -182,6 +184,16 @@3.20 sh4_x86.fastmem = flag;3.21 }3.23 +void sh4_translate_set_profile_blocks( gboolean flag )3.24 +{3.25 + sh4_x86.profile_blocks = flag;3.26 +}3.27 +3.28 +gboolean sh4_translate_get_profile_blocks()3.29 +{3.30 + return sh4_x86.profile_blocks;3.31 +}3.32 +3.33 /**3.34 * Disassemble the given translated code block, and it's source SH4 code block3.35 * side-by-side. 
The current native pc will be marked if non-null.3.36 @@ -476,6 +488,11 @@3.38 #define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;3.40 +/** Offset of xlat_sh4_mode field relative to the code pointer */3.41 +#define XLAT_SH4_MODE_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )3.42 +#define XLAT_CHAIN_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )3.43 +#define XLAT_ACTIVE_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )3.44 +3.45 void sh4_translate_begin_block( sh4addr_t pc )3.46 {3.47 sh4_x86.code = xlat_output;3.48 @@ -493,6 +510,10 @@3.49 if( sh4_x86.begin_callback ) {3.50 CALL_ptr( sh4_x86.begin_callback );3.51 }3.52 + if( sh4_x86.profile_blocks ) {3.53 + MOVP_immptr_rptr( ((uintptr_t)sh4_x86.code) + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );3.54 + ADDL_imms_r32disp( 1, REG_EAX, 0 );3.55 + }3.56 }3.59 @@ -519,10 +540,6 @@3.61 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)3.63 -/** Offset of xlat_sh4_mode field relative to the code pointer */3.64 -#define XLAT_SH4_MODE_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )3.65 -#define XLAT_CHAIN_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )3.66 -3.67 /**3.68 * Test if the loaded target code pointer in %eax is valid, and if so jump3.69 * directly into it, bypassing the normal exit.
4.1 --- a/src/xlat/xltcache.c Fri Nov 25 09:02:13 2011 +10004.2 +++ b/src/xlat/xltcache.c Sun Nov 27 18:20:21 2011 +10004.3 @@ -37,6 +37,8 @@4.4 #define XLAT_LUT_ENTRY_EMPTY (void *)04.5 #define XLAT_LUT_ENTRY_USED (void *)14.7 +#define XLAT_ADDR_FROM_ENTRY(pagenum,entrynum) ((((pagenum)&0xFFFF)<<13)|(((entrynum)<<1)&0x1FFE))4.8 +4.9 #define NEXT(block) ( (xlat_cache_block_t)&((block)->code[(block)->size]))4.10 #define IS_ENTRY_POINT(ent) (ent > XLAT_LUT_ENTRY_USED)4.11 #define IS_ENTRY_USED(ent) (ent != XLAT_LUT_ENTRY_EMPTY)4.12 @@ -575,3 +577,92 @@4.13 #endif4.14 }4.16 +typedef struct {4.17 + xlat_cache_block_t block;4.18 + sh4addr_t sh4_pc;4.19 +} block_sh4_entry;4.20 +4.21 +unsigned int xlat_get_active_block_count()4.22 +{4.23 + unsigned int count = 0;4.24 + xlat_cache_block_t ptr = xlat_new_cache;4.25 + while( ptr->size != 0 ) {4.26 + if( ptr->active != 0 ) {4.27 + count++;4.28 + }4.29 + ptr = NEXT(ptr);4.30 + }4.31 + return count;4.32 +}4.33 +4.34 +unsigned int xlat_get_active_blocks( block_sh4_entry *blocks, unsigned int size )4.35 +{4.36 + unsigned int count = 0;4.37 + xlat_cache_block_t ptr = xlat_new_cache;4.38 + while( ptr->size != 0 ) {4.39 + if( ptr->active != 0 ) {4.40 + blocks[count].block = ptr;4.41 + blocks[count].sh4_pc = 0;4.42 + count++;4.43 + }4.44 + if( count >= size )4.45 + break;4.46 + ptr = NEXT(ptr);4.47 + }4.48 + return count;4.49 +}4.50 +4.51 +void xlat_get_block_sh4addrs( block_sh4_entry *blocks, unsigned int size )4.52 +{4.53 + unsigned i;4.54 + for( i=0; i<XLAT_LUT_PAGES;i ++ ) {4.55 + void **page = xlat_lut[i];4.56 + if( page != NULL ) {4.57 + for( unsigned j=0; j < XLAT_LUT_PAGE_ENTRIES; j++ ) {4.58 + void *code = (void *)(((uintptr_t)(page[j])) & (~((uintptr_t)0x03)));4.59 + if( code != NULL ) {4.60 + xlat_cache_block_t ptr = XLAT_BLOCK_FOR_CODE(code);4.61 + sh4addr_t pc = XLAT_ADDR_FROM_ENTRY(i,j);4.62 + for( unsigned k=0; k<size; k++ ) {4.63 + if( blocks[k].block == ptr ) {4.64 + blocks[k].sh4_pc = pc;4.65 + ptr = ptr->chain;4.66 
+ if( ptr == NULL )4.67 + break;4.68 + else {4.69 + ptr = XLAT_BLOCK_FOR_CODE(ptr);4.70 + k = 0;4.71 + }4.72 + }4.73 + }4.74 + }4.75 + }4.76 + }4.77 + }4.78 +}4.79 +4.80 +static int xlat_compare_active_field( const void *a, const void *b )4.81 +{4.82 + const block_sh4_entry *ptra = (const block_sh4_entry *)a;4.83 + const block_sh4_entry *ptrb = (const block_sh4_entry *)b;4.84 + return ptrb->block->active - ptra->block->active;4.85 +}4.86 +4.87 +void xlat_dump_cache_by_activity( unsigned int topN )4.88 +{4.89 + int i=0;4.90 + int count = xlat_get_active_block_count();4.91 +4.92 + block_sh4_entry blocks[count];4.93 + xlat_get_active_blocks(blocks, count);4.94 + xlat_get_block_sh4addrs(blocks,count);4.95 + qsort(blocks, count, sizeof(block_sh4_entry), xlat_compare_active_field);4.96 +4.97 + if( topN == 0 || topN > count )4.98 + topN = count;4.99 + for( unsigned int i=0; i<topN; i++ ) {4.100 + fprintf(stderr, "0x%08X (%p): %d\n", blocks[i].sh4_pc, blocks[i].block->code, blocks[i].block->active);4.101 + sh4_translate_disasm_block( stderr, blocks[i].block->code, blocks[i].sh4_pc, NULL );4.102 + fprintf(stderr, "\n");4.103 + }4.104 +}
5.1 --- a/src/xlat/xltcache.h Fri Nov 25 09:02:13 2011 +10005.2 +++ b/src/xlat/xltcache.h Sun Nov 27 18:20:21 2011 +10005.3 @@ -181,4 +181,11 @@5.4 */5.5 void xlat_check_integrity();5.7 +/**5.8 + * Dump out the top N translated blocks by number of executions (requires block5.9 + * profiling to be turned on in order to give meaningful results).5.10 + * @param topN Number of blocks to print. If 0, print all blocks in the cache5.11 + */5.12 +void xlat_dump_cache_by_activity( unsigned int topN );5.13 +5.14 #endif /* lxdream_xltcache_H */
.