Search
lxdream.org :: lxdream :: r1182:b38a327ad8fa
lxdream 0.9.1
released Jun 29
Download Now
changeset 1182:b38a327ad8fa
parent 1181:3dc028106b08
child 1183:425d9de21c78
author nkeynes
date Sun Nov 27 18:20:21 2011 +1000 (7 years ago)
Add block profiling option to count the number of executions of each block,
and dump them out from most-to-least used.
src/sh4/sh4.c
src/sh4/sh4trans.h
src/sh4/sh4x86.in
src/xlat/xltcache.c
src/xlat/xltcache.h
1.1 --- a/src/sh4/sh4.c Fri Nov 25 09:02:13 2011 +1000
1.2 +++ b/src/sh4/sh4.c Sun Nov 27 18:20:21 2011 +1000
1.3 @@ -235,6 +235,9 @@
1.4 /* If we were running with the translator, update new_pc and in_delay_slot */
1.5 sh4r.new_pc = sh4r.pc+2;
1.6 sh4r.in_delay_slot = FALSE;
1.7 + if( sh4_translate_get_profile_blocks() ) {
1.8 + xlat_dump_cache_by_activity(30);
1.9 + }
1.10 }
1.11
1.12 }
2.1 --- a/src/sh4/sh4trans.h Fri Nov 25 09:02:13 2011 +1000
2.2 +++ b/src/sh4/sh4trans.h Sun Nov 27 18:20:21 2011 +1000
2.3 @@ -103,6 +103,16 @@
2.4 void sh4_translate_set_fastmem( gboolean flag );
2.5
2.6 /**
2.7 + * Enable/disable basic block profiling
2.8 + */
2.9 +void sh4_translate_set_profile_blocks( gboolean flag );
2.10 +
2.11 +/**
2.12 + * Get the boolean flag indicating whether block profiling is on.
2.13 + */
2.14 +gboolean sh4_translate_get_profile_blocks();
2.15 +
2.16 +/**
2.17 * Set the address spaces for the translated code.
2.18 */
2.19 void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user );
3.1 --- a/src/sh4/sh4x86.in Fri Nov 25 09:02:13 2011 +1000
3.2 +++ b/src/sh4/sh4x86.in Sun Nov 27 18:20:21 2011 +1000
3.3 @@ -106,6 +106,7 @@
3.4 xlat_block_begin_callback_t begin_callback;
3.5 xlat_block_end_callback_t end_callback;
3.6 gboolean fastmem;
3.7 + gboolean profile_blocks;
3.8
3.9 /* Allocated memory for the (block-wide) back-patch list */
3.10 struct backpatch_record *backpatch_list;
3.11 @@ -166,6 +167,7 @@
3.12 sh4_x86.end_callback = NULL;
3.13 sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
3.14 sh4_x86.fastmem = TRUE;
3.15 + sh4_x86.profile_blocks = FALSE;
3.16 sh4_x86.sse3_enabled = is_sse3_supported();
3.17 x86_disasm_init();
3.18 x86_set_symtab( x86_symbol_table, sizeof(x86_symbol_table)/sizeof(struct x86_symbol) );
3.19 @@ -182,6 +184,16 @@
3.20 sh4_x86.fastmem = flag;
3.21 }
3.22
3.23 +void sh4_translate_set_profile_blocks( gboolean flag )
3.24 +{
3.25 + sh4_x86.profile_blocks = flag;
3.26 +}
3.27 +
3.28 +gboolean sh4_translate_get_profile_blocks()
3.29 +{
3.30 + return sh4_x86.profile_blocks;
3.31 +}
3.32 +
3.33 /**
3.34 * Disassemble the given translated code block, and its source SH4 code block
3.35 * side-by-side. The current native pc will be marked if non-null.
3.36 @@ -476,6 +488,11 @@
3.37
3.38 #define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
3.39
/**
 * Offsets of xlat_cache_block header fields relative to the block's code
 * pointer.
 *
 * The SH4_MODE and CHAIN offsets are truncated to uint32_t. For a field that
 * sits before 'code' in the struct the value is therefore the 32-bit
 * two's-complement encoding of a negative displacement, which is only correct
 * where the consumer wraps at 32 bits.
 *
 * XLAT_ACTIVE_CODE_OFFSET is added directly to a pointer-width (uintptr_t)
 * address, so it is kept as a signed, pointer-width difference. With a
 * uint32_t value the sum would land 4GB away from the intended field on
 * 64-bit hosts whenever the offset is negative.
 */
#define XLAT_SH4_MODE_CODE_OFFSET ((uint32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block, code)))
#define XLAT_CHAIN_CODE_OFFSET ((uint32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block, code)))
#define XLAT_ACTIVE_CODE_OFFSET ((intptr_t)offsetof(struct xlat_cache_block, active) - (intptr_t)offsetof(struct xlat_cache_block, code))
3.44 +
3.45 void sh4_translate_begin_block( sh4addr_t pc )
3.46 {
3.47 sh4_x86.code = xlat_output;
3.48 @@ -493,6 +510,10 @@
3.49 if( sh4_x86.begin_callback ) {
3.50 CALL_ptr( sh4_x86.begin_callback );
3.51 }
3.52 + if( sh4_x86.profile_blocks ) {
3.53 + MOVP_immptr_rptr( ((uintptr_t)sh4_x86.code) + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
3.54 + ADDL_imms_r32disp( 1, REG_EAX, 0 );
3.55 + }
3.56 }
3.57
3.58
3.59 @@ -519,10 +540,6 @@
3.60
3.61 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
3.62
3.63 -/** Offset of xlat_sh4_mode field relative to the code pointer */
3.64 -#define XLAT_SH4_MODE_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
3.65 -#define XLAT_CHAIN_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
3.66 -
3.67 /**
3.68 * Test if the loaded target code pointer in %eax is valid, and if so jump
3.69 * directly into it, bypassing the normal exit.
4.1 --- a/src/xlat/xltcache.c Fri Nov 25 09:02:13 2011 +1000
4.2 +++ b/src/xlat/xltcache.c Sun Nov 27 18:20:21 2011 +1000
4.3 @@ -37,6 +37,8 @@
4.4 #define XLAT_LUT_ENTRY_EMPTY (void *)0
4.5 #define XLAT_LUT_ENTRY_USED (void *)1
4.6
/**
 * Rebuild an address from a lookup-table position: the low 16 bits of the
 * page number occupy bits 13 and up, and the low 12 bits of the entry number
 * (doubled) occupy bits 1-12.
 */
#define XLAT_ADDR_FROM_ENTRY(pagenum,entrynum) \
    ( (((pagenum) & 0xFFFF) << 13) | (((entrynum) & 0xFFF) << 1) )
4.8 +
4.9 #define NEXT(block) ( (xlat_cache_block_t)&((block)->code[(block)->size]))
4.10 #define IS_ENTRY_POINT(ent) (ent > XLAT_LUT_ENTRY_USED)
4.11 #define IS_ENTRY_USED(ent) (ent != XLAT_LUT_ENTRY_EMPTY)
4.12 @@ -575,3 +577,92 @@
4.13 #endif
4.14 }
4.15
4.16 +typedef struct {
4.17 + xlat_cache_block_t block;
4.18 + sh4addr_t sh4_pc;
4.19 +} block_sh4_entry;
4.20 +
4.21 +unsigned int xlat_get_active_block_count()
4.22 +{
4.23 + unsigned int count = 0;
4.24 + xlat_cache_block_t ptr = xlat_new_cache;
4.25 + while( ptr->size != 0 ) {
4.26 + if( ptr->active != 0 ) {
4.27 + count++;
4.28 + }
4.29 + ptr = NEXT(ptr);
4.30 + }
4.31 + return count;
4.32 +}
4.33 +
4.34 +unsigned int xlat_get_active_blocks( block_sh4_entry *blocks, unsigned int size )
4.35 +{
4.36 + unsigned int count = 0;
4.37 + xlat_cache_block_t ptr = xlat_new_cache;
4.38 + while( ptr->size != 0 ) {
4.39 + if( ptr->active != 0 ) {
4.40 + blocks[count].block = ptr;
4.41 + blocks[count].sh4_pc = 0;
4.42 + count++;
4.43 + }
4.44 + if( count >= size )
4.45 + break;
4.46 + ptr = NEXT(ptr);
4.47 + }
4.48 + return count;
4.49 +}
4.50 +
4.51 +void xlat_get_block_sh4addrs( block_sh4_entry *blocks, unsigned int size )
4.52 +{
4.53 + unsigned i;
4.54 + for( i=0; i<XLAT_LUT_PAGES;i ++ ) {
4.55 + void **page = xlat_lut[i];
4.56 + if( page != NULL ) {
4.57 + for( unsigned j=0; j < XLAT_LUT_PAGE_ENTRIES; j++ ) {
4.58 + void *code = (void *)(((uintptr_t)(page[j])) & (~((uintptr_t)0x03)));
4.59 + if( code != NULL ) {
4.60 + xlat_cache_block_t ptr = XLAT_BLOCK_FOR_CODE(code);
4.61 + sh4addr_t pc = XLAT_ADDR_FROM_ENTRY(i,j);
4.62 + for( unsigned k=0; k<size; k++ ) {
4.63 + if( blocks[k].block == ptr ) {
4.64 + blocks[k].sh4_pc = pc;
4.65 + ptr = ptr->chain;
4.66 + if( ptr == NULL )
4.67 + break;
4.68 + else {
4.69 + ptr = XLAT_BLOCK_FOR_CODE(ptr);
4.70 + k = 0;
4.71 + }
4.72 + }
4.73 + }
4.74 + }
4.75 + }
4.76 + }
4.77 + }
4.78 +}
4.79 +
4.80 +static int xlat_compare_active_field( const void *a, const void *b )
4.81 +{
4.82 + const block_sh4_entry *ptra = (const block_sh4_entry *)a;
4.83 + const block_sh4_entry *ptrb = (const block_sh4_entry *)b;
4.84 + return ptrb->block->active - ptra->block->active;
4.85 +}
4.86 +
4.87 +void xlat_dump_cache_by_activity( unsigned int topN )
4.88 +{
4.89 + int i=0;
4.90 + int count = xlat_get_active_block_count();
4.91 +
4.92 + block_sh4_entry blocks[count];
4.93 + xlat_get_active_blocks(blocks, count);
4.94 + xlat_get_block_sh4addrs(blocks,count);
4.95 + qsort(blocks, count, sizeof(block_sh4_entry), xlat_compare_active_field);
4.96 +
4.97 + if( topN == 0 || topN > count )
4.98 + topN = count;
4.99 + for( unsigned int i=0; i<topN; i++ ) {
4.100 + fprintf(stderr, "0x%08X (%p): %d\n", blocks[i].sh4_pc, blocks[i].block->code, blocks[i].block->active);
4.101 + sh4_translate_disasm_block( stderr, blocks[i].block->code, blocks[i].sh4_pc, NULL );
4.102 + fprintf(stderr, "\n");
4.103 + }
4.104 +}
5.1 --- a/src/xlat/xltcache.h Fri Nov 25 09:02:13 2011 +1000
5.2 +++ b/src/xlat/xltcache.h Sun Nov 27 18:20:21 2011 +1000
5.3 @@ -181,4 +181,11 @@
5.4 */
5.5 void xlat_check_integrity();
5.6
5.7 +/**
5.8 + * Dump out the top N translated blocks by number of executions (requires block
5.9 + * profiling to be turned on in order to give meaningful results).
5.10 + * @param topN Number of blocks to print. If 0, print all blocks in the cache
5.11 + */
5.12 +void xlat_dump_cache_by_activity( unsigned int topN );
5.13 +
5.14 #endif /* lxdream_xltcache_H */
.