revision 1186:2dc47c67bb93
summary |
tree |
shortlog |
changelog |
graph |
changeset |
raw | bz2 | zip | gz changeset | 1186:2dc47c67bb93 |
parent | 1185:4f02196f68eb |
child | 1187:266e7a1bae90 |
author | nkeynes |
date | Tue Nov 29 17:11:40 2011 +1000 (12 years ago) |
Add support for block linking when the block target is fixed. Only a small
(~3%) improvement so far.
1.1 --- a/src/sh4/sh4trans.h Tue Nov 29 17:03:05 2011 +10001.2 +++ b/src/sh4/sh4trans.h Tue Nov 29 17:11:40 2011 +10001.3 @@ -154,6 +154,11 @@1.4 gboolean sh4_translate_flush_cache( void );1.6 /**1.7 + * Given a block's use_list, remove all direct links to the block.1.8 + */1.9 +void sh4_translate_unlink_block( void *use_list );1.10 +1.11 +/**1.12 * Support function called from the translator when a breakpoint is hit.1.13 * Either returns immediately (to skip the breakpoint), or aborts the current1.14 * cycle and never returns.
2.1 --- a/src/sh4/sh4x86.in Tue Nov 29 17:03:05 2011 +10002.2 +++ b/src/sh4/sh4x86.in Tue Nov 29 17:11:40 2011 +10002.3 @@ -489,9 +489,9 @@2.4 #define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;2.6 /** Offset of xlat_sh4_mode field relative to the code pointer */2.7 -#define XLAT_SH4_MODE_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )2.8 -#define XLAT_CHAIN_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )2.9 -#define XLAT_ACTIVE_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )2.10 +#define XLAT_SH4_MODE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )2.11 +#define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )2.12 +#define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )2.14 void sh4_translate_begin_block( sh4addr_t pc )2.15 {2.16 @@ -511,7 +511,7 @@2.17 CALL_ptr( sh4_x86.begin_callback );2.18 }2.19 if( sh4_x86.profile_blocks ) {2.20 - MOVP_immptr_rptr( ((uintptr_t)sh4_x86.code) + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );2.21 + MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );2.22 ADDL_imms_r32disp( 1, REG_EAX, 0 );2.23 }2.24 }2.25 @@ -575,6 +575,86 @@2.26 JMP_TARGET(nocode);2.27 }2.29 +/**2.30 + *2.31 + */2.32 +static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc )2.33 +{2.34 + uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);2.35 + while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {2.36 + target = XLAT_BLOCK_CHAIN(target);2.37 + }2.38 + if( target == NULL ) {2.39 + target = sh4_translate_basic_block( pc );2.40 + }2.41 + uint8_t *backpatch = ((uint8_t 
*)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);2.42 + *backpatch = 0xE9;2.43 + *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)+PROLOGUE_SIZE-5;2.44 + *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;2.45 + XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch;2.46 +2.47 + uint8_t **retptr = ((uint8_t **)__builtin_frame_address(0))+1;2.48 + assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );2.49 + *retptr = backpatch;2.50 +}2.51 +2.52 +static void emit_translate_and_backpatch()2.53 +{2.54 + /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */2.55 + CALL1_ptr_r32(sh4_translate_get_code_and_backpatch, REG_ARG1);2.56 +2.57 + /* When patched, the jmp instruction will be 5 bytes (either platform) -2.58 + * we need to reserve sizeof(void*) bytes for the use-list2.59 + * pointer2.60 + */2.61 + if( sizeof(void*) == 8 ) {2.62 + NOP();2.63 + } else {2.64 + NOP2();2.65 + }2.66 +}2.67 +2.68 +/**2.69 + * If we're jumping to a fixed address (or at least fixed relative to the2.70 + * current PC, then we can do a direct branch. 
REG_ARG1 should contain2.71 + * the PC at this point.2.72 + */2.73 +static void jump_next_block_fixed_pc( sh4addr_t pc )2.74 +{2.75 + if( IS_IN_ICACHE(pc) ) {2.76 + if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN ) {2.77 + /* Fixed address, in cache, and fixed SH4 mode - generate a call to the2.78 + * fetch-and-backpatch routine, which will replace the call with a branch */2.79 + emit_translate_and_backpatch();2.80 + return;2.81 + } else {2.82 + MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );2.83 + ANDP_imms_rptr( -4, REG_EAX );2.84 + }2.85 + } else if( sh4_x86.tlb_on ) {2.86 + CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);2.87 + } else {2.88 + CALL1_ptr_r32(xlat_get_code, REG_ARG1);2.89 + }2.90 + jump_next_block();2.91 +2.92 +2.93 +}2.94 +2.95 +void sh4_translate_unlink_block( void *use_list )2.96 +{2.97 + uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */2.98 + void *next = use_list;2.99 + while( next != NULL ) {2.100 + xlat_output = (uint8_t *)next;2.101 + next = *(void **)(xlat_output+5);2.102 + emit_translate_and_backpatch();2.103 + }2.104 + xlat_output = tmp;2.105 +}2.106 +2.107 +2.108 +2.109 static void exit_block()2.110 {2.111 emit_epilogue();2.112 @@ -645,16 +725,7 @@2.113 MOVL_r32_rbpdisp( REG_ARG1, R_PC );2.114 CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );2.115 JBE_label(exitloop);2.116 -2.117 - if( IS_IN_ICACHE(pc) ) {2.118 - MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );2.119 - ANDP_imms_rptr( -4, REG_EAX );2.120 - } else if( sh4_x86.tlb_on ) {2.121 - CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);2.122 - } else {2.123 - CALL1_ptr_r32(xlat_get_code, REG_ARG1);2.124 - }2.125 - jump_next_block();2.126 + jump_next_block_fixed_pc(pc);2.127 JMP_TARGET(exitloop);2.128 exit_block();2.129 }2.130 @@ -682,16 +753,8 @@2.131 MOVL_r32_rbpdisp( REG_ARG1, R_PC );2.132 CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );2.133 JBE_label(exitloop2);2.134 -2.135 - if( IS_IN_ICACHE(pc) ) {2.136 - 
MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );2.137 - ANDP_imms_rptr( -4, REG_EAX );2.138 - } else if( sh4_x86.tlb_on ) {2.139 - CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);2.140 - } else {2.141 - CALL1_ptr_r32(xlat_get_code, REG_ARG1);2.142 - }2.143 - jump_next_block();2.144 +2.145 + jump_next_block_fixed_pc(pc);2.146 JMP_TARGET(exitloop2);2.147 }2.148 exit_block();
3.1 --- a/src/xlat/x86/x86op.h Tue Nov 29 17:03:05 2011 +10003.2 +++ b/src/xlat/x86/x86op.h Tue Nov 29 17:11:40 2011 +10003.3 @@ -482,6 +482,9 @@3.4 #define NEGL_rbpdisp(r1) x86_encode_r32_rbspdisp32(0xF7, 3, disp)3.5 #define NEGQ_r64(r1) x86_encode_r64_rm64(0xF7, 3, r1)3.7 +#define NOP() OP(0x90)3.8 +#define NOP2() OP(0x66); OP(0x90)3.9 +3.10 #define NOTB_r8(r1) x86_encode_r32_rm32(0xF6, 2, r1)3.11 #define NOTL_r32(r1) x86_encode_r32_rm32(0xF7, 2, r1)3.12 #define NOTL_rbpdisp(r1) x86_encode_r32_rbspdisp32(0xF7, 2, disp)
4.1 --- a/src/xlat/xltcache.c Tue Nov 29 17:03:05 2011 +10004.2 +++ b/src/xlat/xltcache.c Tue Nov 29 17:11:40 2011 +10004.3 @@ -22,6 +22,7 @@4.5 #include "dreamcast.h"4.6 #include "sh4/sh4core.h"4.7 +#include "sh4/sh4trans.h"4.8 #include "xlat/xltcache.h"4.9 #include "x86dasm/x86dasm.h"4.11 @@ -121,6 +122,13 @@4.12 }4.13 }4.15 +void xlat_delete_block( xlat_cache_block_t block )4.16 +{4.17 + block->active = 0;4.18 + *block->lut_entry = block->chain;4.19 + sh4_translate_unlink_block( block->use_list );4.20 +}4.21 +4.22 static void xlat_flush_page_by_lut( void **page )4.23 {4.24 int i;4.25 @@ -129,7 +137,7 @@4.26 void *p = page[i];4.27 do {4.28 xlat_cache_block_t block = XLAT_BLOCK_FOR_CODE(p);4.29 - block->active = 0;4.30 + xlat_delete_block(block);4.31 p = block->chain;4.32 } while( p != NULL );4.33 }4.34 @@ -377,7 +385,8 @@4.35 #else4.36 void xlat_promote_to_temp_space( xlat_cache_block_t block )4.37 {4.38 - *block->lut_entry = 0;4.39 + *block->lut_entry = block->chain;4.40 + xlat_delete_block(block);4.41 }4.42 #endif4.44 @@ -414,6 +423,7 @@4.45 } else {4.46 xlat_new_create_ptr->chain = NULL;4.47 }4.48 + xlat_new_create_ptr->use_list = NULL;4.50 xlat_lut[XLAT_LUT_PAGE(address)][XLAT_LUT_ENTRY(address)] =4.51 &xlat_new_create_ptr->code;4.52 @@ -424,6 +434,7 @@4.54 xlat_cache_block_t xlat_extend_block( uint32_t newSize )4.55 {4.56 + assert( xlat_new_create_ptr->use_list == NULL );4.57 while( xlat_new_create_ptr->size < newSize ) {4.58 if( xlat_new_cache_ptr->size == 0 ) {4.59 /* Migrate to the front of the cache to keep it contiguous */4.60 @@ -447,6 +458,7 @@4.61 xlat_new_create_ptr->size = allocation;4.62 xlat_new_create_ptr->lut_entry = lut_entry;4.63 xlat_new_create_ptr->chain = chain;4.64 + xlat_new_create_ptr->use_list = NULL;4.65 *lut_entry = &xlat_new_create_ptr->code;4.66 memmove( xlat_new_create_ptr->code, olddata, oldsize );4.67 } else {4.68 @@ -475,12 +487,6 @@4.69 xlat_new_cache_ptr = xlat_cut_block( xlat_new_create_ptr, destsize );4.70 }4.72 -void 
xlat_delete_block( xlat_cache_block_t block )4.73 -{4.74 - block->active = 0;4.75 - *block->lut_entry = NULL;4.76 -}4.77 -4.78 void xlat_check_cache_integrity( xlat_cache_block_t cache, xlat_cache_block_t ptr, int size )4.79 {4.80 int foundptr = 0;4.81 @@ -517,7 +523,7 @@4.82 if( page != NULL ) {4.83 for( j=0; j<XLAT_LUT_PAGE_ENTRIES; j++ ) {4.84 void *entry = page[j];4.85 - if( ((uintptr_t)entry) > XLAT_LUT_ENTRY_USED ) {4.86 + if( ((uintptr_t)entry) > (uintptr_t)XLAT_LUT_ENTRY_USED ) {4.87 xlat_cache_block_t block = XLAT_BLOCK_FOR_CODE(entry);4.88 if( ptr >= block->code && ptr < block->code + block->size) {4.89 /* Found it */
5.1 --- a/src/xlat/xltcache.h Tue Nov 29 17:03:05 2011 +10005.2 +++ b/src/xlat/xltcache.h Tue Nov 29 17:11:40 2011 +10005.3 @@ -42,6 +42,7 @@5.4 uint32_t size;5.5 void **lut_entry; /* For deletion */5.6 void *chain;5.7 + void *use_list;5.8 uint32_t xlat_sh4_mode; /* comparison with sh4r.xlat_sh4_mode */5.9 uint32_t recover_table_offset; // Offset from code[0] of the recovery table;5.10 uint32_t recover_table_size;
.