filename | src/sh4/sh4x86.in |
changeset | 1186:2dc47c67bb93 |
prev | 1182:b38a327ad8fa |
next | 1191:12fdf3aafcd4 |
author | nkeynes |
date | Tue Nov 29 17:11:40 2011 +1000 (12 years ago) |
permissions | -rw-r--r-- |
last change | Add support for block linking when the block target is fixed. Only a small (~3%) improvement so far.
--- a/src/sh4/sh4x86.in	Sun Nov 27 18:20:21 2011 +1000
+++ b/src/sh4/sh4x86.in	Tue Nov 29 17:11:40 2011 +1000
@@ -489,9 +489,9 @@
 #define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
 
 /** Offset of xlat_sh4_mode field relative to the code pointer */
-#define XLAT_SH4_MODE_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
-#define XLAT_CHAIN_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
-#define XLAT_ACTIVE_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
+#define XLAT_SH4_MODE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
+#define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
+#define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
 
 void sh4_translate_begin_block( sh4addr_t pc )
 {
@@ -511,7 +511,7 @@
         CALL_ptr( sh4_x86.begin_callback );
     }
     if( sh4_x86.profile_blocks ) {
-        MOVP_immptr_rptr( ((uintptr_t)sh4_x86.code) + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
+        MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
         ADDL_imms_r32disp( 1, REG_EAX, 0 );
     }
 }
@@ -575,6 +575,86 @@
     JMP_TARGET(nocode);
 }
 
+/**
+ * Translate the block at pc if necessary, then backpatch the call site with a direct jump to it.
+ */
+static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc )
+{
+    uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
+    while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
+        target = XLAT_BLOCK_CHAIN(target);
+    }
+    if( target == NULL ) {
+        target = sh4_translate_basic_block( pc );
+    }
+    uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
+    *backpatch = 0xE9;
+    *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)+PROLOGUE_SIZE-5;
+    *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
+    XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch;
+
+    uint8_t **retptr = ((uint8_t **)__builtin_frame_address(0))+1;
+    assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
+    *retptr = backpatch;
+}
+
+static void emit_translate_and_backpatch()
+{
+    /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
+    CALL1_ptr_r32(sh4_translate_get_code_and_backpatch, REG_ARG1);
+
+    /* When patched, the jmp instruction will be 5 bytes (either platform) -
+     * we need to reserve sizeof(void*) bytes for the use-list
+     * pointer
+     */
+    if( sizeof(void*) == 8 ) {
+        NOP();
+    } else {
+        NOP2();
+    }
+}
+
+/**
+ * If we're jumping to a fixed address (or at least fixed relative to the
+ * current PC), then we can do a direct branch. REG_ARG1 should contain
+ * the PC at this point.
+ */
+static void jump_next_block_fixed_pc( sh4addr_t pc )
+{
+    if( IS_IN_ICACHE(pc) ) {
+        if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN ) {
+            /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
+             * fetch-and-backpatch routine, which will replace the call with a branch */
+            emit_translate_and_backpatch();
+            return;
+        } else {
+            MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
+            ANDP_imms_rptr( -4, REG_EAX );
+        }
+    } else if( sh4_x86.tlb_on ) {
+        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
+    } else {
+        CALL1_ptr_r32(xlat_get_code, REG_ARG1);
+    }
+    jump_next_block();
+
+
+}
+
+void sh4_translate_unlink_block( void *use_list )
+{
+    uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */
+    void *next = use_list;
+    while( next != NULL ) {
+        xlat_output = (uint8_t *)next;
+        next = *(void **)(xlat_output+5);
+        emit_translate_and_backpatch();
+    }
+    xlat_output = tmp;
+}
+
+
+
 static void exit_block()
 {
     emit_epilogue();
@@ -645,16 +725,7 @@
     MOVL_r32_rbpdisp( REG_ARG1, R_PC );
     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
     JBE_label(exitloop);
-
-    if( IS_IN_ICACHE(pc) ) {
-        MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
-        ANDP_imms_rptr( -4, REG_EAX );
-    } else if( sh4_x86.tlb_on ) {
-        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
-    } else {
-        CALL1_ptr_r32(xlat_get_code, REG_ARG1);
-    }
-    jump_next_block();
+    jump_next_block_fixed_pc(pc);
     JMP_TARGET(exitloop);
     exit_block();
 }
@@ -682,16 +753,8 @@
         MOVL_r32_rbpdisp( REG_ARG1, R_PC );
         CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
         JBE_label(exitloop2);
-
-        if( IS_IN_ICACHE(pc) ) {
-            MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
-            ANDP_imms_rptr( -4, REG_EAX );
-        } else if( sh4_x86.tlb_on ) {
-            CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
-        } else {
-            CALL1_ptr_r32(xlat_get_code, REG_ARG1);
-        }
-        jump_next_block();
+
+        jump_next_block_fixed_pc(pc);
         JMP_TARGET(exitloop2);
     }
     exit_block();
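
The linking mechanism in this change is terse, so a worked example may help. emit_translate_and_backpatch() emits a 5-byte CALL to the FASTCALL helper plus pointer-sized padding; on first execution the helper translates the target block, overwrites its own call site with an E9 (JMP rel32) to the translated code, and threads the call site onto the block's use_list so sh4_translate_unlink_block() can regenerate the call if the block is later invalidated. The standalone sketch below reproduces only the byte arithmetic, on an ordinary buffer rather than emitted code; patch_call_to_jmp() and the buffer layout are invented for illustration and are not part of the lxdream sources, and the real helper additionally adds PROLOGUE_SIZE so the jump skips the block prologue and rewrites its saved return address so execution resumes at the new JMP.

/* Minimal sketch of the call-site backpatching idea, under the assumptions
 * stated above: patch_call_to_jmp() and the fixed buffer layout are
 * hypothetical, used only to show the rel32 and use-list bookkeeping. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CALL_SIZE 5   /* E8 rel32 - same length as the E9 rel32 that replaces it */

/* Rewrite the CALL at callsite into a JMP to block_code, and push the call
 * site onto the block's use-list (head stored at *block_use_list) so the
 * link can be undone later. */
static void patch_call_to_jmp( uint8_t *callsite, uint8_t *block_code, void **block_use_list )
{
    int32_t rel = (int32_t)(block_code - (callsite + CALL_SIZE));
    callsite[0] = 0xE9;                          /* JMP rel32 opcode */
    memcpy( callsite + 1, &rel, sizeof(rel) );   /* displacement from next insn */
    /* Old list head goes into the padding bytes reserved after the call... */
    memcpy( callsite + CALL_SIZE, block_use_list, sizeof(void *) );
    /* ...and the call site itself becomes the new list head. */
    *block_use_list = callsite;
}

int main(void)
{
    uint8_t buf[64] = {0};
    uint8_t *callsite = buf;        /* emitted CALL + pointer-sized padding */
    uint8_t *block    = buf + 32;   /* pretend the translated block lives here */
    void *use_list    = NULL;       /* block's use-list starts empty */

    callsite[0] = 0xE8;             /* the original CALL rel32 opcode */
    patch_call_to_jmp( callsite, block, &use_list );

    int32_t rel;
    memcpy( &rel, callsite + 1, sizeof(rel) );
    assert( callsite[0] == 0xE9 );
    assert( callsite + CALL_SIZE + rel == block ); /* jump lands on the block */
    assert( use_list == callsite );                /* call site is now linked */
    printf( "rel32 = %d, use-list head = %p\n", (int)rel, use_list );
    return 0;
}

Storing the previous list head directly in the instruction stream avoids a separate side table: each patched call site is itself a use-list node, at the cost of re-emitting the call sequence (via emit_translate_and_backpatch()) when a block is unlinked.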