filename src/sh4/sh4x86.in
changeset 1186:2dc47c67bb93
prev 1182:b38a327ad8fa
next 1191:12fdf3aafcd4
author nkeynes
date Tue Nov 29 17:11:40 2011 +1000
permissions -rw-r--r--
last change Add support for block linking when the block target is fixed. Only a
small improvement (~3%) so far.
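
For context, "block linking" here means rewriting the dispatch at the end of a
translated block into a direct branch once the target block is known. Below is a
minimal sketch of the patch-site rewrite this changeset performs, under a
simplified block layout; the names (linked_block, backpatch_site) are
illustrative rather than lxdream's own, and the real code in the diff
additionally adds PROLOGUE_SIZE so the jump lands past the target block's
prologue.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical stand-in for xlat_cache_block: a use-list head followed by
 * the translated code. */
typedef struct linked_block {
    void   *use_list;   /* chain of call sites currently jumping here */
    uint8_t code[32];   /* translated code would live here */
} linked_block;

/* Rewrite a patch site as "jmp target->code" (opcode 0xE9 + rel32) and
 * thread the site onto the target's use-list.  The site layout matches the
 * diff: 5 bytes of jmp followed by sizeof(void*) bytes of next-pointer. */
static void backpatch_site(uint8_t *site, linked_block *target)
{
    /* rel32 is relative to the end of the 5-byte jmp instruction */
    int32_t rel = (int32_t)(target->code - (site + 5));
    site[0] = 0xE9;
    memcpy(site + 1, &rel, sizeof rel);
    /* record the site so the block can be unlinked if it is invalidated */
    memcpy(site + 5, &target->use_list, sizeof(void *));
    target->use_list = site;
}

int main(void)
{
    static uint8_t site[5 + sizeof(void *)];
    static linked_block target;
    backpatch_site(site, &target);
    printf("opcode=%02x, use-list head=%p\n", site[0], target.use_list);
    return 0;
}
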
--- a/src/sh4/sh4x86.in	Sun Nov 27 18:20:21 2011 +1000
+++ b/src/sh4/sh4x86.in	Tue Nov 29 17:11:40 2011 +1000
@@ -489,9 +489,9 @@
 #define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
 
 /** Offset of xlat_sh4_mode field relative to the code pointer */
-#define XLAT_SH4_MODE_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
-#define XLAT_CHAIN_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
-#define XLAT_ACTIVE_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
+#define XLAT_SH4_MODE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
+#define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
+#define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
 
 void sh4_translate_begin_block( sh4addr_t pc )
 {
@@ -511,7 +511,7 @@
         CALL_ptr( sh4_x86.begin_callback );
     }
     if( sh4_x86.profile_blocks ) {
-        MOVP_immptr_rptr( ((uintptr_t)sh4_x86.code) + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
+        MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
         ADDL_imms_r32disp( 1, REG_EAX, 0 );
     }
 }
@@ -575,6 +575,86 @@
     JMP_TARGET(nocode);
 }
 
+/**
+ * Look up (translating if necessary) the target block for pc, then rewrite the calling site into a direct jump to it and thread the site onto the target's use-list.
+ */
+static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc )
+{
+    uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
+    while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
+        target = XLAT_BLOCK_CHAIN(target);
+    }
+    if( target == NULL ) {
+        target = sh4_translate_basic_block( pc );
+    }
+    uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
+    *backpatch = 0xE9;
+    *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)+PROLOGUE_SIZE-5;
+    *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
+    XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch;
+
+    uint8_t **retptr = ((uint8_t **)__builtin_frame_address(0))+1;
+    assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
+    *retptr = backpatch;
+}
+
+static void emit_translate_and_backpatch()
+{
+    /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
+    CALL1_ptr_r32(sh4_translate_get_code_and_backpatch, REG_ARG1);
+
+    /* When patched, the jmp instruction will be 5 bytes (either platform) -
+     * we need to reserve sizeof(void*) bytes for the use-list
+     * pointer
+     */
+    if( sizeof(void*) == 8 ) {
+        NOP();
+    } else {
+        NOP2();
+    }
+}
+
+/**
+ * If we're jumping to a fixed address (or at least fixed relative to the
+ * current PC), then we can do a direct branch. REG_ARG1 should contain
+ * the PC at this point.
+ */
+static void jump_next_block_fixed_pc( sh4addr_t pc )
+{
+    if( IS_IN_ICACHE(pc) ) {
+        if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN ) {
+            /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
+             * fetch-and-backpatch routine, which will replace the call with a branch */
+            emit_translate_and_backpatch();
+            return;
+        } else {
+            MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
+            ANDP_imms_rptr( -4, REG_EAX );
+        }
+    } else if( sh4_x86.tlb_on ) {
+        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
+    } else {
+        CALL1_ptr_r32(xlat_get_code, REG_ARG1);
+    }
+    jump_next_block();
+
+
+}
+
+void sh4_translate_unlink_block( void *use_list )
+{
+    uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */
+    void *next = use_list;
+    while( next != NULL ) {
+        xlat_output = (uint8_t *)next;
+        next = *(void **)(xlat_output+5);
+        emit_translate_and_backpatch();
+    }
+    xlat_output = tmp;
+}
+
+
+
 static void exit_block()
 {
     emit_epilogue();
@@ -645,16 +725,7 @@
     MOVL_r32_rbpdisp( REG_ARG1, R_PC );
     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
     JBE_label(exitloop);
-
-    if( IS_IN_ICACHE(pc) ) {
-        MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
-        ANDP_imms_rptr( -4, REG_EAX );
-    } else if( sh4_x86.tlb_on ) {
-        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
-    } else {
-        CALL1_ptr_r32(xlat_get_code, REG_ARG1);
-    }
-    jump_next_block();
+    jump_next_block_fixed_pc(pc);
     JMP_TARGET(exitloop);
     exit_block();
 }
@@ -682,16 +753,8 @@
         MOVL_r32_rbpdisp( REG_ARG1, R_PC );
         CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
         JBE_label(exitloop2);
-
-        if( IS_IN_ICACHE(pc) ) {
-            MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
-            ANDP_imms_rptr( -4, REG_EAX );
-        } else if( sh4_x86.tlb_on ) {
-            CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
-        } else {
-            CALL1_ptr_r32(xlat_get_code, REG_ARG1);
-        }
-        jump_next_block();
+
+        jump_next_block_fixed_pc(pc);
         JMP_TARGET(exitloop2);
     }
     exit_block();
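
Complementing the linking sketch above, here is a minimal sketch of the unlink
side: sh4_translate_unlink_block walks the use-list threaded through byte
offset 5 of each patch site and re-emits code over each site. The key subtlety,
preserved below with illustrative names, is that the next pointer must be read
before the site is overwritten. The real routine points xlat_output at each
site and re-runs emit_translate_and_backpatch() rather than NOP-ing it out as
this placeholder does.

#include <stdint.h>
#include <string.h>

/* Placeholder for re-emitting the original call-and-pad sequence over a
 * patch site; lxdream calls emit_translate_and_backpatch() here. */
static void restore_call_site(uint8_t *site)
{
    memset(site, 0x90, 5 + sizeof(void *));
}

/* Walk the use-list and restore every call site that jumps into a block
 * being invalidated.  The next pointer lives at site+5, so it must be
 * loaded before the site is clobbered. */
static void unlink_block(void *use_list)
{
    uint8_t *site = use_list;
    while (site != NULL) {
        void *next;
        memcpy(&next, site + 5, sizeof next);  /* read before overwriting */
        restore_call_site(site);
        site = next;
    }
}

int main(void)
{
    /* Two fake patch sites chained b -> a -> NULL */
    static uint8_t a[5 + sizeof(void *)], b[5 + sizeof(void *)];
    void *pa = a, *pnull = NULL;
    memcpy(a + 5, &pnull, sizeof pnull);
    memcpy(b + 5, &pa, sizeof pa);
    unlink_block(b);
    return 0;
}
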