Search
lxdream.org :: lxdream :: r1186:2dc47c67bb93
lxdream 0.9.1
released Jun 29
Download Now
changeset 1186:2dc47c67bb93
parent 1185:4f02196f68eb
child 1187:266e7a1bae90
author nkeynes
date Tue Nov 29 17:11:40 2011 +1000 (7 years ago)
Add support for block linking when the block target is fixed. Only a small
improvement (~3%) so far.
src/sh4/sh4trans.h
src/sh4/sh4x86.in
src/xlat/x86/x86op.h
src/xlat/xltcache.c
src/xlat/xltcache.h
1.1 --- a/src/sh4/sh4trans.h Tue Nov 29 17:03:05 2011 +1000
1.2 +++ b/src/sh4/sh4trans.h Tue Nov 29 17:11:40 2011 +1000
1.3 @@ -154,6 +154,11 @@
1.4 gboolean sh4_translate_flush_cache( void );
1.5
1.6 /**
1.7 + * Given a block's use_list, remove all direct links to the block.
1.8 + */
1.9 +void sh4_translate_unlink_block( void *use_list );
1.10 +
1.11 +/**
1.12 * Support function called from the translator when a breakpoint is hit.
1.13 * Either returns immediately (to skip the breakpoint), or aborts the current
1.14 * cycle and never returns.
2.1 --- a/src/sh4/sh4x86.in Tue Nov 29 17:03:05 2011 +1000
2.2 +++ b/src/sh4/sh4x86.in Tue Nov 29 17:11:40 2011 +1000
2.3 @@ -489,9 +489,9 @@
2.4 #define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
2.5
2.6 /** Offset of xlat_sh4_mode field relative to the code pointer */
2.7 -#define XLAT_SH4_MODE_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
2.8 -#define XLAT_CHAIN_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
2.9 -#define XLAT_ACTIVE_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
2.10 +#define XLAT_SH4_MODE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
2.11 +#define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
2.12 +#define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
2.13
2.14 void sh4_translate_begin_block( sh4addr_t pc )
2.15 {
2.16 @@ -511,7 +511,7 @@
2.17 CALL_ptr( sh4_x86.begin_callback );
2.18 }
2.19 if( sh4_x86.profile_blocks ) {
2.20 - MOVP_immptr_rptr( ((uintptr_t)sh4_x86.code) + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
2.21 + MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
2.22 ADDL_imms_r32disp( 1, REG_EAX, 0 );
2.23 }
2.24 }
2.25 @@ -575,6 +575,86 @@
2.26 JMP_TARGET(nocode);
2.27 }
2.28
2.29 +/**
2.30 + *
2.31 + */
2.32 +static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc )
2.33 +{
2.34 + uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
2.35 + while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
2.36 + target = XLAT_BLOCK_CHAIN(target);
2.37 + }
2.38 + if( target == NULL ) {
2.39 + target = sh4_translate_basic_block( pc );
2.40 + }
2.41 + uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
2.42 + *backpatch = 0xE9;
2.43 + *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)+PROLOGUE_SIZE-5;
2.44 + *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
2.45 + XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch;
2.46 +
2.47 + uint8_t **retptr = ((uint8_t **)__builtin_frame_address(0))+1;
2.48 + assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
2.49 + *retptr = backpatch;
2.50 +}
2.51 +
2.52 +static void emit_translate_and_backpatch()
2.53 +{
2.54 + /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
2.55 + CALL1_ptr_r32(sh4_translate_get_code_and_backpatch, REG_ARG1);
2.56 +
2.57 + /* When patched, the jmp instruction will be 5 bytes (either platform) -
2.58 + * we need to reserve sizeof(void*) bytes for the use-list
2.59 + * pointer
2.60 + */
2.61 + if( sizeof(void*) == 8 ) {
2.62 + NOP();
2.63 + } else {
2.64 + NOP2();
2.65 + }
2.66 +}
2.67 +
2.68 +/**
2.69 + * If we're jumping to a fixed address (or at least fixed relative to the
2.70 + * current PC), then we can do a direct branch. REG_ARG1 should contain
2.71 + * the PC at this point.
2.72 + */
2.73 +static void jump_next_block_fixed_pc( sh4addr_t pc )
2.74 +{
2.75 + if( IS_IN_ICACHE(pc) ) {
2.76 + if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN ) {
2.77 + /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
2.78 + * fetch-and-backpatch routine, which will replace the call with a branch */
2.79 + emit_translate_and_backpatch();
2.80 + return;
2.81 + } else {
2.82 + MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
2.83 + ANDP_imms_rptr( -4, REG_EAX );
2.84 + }
2.85 + } else if( sh4_x86.tlb_on ) {
2.86 + CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
2.87 + } else {
2.88 + CALL1_ptr_r32(xlat_get_code, REG_ARG1);
2.89 + }
2.90 + jump_next_block();
2.91 +
2.92 +
2.93 +}
2.94 +
2.95 +void sh4_translate_unlink_block( void *use_list )
2.96 +{
2.97 + uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */
2.98 + void *next = use_list;
2.99 + while( next != NULL ) {
2.100 + xlat_output = (uint8_t *)next;
2.101 + next = *(void **)(xlat_output+5);
2.102 + emit_translate_and_backpatch();
2.103 + }
2.104 + xlat_output = tmp;
2.105 +}
2.106 +
2.107 +
2.108 +
2.109 static void exit_block()
2.110 {
2.111 emit_epilogue();
2.112 @@ -645,16 +725,7 @@
2.113 MOVL_r32_rbpdisp( REG_ARG1, R_PC );
2.114 CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
2.115 JBE_label(exitloop);
2.116 -
2.117 - if( IS_IN_ICACHE(pc) ) {
2.118 - MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
2.119 - ANDP_imms_rptr( -4, REG_EAX );
2.120 - } else if( sh4_x86.tlb_on ) {
2.121 - CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
2.122 - } else {
2.123 - CALL1_ptr_r32(xlat_get_code, REG_ARG1);
2.124 - }
2.125 - jump_next_block();
2.126 + jump_next_block_fixed_pc(pc);
2.127 JMP_TARGET(exitloop);
2.128 exit_block();
2.129 }
2.130 @@ -682,16 +753,8 @@
2.131 MOVL_r32_rbpdisp( REG_ARG1, R_PC );
2.132 CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
2.133 JBE_label(exitloop2);
2.134 -
2.135 - if( IS_IN_ICACHE(pc) ) {
2.136 - MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
2.137 - ANDP_imms_rptr( -4, REG_EAX );
2.138 - } else if( sh4_x86.tlb_on ) {
2.139 - CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
2.140 - } else {
2.141 - CALL1_ptr_r32(xlat_get_code, REG_ARG1);
2.142 - }
2.143 - jump_next_block();
2.144 +
2.145 + jump_next_block_fixed_pc(pc);
2.146 JMP_TARGET(exitloop2);
2.147 }
2.148 exit_block();
3.1 --- a/src/xlat/x86/x86op.h Tue Nov 29 17:03:05 2011 +1000
3.2 +++ b/src/xlat/x86/x86op.h Tue Nov 29 17:11:40 2011 +1000
3.3 @@ -482,6 +482,9 @@
3.4 #define NEGL_rbpdisp(r1) x86_encode_r32_rbspdisp32(0xF7, 3, disp)
3.5 #define NEGQ_r64(r1) x86_encode_r64_rm64(0xF7, 3, r1)
3.6
3.7 +#define NOP() OP(0x90)
3.8 +#define NOP2() OP(0x66); OP(0x90)
3.9 +
3.10 #define NOTB_r8(r1) x86_encode_r32_rm32(0xF6, 2, r1)
3.11 #define NOTL_r32(r1) x86_encode_r32_rm32(0xF7, 2, r1)
3.12 #define NOTL_rbpdisp(r1) x86_encode_r32_rbspdisp32(0xF7, 2, disp)
4.1 --- a/src/xlat/xltcache.c Tue Nov 29 17:03:05 2011 +1000
4.2 +++ b/src/xlat/xltcache.c Tue Nov 29 17:11:40 2011 +1000
4.3 @@ -22,6 +22,7 @@
4.4
4.5 #include "dreamcast.h"
4.6 #include "sh4/sh4core.h"
4.7 +#include "sh4/sh4trans.h"
4.8 #include "xlat/xltcache.h"
4.9 #include "x86dasm/x86dasm.h"
4.10
4.11 @@ -121,6 +122,13 @@
4.12 }
4.13 }
4.14
4.15 +void xlat_delete_block( xlat_cache_block_t block )
4.16 +{
4.17 + block->active = 0;
4.18 + *block->lut_entry = block->chain;
4.19 + sh4_translate_unlink_block( block->use_list );
4.20 +}
4.21 +
4.22 static void xlat_flush_page_by_lut( void **page )
4.23 {
4.24 int i;
4.25 @@ -129,7 +137,7 @@
4.26 void *p = page[i];
4.27 do {
4.28 xlat_cache_block_t block = XLAT_BLOCK_FOR_CODE(p);
4.29 - block->active = 0;
4.30 + xlat_delete_block(block);
4.31 p = block->chain;
4.32 } while( p != NULL );
4.33 }
4.34 @@ -377,7 +385,8 @@
4.35 #else
4.36 void xlat_promote_to_temp_space( xlat_cache_block_t block )
4.37 {
4.38 - *block->lut_entry = 0;
4.39 + *block->lut_entry = block->chain;
4.40 + xlat_delete_block(block);
4.41 }
4.42 #endif
4.43
4.44 @@ -414,6 +423,7 @@
4.45 } else {
4.46 xlat_new_create_ptr->chain = NULL;
4.47 }
4.48 + xlat_new_create_ptr->use_list = NULL;
4.49
4.50 xlat_lut[XLAT_LUT_PAGE(address)][XLAT_LUT_ENTRY(address)] =
4.51 &xlat_new_create_ptr->code;
4.52 @@ -424,6 +434,7 @@
4.53
4.54 xlat_cache_block_t xlat_extend_block( uint32_t newSize )
4.55 {
4.56 + assert( xlat_new_create_ptr->use_list == NULL );
4.57 while( xlat_new_create_ptr->size < newSize ) {
4.58 if( xlat_new_cache_ptr->size == 0 ) {
4.59 /* Migrate to the front of the cache to keep it contiguous */
4.60 @@ -447,6 +458,7 @@
4.61 xlat_new_create_ptr->size = allocation;
4.62 xlat_new_create_ptr->lut_entry = lut_entry;
4.63 xlat_new_create_ptr->chain = chain;
4.64 + xlat_new_create_ptr->use_list = NULL;
4.65 *lut_entry = &xlat_new_create_ptr->code;
4.66 memmove( xlat_new_create_ptr->code, olddata, oldsize );
4.67 } else {
4.68 @@ -475,12 +487,6 @@
4.69 xlat_new_cache_ptr = xlat_cut_block( xlat_new_create_ptr, destsize );
4.70 }
4.71
4.72 -void xlat_delete_block( xlat_cache_block_t block )
4.73 -{
4.74 - block->active = 0;
4.75 - *block->lut_entry = NULL;
4.76 -}
4.77 -
4.78 void xlat_check_cache_integrity( xlat_cache_block_t cache, xlat_cache_block_t ptr, int size )
4.79 {
4.80 int foundptr = 0;
4.81 @@ -517,7 +523,7 @@
4.82 if( page != NULL ) {
4.83 for( j=0; j<XLAT_LUT_PAGE_ENTRIES; j++ ) {
4.84 void *entry = page[j];
4.85 - if( ((uintptr_t)entry) > XLAT_LUT_ENTRY_USED ) {
4.86 + if( ((uintptr_t)entry) > (uintptr_t)XLAT_LUT_ENTRY_USED ) {
4.87 xlat_cache_block_t block = XLAT_BLOCK_FOR_CODE(entry);
4.88 if( ptr >= block->code && ptr < block->code + block->size) {
4.89 /* Found it */
5.1 --- a/src/xlat/xltcache.h Tue Nov 29 17:03:05 2011 +1000
5.2 +++ b/src/xlat/xltcache.h Tue Nov 29 17:11:40 2011 +1000
5.3 @@ -42,6 +42,7 @@
5.4 uint32_t size;
5.5 void **lut_entry; /* For deletion */
5.6 void *chain;
5.7 + void *use_list;
5.8 uint32_t xlat_sh4_mode; /* comparison with sh4r.xlat_sh4_mode */
5.9 uint32_t recover_table_offset; // Offset from code[0] of the recovery table;
5.10 uint32_t recover_table_size;
.