changeset 1112:4cac5e474d4c
parent    1111:742c073f353f
child     1113:174fa8e137b6
author    nkeynes
date      Tue Jul 13 18:23:16 2010 +1000
Rearrange the main translation loop to allow translated blocks to jump
directly to their successors without needing to return to the main loop
in between. Shaves about 6% off the core runtime.
src/eventq.h
src/sh4/sh4.c
src/sh4/sh4trans.c
src/sh4/sh4x86.in
src/test/testsh4x86.c
src/xlat/x86/amd64abi.h
src/xlat/x86/ia32abi.h
src/xlat/x86/x86op.h
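
In effect, each translated block's exit sequence now adds its cycle count to sh4r.slice_cycle, checks whether an event is due, and if not looks up its successor and jumps straight into it past the prologue; control only returns to the C dispatch loop when an event (including the new end-of-timeslice marker) is pending. A C-level paraphrase of the tail that the exit_block_* helpers in src/sh4/sh4x86.in now emit (a sketch of the generated code's behaviour, not a function in the tree; block_cycles, next_pc, next and goto_native are placeholders for values and jumps baked into the emitted x86):

    sh4r.slice_cycle += block_cycles;                /* cycles consumed by this block */
    sh4r.pc = next_pc;
    if( sh4r.slice_cycle < sh4r.event_pending ) {    /* nothing due yet */
        void *next = xlat_get_code_by_vma( sh4r.pc );
        if( next != NULL && XLAT_BLOCK_MODE(next) == sh4r.xlat_sh4_mode )
            goto_native( next + PROLOGUE_SIZE );     /* chain directly into the successor */
    }
    return;                                          /* fall back to sh4_translate_run_slice() */
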
1.1 --- a/src/eventq.h Tue Jul 13 16:26:49 2010 +1000
1.2 +++ b/src/eventq.h Tue Jul 13 18:23:16 2010 +1000
1.3 @@ -80,6 +80,7 @@
1.4 #define EVENT_TMU2 99
1.5 #define EVENT_GUNPOS 100
1.6
1.7 +#define EVENT_ENDTIMESLICE 127
1.8 #ifdef __cplusplus
1.9 }
1.10 #endif
2.1 --- a/src/sh4/sh4.c Tue Jul 13 16:26:49 2010 +1000
2.2 +++ b/src/sh4/sh4.c Tue Jul 13 18:23:16 2010 +1000
2.3 @@ -153,6 +153,13 @@
2.4 static gboolean sh4_running = FALSE;
2.5 struct sh4_icache_struct sh4_icache = { NULL, -1, -1, 0 };
2.6
2.7 +/* At the moment this is a dummy event to mark the end of the
2.8 + * timeslice
2.9 + */
2.10 +void sh4_dummy_event(int eventid)
2.11 +{
2.12 +}
2.13 +
2.14 void sh4_translate_set_enabled( gboolean use )
2.15 {
2.16 // No-op if the translator was not built
2.17 @@ -172,6 +179,7 @@
2.18 void sh4_init(void)
2.19 {
2.20 register_io_regions( mmio_list_sh4mmio );
2.21 + register_event_callback( EVENT_ENDTIMESLICE, sh4_dummy_event );
2.22 MMU_init();
2.23 TMU_init();
2.24 xlat_cache_init();
3.1 --- a/src/sh4/sh4trans.c Tue Jul 13 16:26:49 2010 +1000
3.2 +++ b/src/sh4/sh4trans.c Tue Jul 13 18:23:16 2010 +1000
3.3 @@ -33,7 +33,8 @@
3.4 uint32_t sh4_translate_run_slice( uint32_t nanosecs )
3.5 {
3.6 void * (*code)() = NULL;
3.7 - while( sh4r.slice_cycle < nanosecs ) {
3.8 + event_schedule( EVENT_ENDTIMESLICE, nanosecs );
3.9 + for(;;) {
3.10 if( sh4r.event_pending <= sh4r.slice_cycle ) {
3.11 if( sh4r.event_types & PENDING_EVENT ) {
3.12 event_execute();
3.13 @@ -43,33 +44,23 @@
3.14 sh4_accept_interrupt();
3.15 code = NULL;
3.16 }
3.17 + if( sh4r.slice_cycle >= nanosecs )
3.18 + return nanosecs;
3.19 }
3.20
3.21 - if( code == NULL ) {
3.22 - if( IS_SYSCALL(sh4r.pc) ) {
3.23 - uint32_t pc = sh4r.pc;
3.24 - sh4r.pc = sh4r.pr;
3.25 - sh4r.in_delay_slot = 0;
3.26 - syscall_invoke( pc );
3.27 - }
3.28 + if( IS_SYSCALL(sh4r.pc) ) {
3.29 + uint32_t pc = sh4r.pc;
3.30 + sh4r.pc = sh4r.pr;
3.31 + sh4r.in_delay_slot = 0;
3.32 + syscall_invoke( pc );
3.33 + }
3.34
3.35 - code = xlat_get_code_by_vma( sh4r.pc );
3.36 - if( code == NULL || sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(code) ) {
3.37 - code = sh4_translate_basic_block( sh4r.pc );
3.38 - }
3.39 - } else if( sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(code) ) {
3.40 - if( !IS_IN_ICACHE(sh4r.pc) ) {
3.41 - /* If TLB is off, we may have gotten here without updating
3.42 - * the icache, so do it now. This should never fail, so...
3.43 - */
3.44 - mmu_update_icache(sh4r.pc);
3.45 - assert( IS_IN_ICACHE(sh4r.pc) );
3.46 - }
3.47 + code = xlat_get_code_by_vma( sh4r.pc );
3.48 + if( code == NULL || sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(code) ) {
3.49 code = sh4_translate_basic_block( sh4r.pc );
3.50 }
3.51 - code = code();
3.52 + code();
3.53 }
3.54 - return nanosecs;
3.55 }
3.56
3.57 uint8_t *xlat_output;
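
With the end of the timeslice registered as an ordinary event, the translated code's own event check is what hands control back to this loop: once slice_cycle reaches nanosecs, event_pending <= slice_cycle holds, the running block returns, event_execute() dispatches the deliberately empty EVENT_ENDTIMESLICE callback, and the new slice_cycle >= nanosecs test ends the slice. Condensed (names as in the diff above):

    event_schedule( EVENT_ENDTIMESLICE, nanosecs );  /* an event is now due no later than nanosecs */
    for(;;) {
        if( sh4r.event_pending <= sh4r.slice_cycle ) {
            if( sh4r.event_types & PENDING_EVENT )
                event_execute();                     /* includes the no-op end-of-slice event */
            if( sh4r.slice_cycle >= nanosecs )
                return nanosecs;                     /* timeslice complete */
        }
        /* otherwise look up or translate the block at sh4r.pc and enter it;
         * the generated code only returns here once an event is due again */
    }
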
4.1 --- a/src/sh4/sh4x86.in Tue Jul 13 16:26:49 2010 +1000
4.2 +++ b/src/sh4/sh4x86.in Tue Jul 13 18:23:16 2010 +1000
4.3 @@ -71,6 +71,8 @@
4.4 #define DELAY_PC 1
4.5 #define DELAY_PC_PR 2
4.6
4.7 +#define SH4_MODE_UNKNOWN -1
4.8 +
4.9 struct backpatch_record {
4.10 uint32_t fixup_offset;
4.11 uint32_t fixup_icount;
4.12 @@ -84,6 +86,7 @@
4.13 */
4.14 struct sh4_x86_state {
4.15 int in_delay_slot;
4.16 + uint8_t *code;
4.17 gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
4.18 gboolean branch_taken; /* true if we branched unconditionally */
4.19 gboolean double_prec; /* true if FPU is in double-precision mode */
4.20 @@ -91,6 +94,7 @@
4.21 gboolean sse3_enabled; /* true if host supports SSE3 instructions */
4.22 uint32_t block_start_pc;
4.23 uint32_t stack_posn; /* Trace stack height for alignment purposes */
4.24 + uint32_t sh4_mode; /* Mirror of sh4r.xlat_sh4_mode */
4.25 int tstate;
4.26
4.27 /* mode flags */
4.28 @@ -171,7 +175,7 @@
4.29 fprintf( out, "%c%016lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
4.30 target_pc, op, buf );
4.31 #else
4.32 - fprintf( out, "%c%08x: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
4.33 + fprintf( out, "%c%08lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
4.34 target_pc, op, buf );
4.35 #endif
4.36 if( source_recov_table < source_recov_end &&
4.37 @@ -249,6 +253,7 @@
4.38 #define SETNE_t() SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
4.39 #define SETC_r8(r1) SETCCB_cc_r8(X86_COND_C, r1)
4.40 #define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
4.41 +#define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
4.42 #define JE_label(label) JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
4.43 #define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
4.44 #define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
4.45 @@ -317,7 +322,7 @@
4.46 /* Exception checks - Note that all exception checks will clobber EAX */
4.47
4.48 #define check_priv( ) \
4.49 - if( (sh4r.xlat_sh4_mode & SR_MD) == 0 ) { \
4.50 + if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
4.51 if( sh4_x86.in_delay_slot ) { \
4.52 exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2) ); \
4.53 } else { \
4.54 @@ -365,7 +370,7 @@
4.55 TESTL_imms_r32( 0x00000007, x86reg ); \
4.56 JNE_exc(EXC_DATA_ADDR_WRITE);
4.57
4.58 -#define address_space() ((sh4r.xlat_sh4_mode&SR_MD) ? (uintptr_t)sh4_address_space : (uintptr_t)sh4_user_address_space)
4.59 +#define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_address_space : (uintptr_t)sh4_user_address_space)
4.60
4.61 #define UNDEF(ir)
4.62 /* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
4.63 @@ -375,7 +380,7 @@
4.64 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
4.65 {
4.66 decode_address(address_space(), addr_reg);
4.67 - if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) {
4.68 + if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
4.69 CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
4.70 } else {
4.71 if( addr_reg != REG_ARG1 ) {
4.72 @@ -393,7 +398,7 @@
4.73 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
4.74 {
4.75 decode_address(address_space(), addr_reg);
4.76 - if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) {
4.77 + if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
4.78 CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
4.79 } else {
4.80 if( value_reg != REG_ARG2 ) {
4.81 @@ -444,8 +449,7 @@
4.82
4.83 void sh4_translate_begin_block( sh4addr_t pc )
4.84 {
4.85 - enter_block();
4.86 - MOVP_immptr_rptr( ((uint8_t *)&sh4r) + 128, REG_EBP );
4.87 + sh4_x86.code = xlat_output;
4.88 sh4_x86.in_delay_slot = FALSE;
4.89 sh4_x86.fpuen_checked = FALSE;
4.90 sh4_x86.branch_taken = FALSE;
4.91 @@ -455,6 +459,8 @@
4.92 sh4_x86.tstate = TSTATE_NONE;
4.93 sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
4.94 sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
4.95 + sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
4.96 + enter_block();
4.97 }
4.98
4.99
4.100 @@ -481,19 +487,49 @@
4.101
4.102 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
4.103
4.104 +/** Offset of xlat_sh4_mode field relative to the code pointer */
4.105 +#define XLAT_SH4_MODE_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
4.106 +
4.107 +/**
4.108 + * Test if the loaded target code pointer in %eax is valid, and if so jump
4.109 + * directly into it, bypassing the normal exit.
4.110 + */
4.111 +static void jump_next_block()
4.112 +{
4.113 + TESTP_rptr_rptr(REG_EAX, REG_EAX);
4.114 + JE_label(nocode);
4.115 + if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
4.116 + /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
4.117 + MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
4.118 + CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
4.119 + } else {
4.120 + CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
4.121 + }
4.122 + JNE_label(wrongmode);
4.123 + LEAP_rptrdisp_rptr(REG_EAX, PROLOGUE_SIZE,REG_EAX);
4.124 + JMP_rptr(REG_EAX);
4.125 + JMP_TARGET(nocode); JMP_TARGET(wrongmode);
4.126 +}
4.127 +
4.128 /**
4.129 * Exit the block with sh4r.pc already written
4.130 */
4.131 void exit_block_pcset( sh4addr_t pc )
4.132 {
4.133 MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
4.134 - ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
4.135 + ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
4.136 + MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
4.137 + CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
4.138 + JBE_label(exitloop);
4.139 MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
4.140 if( sh4_x86.tlb_on ) {
4.141 CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
4.142 } else {
4.143 CALL1_ptr_r32(xlat_get_code,REG_ARG1);
4.144 }
4.145 +
4.146 + jump_next_block();
4.147 + JMP_TARGET(exitloop);
4.148 exit_block();
4.149 }
4.150
4.151 @@ -503,14 +539,20 @@
4.152 void exit_block_newpcset( sh4addr_t pc )
4.153 {
4.154 MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
4.155 - ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
4.156 + ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
4.157 + MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
4.158 MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
4.159 MOVL_r32_rbpdisp( REG_ARG1, R_PC );
4.160 + CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
4.161 + JBE_label(exitloop);
4.162 if( sh4_x86.tlb_on ) {
4.163 CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
4.164 } else {
4.165 CALL1_ptr_r32(xlat_get_code,REG_ARG1);
4.166 }
4.167 +
4.168 + jump_next_block();
4.169 + JMP_TARGET(exitloop);
4.170 exit_block();
4.171 }
4.172
4.173 @@ -520,18 +562,25 @@
4.174 */
4.175 void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
4.176 {
4.177 - MOVL_imm32_r32( pc, REG_ECX );
4.178 - MOVL_r32_rbpdisp( REG_ECX, R_PC );
4.179 + MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
4.180 + ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
4.181 + MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
4.182 +
4.183 + MOVL_imm32_r32( pc, REG_ARG1 );
4.184 + MOVL_r32_rbpdisp( REG_ARG1, R_PC );
4.185 + CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
4.186 + JBE_label(exitloop);
4.187 +
4.188 if( IS_IN_ICACHE(pc) ) {
4.189 MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
4.190 ANDP_imms_rptr( -4, REG_EAX );
4.191 } else if( sh4_x86.tlb_on ) {
4.192 - CALL1_ptr_r32(xlat_get_code_by_vma, REG_ECX);
4.193 + CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
4.194 } else {
4.195 - CALL1_ptr_r32(xlat_get_code, REG_ECX);
4.196 + CALL1_ptr_r32(xlat_get_code, REG_ARG1);
4.197 }
4.198 - MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
4.199 - ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
4.200 + jump_next_block();
4.201 + JMP_TARGET(exitloop);
4.202 exit_block();
4.203 }
4.204
4.205 @@ -540,19 +589,36 @@
4.206 */
4.207 void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
4.208 {
4.209 - MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
4.210 - ADDL_rbpdisp_r32( R_PC, REG_ECX );
4.211 - MOVL_r32_rbpdisp( REG_ECX, R_PC );
4.212 - if( IS_IN_ICACHE(pc) ) {
4.213 - MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
4.214 - ANDP_imms_rptr( -4, REG_EAX );
4.215 - } else if( sh4_x86.tlb_on ) {
4.216 - CALL1_ptr_r32(xlat_get_code_by_vma, REG_ECX);
4.217 - } else {
4.218 - CALL1_ptr_r32(xlat_get_code, REG_ECX);
4.219 + MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
4.220 + ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
4.221 + MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
4.222 +
4.223 + if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
4.224 + /* Special case for tight loops - the PC doesn't change, and
4.225 + * we already know the target address. Just check events pending before
4.226 + * looping.
4.227 + */
4.228 + CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
4.229 + uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output)) + PROLOGUE_SIZE;
4.230 + JCC_cc_prerel(X86_COND_A, backdisp);
4.231 + } else {
4.232 + MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
4.233 + ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
4.234 + MOVL_r32_rbpdisp( REG_ARG1, R_PC );
4.235 + CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
4.236 + JBE_label(exitloop2);
4.237 +
4.238 + if( IS_IN_ICACHE(pc) ) {
4.239 + MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
4.240 + ANDP_imms_rptr( -4, REG_EAX );
4.241 + } else if( sh4_x86.tlb_on ) {
4.242 + CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
4.243 + } else {
4.244 + CALL1_ptr_r32(xlat_get_code, REG_ARG1);
4.245 + }
4.246 + jump_next_block();
4.247 + JMP_TARGET(exitloop2);
4.248 }
4.249 - MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
4.250 - ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
4.251 exit_block();
4.252 }
4.253
4.254 @@ -567,13 +633,6 @@
4.255 ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
4.256 MOVL_imm32_r32( code, REG_ARG1 );
4.257 CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
4.258 - MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
4.259 - if( sh4_x86.tlb_on ) {
4.260 - CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
4.261 - } else {
4.262 - CALL1_ptr_r32(xlat_get_code,REG_ARG1);
4.263 - }
4.264 -
4.265 exit_block();
4.266 }
4.267
4.268 @@ -599,13 +658,7 @@
4.269 MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
4.270 MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );
4.271
4.272 - CALL_ptr( sh4_execute_instruction );
4.273 - MOVL_rbpdisp_r32( R_PC, REG_EAX );
4.274 - if( sh4_x86.tlb_on ) {
4.275 - CALL1_ptr_r32(xlat_get_code_by_vma,REG_EAX);
4.276 - } else {
4.277 - CALL1_ptr_r32(xlat_get_code,REG_EAX);
4.278 - }
4.279 + CALL_ptr( sh4_execute_instruction );
4.280 exit_block();
4.281 }
4.282
4.283 @@ -627,12 +680,6 @@
4.284 MOVL_moffptr_eax( &sh4_cpu_period );
4.285 MULL_r32( REG_EDX );
4.286 ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
4.287 - MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
4.288 - if( sh4_x86.tlb_on ) {
4.289 - CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
4.290 - } else {
4.291 - CALL1_ptr_r32(xlat_get_code, REG_ARG1);
4.292 - }
4.293 exit_block();
4.294
4.295 for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
4.296 @@ -1975,6 +2022,7 @@
4.297 sh4_x86.fpuen_checked = FALSE;
4.298 sh4_x86.tstate = TSTATE_NONE;
4.299 sh4_x86.branch_taken = TRUE;
4.300 + sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
4.301 if( UNTRANSLATABLE(pc+2) ) {
4.302 exit_block_emu(pc+2);
4.303 return 2;
4.304 @@ -2539,6 +2587,7 @@
4.305 XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
4.306 sh4_x86.tstate = TSTATE_NONE;
4.307 sh4_x86.double_size = !sh4_x86.double_size;
4.308 + sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
4.309 :}
4.310
4.311 /* Processor control instructions */
4.312 @@ -2552,6 +2601,7 @@
4.313 CALL1_ptr_r32( sh4_write_sr, REG_EAX );
4.314 sh4_x86.fpuen_checked = FALSE;
4.315 sh4_x86.tstate = TSTATE_NONE;
4.316 + sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
4.317 return 2;
4.318 }
4.319 :}
4.320 @@ -2624,6 +2674,7 @@
4.321 CALL1_ptr_r32( sh4_write_sr, REG_EAX );
4.322 sh4_x86.fpuen_checked = FALSE;
4.323 sh4_x86.tstate = TSTATE_NONE;
4.324 + sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
4.325 return 2;
4.326 }
4.327 :}
4.328 @@ -2693,6 +2744,7 @@
4.329 load_reg( REG_EAX, Rm );
4.330 CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
4.331 sh4_x86.tstate = TSTATE_NONE;
4.332 + sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
4.333 return 2;
4.334 :}
4.335 LDS.L @Rm+, FPSCR {:
4.336 @@ -2704,6 +2756,7 @@
4.337 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
4.338 CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
4.339 sh4_x86.tstate = TSTATE_NONE;
4.340 + sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
4.341 return 2;
4.342 :}
4.343 LDS Rm, FPUL {:
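
One further case in the exit_block_rel change above deserves a note: when a block branches back to its own start and the SH4 mode hasn't been invalidated, the emitted tail skips the code lookup entirely and loops in native code until an event is due. In the same C-level terms as the earlier sketch (a paraphrase of the emitted instructions, not real source; block_start and goto_native are placeholders):

    sh4r.slice_cycle += block_cycles;
    if( sh4r.event_pending > sh4r.slice_cycle )
        goto_native( block_start + PROLOGUE_SIZE );  /* loop without leaving native code */
    exit_block();                                    /* an event is due: back to the dispatcher */
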
5.1 --- a/src/test/testsh4x86.c Tue Jul 13 16:26:49 2010 +1000
5.2 +++ b/src/test/testsh4x86.c Tue Jul 13 18:23:16 2010 +1000
5.3 @@ -113,6 +113,7 @@
5.4 gboolean gui_error_dialog( const char *fmt, ... ) { return TRUE; }
5.5 gboolean FASTCALL mmu_update_icache( sh4vma_t addr ) { return TRUE; }
5.6 void MMU_ldtlb() { }
5.7 +void event_schedule(int event, uint32_t nanos) { }
5.8 struct sh4_icache_struct sh4_icache;
5.9 struct mem_region_fn mem_region_unmapped;
5.10 const struct cpu_desc_struct sh4_cpu_desc;
6.1 --- a/src/xlat/x86/amd64abi.h Tue Jul 13 16:26:49 2010 +1000
6.2 +++ b/src/xlat/x86/amd64abi.h Tue Jul 13 18:23:16 2010 +1000
6.3 @@ -94,6 +94,8 @@
6.4 CALL_r32disp(preg, disp);
6.5 }
6.6
6.7 +#define PROLOGUE_SIZE 15
6.8 +
6.9 /**
6.10 * Emit the 'start of block' assembly. Sets up the stack frame and save
6.11 * SI/DI as required
6.12 @@ -102,6 +104,7 @@
6.13 {
6.14 PUSH_r32(REG_RBP);
6.15 SUBQ_imms_r64( 16, REG_RSP );
6.16 + MOVP_immptr_rptr( ((uint8_t *)&sh4r) + 128, REG_EBP );
6.17 }
6.18
6.19 static inline void exit_block( )
7.1 --- a/src/xlat/x86/ia32abi.h Tue Jul 13 16:26:49 2010 +1000
7.2 +++ b/src/xlat/x86/ia32abi.h Tue Jul 13 18:23:16 2010 +1000
7.3 @@ -132,6 +132,8 @@
7.4
7.5 #endif
7.6
7.7 +#define PROLOGUE_SIZE 9
7.8 +
7.9 /**
7.10 * Emit the 'start of block' assembly. Sets up the stack frame and save
7.11 * SI/DI as required
7.12 @@ -142,6 +144,7 @@
7.13 {
7.14 PUSH_r32(REG_EBP);
7.15 SUBL_imms_r32( 8, REG_ESP );
7.16 + MOVP_immptr_rptr( ((uint8_t *)&sh4r) + 128, REG_EBP );
7.17 }
7.18
7.19 static inline void exit_block( )
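
The two PROLOGUE_SIZE constants are the byte lengths of the sequences enter_block() now emits, which is what lets jump_next_block() and the tight-loop branch land just past a target block's prologue. An informal tally against the usual x86 encodings (not part of the diff) matches both values:

    /* amd64, PROLOGUE_SIZE 15:
     *   PUSH_r32(REG_RBP)              1 byte   (55)
     *   SUBQ_imms_r64(16, REG_RSP)     4 bytes  (48 83 EC 10)
     *   MOVP_immptr_rptr(&sh4r+128)   10 bytes  (48 BD imm64)
     *
     * ia32, PROLOGUE_SIZE 9:
     *   PUSH_r32(REG_EBP)              1 byte   (55)
     *   SUBL_imms_r32(8, REG_ESP)      3 bytes  (83 EC 08)
     *   MOVP_immptr_rptr(&sh4r+128)    5 bytes  (BD imm32)
     */
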
8.1 --- a/src/xlat/x86/x86op.h Tue Jul 13 16:26:49 2010 +1000
8.2 +++ b/src/xlat/x86/x86op.h Tue Jul 13 18:23:16 2010 +1000
8.3 @@ -314,6 +314,7 @@
8.4 #define x86_encode_opcode32(opcode,reg) x86_encode_opcodereg(0,opcode,reg)
8.5 #define x86_encode_r32_rm32(opcode,rr,rb) x86_encode_reg_rm(0,opcode,rr,rb)
8.6 #define x86_encode_r64_rm64(opcode,rr,rb) x86_encode_reg_rm(PREF_REXW,opcode,rr,rb)
8.7 +#define x86_encode_rptr_rmptr(opcode,rr,rb) x86_encode_reg_rm(PREF_PTR,opcode,rr,rb)
8.8 #define x86_encode_r32_mem32(opcode,rr,rb,rx,ss,disp32) x86_encode_modrm(0,opcode,rr,rb,rx,ss,disp32)
8.9 #define x86_encode_r64_mem64(opcode,rr,rb,rx,ss,disp32) x86_encode_modrm(PREF_REXW,opcode,rr,rb,rx,ss,disp32)
8.10 #define x86_encode_rptr_memptr(opcode,rr,rb,rx,ss,disp32) x86_encode_modrm(PREF_PTR,opcode,rr,rb,rx,ss,disp32)
8.11 @@ -397,8 +398,10 @@
8.12 #define CMPB_imms_rbpdisp(imm,disp) x86_encode_r32_rbpdisp32(0x80, 7, disp); OP(imm)
8.13 #define CMPB_r8_r8(r1,r2) x86_encode_r32_rm32(0x38, r1, r2)
8.14 #define CMPL_imms_r32(imm,r1) x86_encode_imms_rm32(0x83, 0x81, 7, imm, r1)
8.15 +#define CMPL_imms_r32disp(imm,rb,d) x86_encode_imms_r32disp32(0x83, 0x81, 7, imm, rb, d)
8.16 #define CMPL_imms_rbpdisp(imm,disp) x86_encode_imms_rbpdisp32(0x83, 0x81, 7, imm, disp)
8.17 #define CMPL_r32_r32(r1,r2) x86_encode_r32_rm32(0x39, r1, r2)
8.18 +#define CMPL_r32_r32disp(r1,r2,dsp) x86_encode_r32_mem32disp32(0x39, r1, r2, dsp)
8.19 #define CMPL_r32_rbpdisp(r1,disp) x86_encode_r32_rbpdisp32(0x39, r1, disp)
8.20 #define CMPL_rbpdisp_r32(disp,r1) x86_encode_r32_rbpdisp32(0x3B, r1, disp)
8.21 #define CMPQ_imms_r64(imm,r1) x86_encode_imms_rm64(0x83, 0x81, 7, imm, r1)
8.22 @@ -423,7 +426,7 @@
8.23 #define LEAL_sib_r32(ss,ii,bb,d,r1) x86_encode_r32_mem32(0x8D, r1, bb, ii, ss, d)
8.24 #define LEAQ_r64disp_r64(r1,disp,r2) x86_encode_r64_mem64(0x8D, r2, r1, -1, 0, disp)
8.25 #define LEAQ_rbpdisp_r64(disp,r1) x86_encode_r64_rbpdisp64(0x8D, r1, disp)
8.26 -#define LEAP_rptrdisp_rptr(r1,d,r2) x86_encode_rptr_memptr(0x8D, r2, r1, -1, 0, disp)
8.27 +#define LEAP_rptrdisp_rptr(r1,d,r2) x86_encode_rptr_memptr(0x8D, r2, r1, -1, 0, d)
8.28 #define LEAP_rbpdisp_rptr(disp,r1) x86_encode_rptr_memptr(0x8D, r1, REG_RBP, -1, 0, disp)
8.29 #define LEAP_sib_rptr(ss,ii,bb,d,r1) x86_encode_rptr_memptr(0x8D, r1, bb, ii, ss, d)
8.30
8.31 @@ -562,6 +565,7 @@
8.32 #define TESTL_rbpdisp_r32(disp,r1) x86_encode_r32_rbpdisp32(0x85, r1, disp) /* Same OP */
8.33 #define TESTQ_imms_r64(imm,r1) x86_encode_r64_rm64(0xF7, 0, r1); OP32(imm)
8.34 #define TESTQ_r64_r64(r1,r2) x86_encode_r64_rm64(0x85, r1, r2)
8.35 +#define TESTP_rptr_rptr(r1,r2) x86_encode_rptr_rmptr(0x85, r1, r2)
8.36
8.37 #define XCHGB_r8_r8(r1,r2) x86_encode_r32_rm32(0x86, r1, r2)
8.38 #define XCHGL_r32_r32(r1,r2) x86_encode_r32_rm32(0x87, r1, r2)
8.39 @@ -586,12 +590,13 @@
8.40 #define JCC_cc_rel8(cc,rel) OP(0x70+(cc)); OP(rel)
8.41 #define JCC_cc_rel32(cc,rel) OP(0x0F); OP(0x80+(cc)); OP32(rel)
8.42 #define JCC_cc_rel(cc,rel) if( IS_INT8(rel) ) { JCC_cc_rel8(cc,(int8_t)rel); } else { JCC_cc_rel32(cc,rel); }
8.43 +#define JCC_cc_prerel(cc,rel) if( IS_INT8(rel) ) { JCC_cc_rel8(cc,(int8_t)((rel)-2)); } else { JCC_cc_rel32(cc,((rel)-6)); }
8.44
8.45 #define JMP_rel8(rel) OP(0xEB); OP(rel)
8.46 #define JMP_rel32(rel) OP(0xE9); OP32(rel)
8.47 #define JMP_rel(rel) if( IS_INT8(rel) ) { JMP_rel8((int8_t)rel); } else { JMP_rel32(rel); }
8.48 #define JMP_prerel(rel) if( IS_INT8(((int32_t)rel)-2) ) { JMP_rel8(((int8_t)rel)-2); } else { JMP_rel32(((int32_t)rel)-5); }
8.49 -#define JMP_r32(r1,disp) x86_encode_r32_rm32(0xFF, 4, r1)
8.50 +#define JMP_rptr(r1) x86_encode_r32_rm32(0xFF, 4, r1)
8.51 #define JMP_r32disp(r1,disp) x86_encode_r32_mem32disp32(0xFF, 4, r1, disp)
8.52 #define RET() OP(0xC3)
8.53 #define RET_imm(imm) OP(0xC2); OP16(imm)
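
Finally, JCC_cc_prerel is the conditional counterpart of the existing JMP_prerel: the caller supplies a displacement measured from the start of the jump instruction (the current output position), and the macro subtracts the instruction's own length, 2 bytes for the short form and 6 for the 0F-prefixed long form, to get the architectural end-relative displacement. A small worked example for the backward branch emitted by exit_block_rel (figures are illustrative):

    /* Suppose the Jcc is emitted 40 bytes after the block start on amd64
     * (PROLOGUE_SIZE 15):
     *   backdisp = (code_start - output_pos) + PROLOGUE_SIZE = -40 + 15 = -25
     * The short form (70+cc rel8) is 2 bytes, so the emitted rel8 is -25 - 2 = -27:
     * measured from the end of the Jcc, the target is 27 bytes back, i.e. the
     * first instruction after this block's own prologue. */
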
.