Search
lxdream.org :: lxdream :: r408:af496b734734
lxdream 0.9.1
released Jun 29
Download Now
changeset 408:af496b734734
parent 407:d24ab36150c4
child 409:549e00835448
author nkeynes
date Fri Sep 28 07:27:20 2007 +0000 (12 years ago)
Change block signature to return pointer to next block (if known)
Rewrite block-exit code
src/sh4/sh4trans.c
src/sh4/sh4trans.h
src/sh4/sh4x86.c
src/sh4/sh4x86.in
1.1 --- a/src/sh4/sh4trans.c Fri Sep 28 07:26:35 2007 +0000
1.2 +++ b/src/sh4/sh4trans.c Fri Sep 28 07:27:20 2007 +0000
1.3 @@ -1,5 +1,5 @@
1.4 /**
1.5 - * $Id: sh4trans.c,v 1.4 2007-09-19 12:09:33 nkeynes Exp $
1.6 + * $Id: sh4trans.c,v 1.5 2007-09-28 07:27:20 nkeynes Exp $
1.7 *
1.8 * SH4 translation core module. This part handles the non-target-specific
1.9 * section of the translation.
1.10 @@ -37,38 +37,36 @@
1.11 }
1.12 }
1.13
1.14 + void * (*code)() = NULL;
1.15 while( sh4r.slice_cycle < nanosecs ) {
1.16 - if( SH4_EVENT_PENDING() ) {
1.17 + if( sh4r.event_pending <= sh4r.slice_cycle ) {
1.18 if( sh4r.event_types & PENDING_EVENT ) {
1.19 event_execute();
1.20 }
1.21 /* Eventq execute may (quite likely) deliver an immediate IRQ */
1.22 if( sh4r.event_types & PENDING_IRQ ) {
1.23 sh4_accept_interrupt();
1.24 + code = NULL;
1.25 }
1.26 }
1.27 +
1.28 + if( code ) { // fast path
1.29 + code = code();
1.30 + } else {
1.31 + if( sh4r.pc > 0xFFFFFF00 ) {
1.32 + syscall_invoke( sh4r.pc );
1.33 + sh4r.in_delay_slot = 0;
1.34 + sh4r.pc = sh4r.pr;
1.35 + }
1.36
1.37 - if( sh4r.pc > 0xFFFFFF00 ) {
1.38 - syscall_invoke( sh4r.pc );
1.39 - sh4r.in_delay_slot = 0;
1.40 - sh4r.pc = sh4r.pr;
1.41 + code = xlat_get_code(sh4r.pc);
1.42 + if( code == NULL ) {
1.43 + code = sh4_translate_basic_block( sh4r.pc );
1.44 + }
1.45 + code = code();
1.46 }
1.47 -
1.48 - gboolean (*code)() = xlat_get_code(sh4r.pc);
1.49 - if( code == NULL ) {
1.50 - code = sh4_translate_basic_block( sh4r.pc );
1.51 - }
1.52 - if( !code() )
1.53 - break;
1.54 }
1.55
1.56 - /* If we aborted early, but the cpu is still technically running,
1.57 - * we're doing a hard abort - cut the timeslice back to what we
1.58 - * actually executed
1.59 - */
1.60 - if( sh4r.slice_cycle < nanosecs && sh4r.sh4_state == SH4_STATE_RUNNING ) {
1.61 - nanosecs = sh4r.slice_cycle;
1.62 - }
1.63 if( sh4r.sh4_state != SH4_STATE_STANDBY ) {
1.64 TMU_run_slice( nanosecs );
1.65 SCIF_run_slice( nanosecs );
1.66 @@ -87,23 +85,24 @@
1.67 */
1.68 void * sh4_translate_basic_block( sh4addr_t start )
1.69 {
1.70 - uint32_t pc = start;
1.71 + sh4addr_t pc = start;
1.72 int done;
1.73 xlat_cache_block_t block = xlat_start_block( start );
1.74 xlat_output = (uint8_t *)block->code;
1.75 uint8_t *eob = xlat_output + block->size;
1.76 - sh4_translate_begin_block();
1.77 + sh4_translate_begin_block(pc);
1.78
1.79 - while( (done = sh4_x86_translate_instruction( pc )) == 0 ) {
1.80 + do {
1.81 if( eob - xlat_output < MAX_INSTRUCTION_SIZE ) {
1.82 uint8_t *oldstart = block->code;
1.83 block = xlat_extend_block();
1.84 xlat_output = block->code + (xlat_output - oldstart);
1.85 eob = block->code + block->size;
1.86 }
1.87 + done = sh4_x86_translate_instruction( pc );
1.88 pc += 2;
1.89 - }
1.90 - pc+=2;
1.91 + } while( !done );
1.92 + pc += (done - 2);
1.93 sh4_translate_end_block(pc);
1.94 xlat_commit_block( xlat_output - block->code, pc-start );
1.95 return block->code;
1.96 @@ -113,7 +112,7 @@
1.97 * Translate a linear basic block to a temporary buffer, execute it, and return
1.98 * the result of the execution. The translation is discarded.
1.99 */
1.100 -gboolean sh4_translate_and_run( sh4addr_t start )
1.101 +void *sh4_translate_and_run( sh4addr_t start )
1.102 {
1.103 char buf[65536];
1.104
1.105 @@ -122,7 +121,7 @@
1.106 xlat_output = buf;
1.107 uint8_t *eob = xlat_output + sizeof(buf);
1.108
1.109 - sh4_translate_begin_block();
1.110 + sh4_translate_begin_block(pc);
1.111
1.112 while( (done = sh4_x86_translate_instruction( pc )) == 0 ) {
1.113 assert( (eob - xlat_output) >= MAX_INSTRUCTION_SIZE );
1.114 @@ -131,6 +130,6 @@
1.115 pc+=2;
1.116 sh4_translate_end_block(pc);
1.117
1.118 - gboolean (*code)() = (void *)buf;
1.119 + void * (*code)() = (void *)buf;
1.120 return code();
1.121 }
2.1 --- a/src/sh4/sh4trans.h Fri Sep 28 07:26:35 2007 +0000
2.2 +++ b/src/sh4/sh4trans.h Fri Sep 28 07:27:20 2007 +0000
2.3 @@ -1,5 +1,5 @@
2.4 /**
2.5 - * $Id: sh4trans.h,v 1.2 2007-09-18 09:11:53 nkeynes Exp $
2.6 + * $Id: sh4trans.h,v 1.3 2007-09-28 07:27:20 nkeynes Exp $
2.7 *
2.8 * SH4->x86 translation module
2.9 *
2.10 @@ -39,7 +39,7 @@
2.11
2.12 /************** Output generation ***************/
2.13
2.14 -void sh4_translate_begin_block();
2.15 +void sh4_translate_begin_block( sh4addr_t pc );
2.16
2.17 uint32_t sh4_x86_translate_instruction( sh4addr_t pc );
2.18
3.1 --- a/src/sh4/sh4x86.c Fri Sep 28 07:26:35 2007 +0000
3.2 +++ b/src/sh4/sh4x86.c Fri Sep 28 07:27:20 2007 +0000
3.3 @@ -1,5 +1,5 @@
3.4 /**
3.5 - * $Id: sh4x86.c,v 1.14 2007-09-20 08:37:19 nkeynes Exp $
3.6 + * $Id: sh4x86.c,v 1.15 2007-09-28 07:27:20 nkeynes Exp $
3.7 *
3.8 * SH4 => x86 translation. This version does no real optimization, it just
3.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
3.10 @@ -42,7 +42,7 @@
3.11 gboolean in_delay_slot;
3.12 gboolean priv_checked; /* true if we've already checked the cpu mode. */
3.13 gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
3.14 - int exit_code;
3.15 + uint32_t block_start_pc;
3.16
3.17 /* Allocated memory for the (block-wide) back-patch list */
3.18 uint32_t **backpatch_list;
3.19 @@ -360,12 +360,11 @@
3.20 * Emit the 'start of block' assembly. Sets up the stack frame and save
3.21 * SI/DI as required
3.22 */
3.23 -void sh4_translate_begin_block()
3.24 +void sh4_translate_begin_block( sh4addr_t pc )
3.25 {
3.26 PUSH_r32(R_EBP);
3.27 /* mov &sh4r, ebp */
3.28 load_imm32( R_EBP, (uint32_t)&sh4r );
3.29 - PUSH_r32(R_EDI);
3.30 PUSH_r32(R_ESI);
3.31 XOR_r32_r32(R_ESI, R_ESI);
3.32
3.33 @@ -373,35 +372,44 @@
3.34 sh4_x86.priv_checked = FALSE;
3.35 sh4_x86.fpuen_checked = FALSE;
3.36 sh4_x86.backpatch_posn = 0;
3.37 - sh4_x86.exit_code = 1;
3.38 + sh4_x86.block_start_pc = pc;
3.39 }
3.40
3.41 /**
3.42 - * Exit the block early (ie branch out), conditionally or otherwise
3.43 + * Exit the block to an absolute PC
3.44 + * Bytes: 30
3.45 */
3.46 -void exit_block( )
3.47 +void exit_block( sh4addr_t pc, sh4addr_t endpc )
3.48 {
3.49 - store_spreg( R_EDI, REG_OFFSET(pc) );
3.50 - MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
3.51 - load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
3.52 - MUL_r32( R_ESI );
3.53 - ADD_r32_r32( R_EAX, R_ECX );
3.54 - store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
3.55 - load_imm32( R_EAX, sh4_x86.exit_code );
3.56 + load_imm32( R_ECX, pc ); // 5
3.57 + store_spreg( R_ECX, REG_OFFSET(pc) ); // 3
3.58 + MOV_moff32_EAX( (uint32_t)xlat_get_lut_entry(pc) ); // 5
3.59 + AND_imm8s_r32( 0xFC, R_EAX ); // 3
3.60 + load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
3.61 + ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
3.62 POP_r32(R_ESI);
3.63 - POP_r32(R_EDI);
3.64 POP_r32(R_EBP);
3.65 RET();
3.66 }
3.67
3.68 /**
3.69 - * Flush any open regs back to memory, restore SI/DI/, update PC, etc
3.70 + * Exit the block with sh4r.pc already written
3.71 + * Bytes: 16
3.72 + */
3.73 +void exit_block_pcset( pc )
3.74 +{
3.75 + XOR_r32_r32( R_EAX, R_EAX ); // 2
3.76 + load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
3.77 + ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
3.78 + POP_r32(R_ESI);
3.79 + POP_r32(R_EBP);
3.80 + RET();
3.81 +}
3.82 +
3.83 +/**
3.84 + * Write the block trailer (exception handling block)
3.85 */
3.86 void sh4_translate_end_block( sh4addr_t pc ) {
3.87 - assert( !sh4_x86.in_delay_slot ); // should never stop here
3.88 - // Normal termination - save PC, cycle count
3.89 - exit_block( );
3.90 -
3.91 if( sh4_x86.backpatch_posn != 0 ) {
3.92 uint8_t *end_ptr = xlat_output;
3.93 // Exception termination. Jump block for various exception codes:
3.94 @@ -435,8 +443,8 @@
3.95 load_imm32( R_EAX, (uint32_t)sh4_raise_exception ); // 6
3.96 CALL_r32( R_EAX ); // 2
3.97 ADD_imm8s_r32( 4, R_ESP );
3.98 + XOR_r32_r32( R_EAX, R_EAX );
3.99 POP_r32(R_ESI);
3.100 - POP_r32(R_EDI);
3.101 POP_r32(R_EBP);
3.102 RET();
3.103
3.104 @@ -457,7 +465,7 @@
3.105 * @return true if the instruction marks the end of a basic block
3.106 * (eg a branch or
3.107 */
3.108 -uint32_t sh4_x86_translate_instruction( uint32_t pc )
3.109 +uint32_t sh4_x86_translate_instruction( sh4addr_t pc )
3.110 {
3.111 uint32_t ir;
3.112 /* Read instruction */
3.113 @@ -547,12 +555,14 @@
3.114 if( sh4_x86.in_delay_slot ) {
3.115 SLOTILLEGAL();
3.116 } else {
3.117 - load_imm32( R_EAX, pc + 4 );
3.118 - store_spreg( R_EAX, R_PR );
3.119 - load_reg( R_EDI, Rn );
3.120 - ADD_r32_r32( R_EAX, R_EDI );
3.121 + load_imm32( R_ECX, pc + 4 );
3.122 + store_spreg( R_ECX, R_PR );
3.123 + ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_ECX );
3.124 + store_spreg( R_ECX, REG_OFFSET(pc) );
3.125 sh4_x86.in_delay_slot = TRUE;
3.126 - return 0;
3.127 + sh4_x86_translate_instruction( pc + 2 );
3.128 + exit_block_pcset(pc+2);
3.129 + return 4;
3.130 }
3.131 }
3.132 break;
3.133 @@ -562,10 +572,13 @@
3.134 if( sh4_x86.in_delay_slot ) {
3.135 SLOTILLEGAL();
3.136 } else {
3.137 - load_reg( R_EDI, Rn );
3.138 - ADD_imm32_r32( pc + 4, R_EDI );
3.139 + load_reg( R_EAX, Rn );
3.140 + ADD_imm32_r32( pc + 4, R_EAX );
3.141 + store_spreg( R_EAX, REG_OFFSET(pc) );
3.142 sh4_x86.in_delay_slot = TRUE;
3.143 - return 0;
3.144 + sh4_x86_translate_instruction( pc + 2 );
3.145 + exit_block_pcset(pc+2);
3.146 + return 4;
3.147 }
3.148 }
3.149 break;
3.150 @@ -786,9 +799,12 @@
3.151 if( sh4_x86.in_delay_slot ) {
3.152 SLOTILLEGAL();
3.153 } else {
3.154 - load_spreg( R_EDI, R_PR );
3.155 + load_spreg( R_ECX, R_PR );
3.156 + store_spreg( R_ECX, REG_OFFSET(pc) );
3.157 sh4_x86.in_delay_slot = TRUE;
3.158 - return 0;
3.159 + sh4_x86_translate_instruction(pc+2);
3.160 + exit_block_pcset(pc+2);
3.161 + return 4;
3.162 }
3.163 }
3.164 break;
3.165 @@ -796,25 +812,28 @@
3.166 { /* SLEEP */
3.167 check_priv();
3.168 call_func0( sh4_sleep );
3.169 - sh4_x86.exit_code = 0;
3.170 sh4_x86.in_delay_slot = FALSE;
3.171 INC_r32(R_ESI);
3.172 - return 1;
3.173 + exit_block(pc+2, pc+2);
3.174 + return 2;
3.175 }
3.176 break;
3.177 case 0x2:
3.178 { /* RTE */
3.179 - check_priv();
3.180 if( sh4_x86.in_delay_slot ) {
3.181 SLOTILLEGAL();
3.182 } else {
3.183 - load_spreg( R_EDI, R_SPC );
3.184 + check_priv();
3.185 + load_spreg( R_ECX, R_SPC );
3.186 + store_spreg( R_ECX, REG_OFFSET(pc) );
3.187 load_spreg( R_EAX, R_SSR );
3.188 call_func1( sh4_write_sr, R_EAX );
3.189 sh4_x86.in_delay_slot = TRUE;
3.190 sh4_x86.priv_checked = FALSE;
3.191 sh4_x86.fpuen_checked = FALSE;
3.192 - return 0;
3.193 + sh4_x86_translate_instruction(pc+2);
3.194 + exit_block_pcset(pc+2);
3.195 + return 4;
3.196 }
3.197 }
3.198 break;
3.199 @@ -1830,9 +1849,12 @@
3.200 } else {
3.201 load_imm32( R_EAX, pc + 4 );
3.202 store_spreg( R_EAX, R_PR );
3.203 - load_reg( R_EDI, Rn );
3.204 + load_reg( R_ECX, Rn );
3.205 + store_spreg( R_ECX, REG_OFFSET(pc) );
3.206 sh4_x86.in_delay_slot = TRUE;
3.207 - return 0;
3.208 + sh4_x86_translate_instruction(pc+2);
3.209 + exit_block_pcset(pc+2);
3.210 + return 4;
3.211 }
3.212 }
3.213 break;
3.214 @@ -1854,9 +1876,12 @@
3.215 if( sh4_x86.in_delay_slot ) {
3.216 SLOTILLEGAL();
3.217 } else {
3.218 - load_reg( R_EDI, Rn );
3.219 + load_reg( R_ECX, Rn );
3.220 + store_spreg( R_ECX, REG_OFFSET(pc) );
3.221 sh4_x86.in_delay_slot = TRUE;
3.222 - return 0;
3.223 + sh4_x86_translate_instruction(pc+2);
3.224 + exit_block_pcset(pc+2);
3.225 + return 4;
3.226 }
3.227 }
3.228 break;
3.229 @@ -2248,13 +2273,12 @@
3.230 if( sh4_x86.in_delay_slot ) {
3.231 SLOTILLEGAL();
3.232 } else {
3.233 - load_imm32( R_EDI, pc + 2 );
3.234 CMP_imm8s_sh4r( 0, R_T );
3.235 - JE_rel8( 5, nottaken );
3.236 - load_imm32( R_EDI, disp + pc + 4 );
3.237 + JE_rel8( 30, nottaken );
3.238 + exit_block( disp + pc + 4, pc+2 );
3.239 JMP_TARGET(nottaken);
3.240 - INC_r32(R_ESI);
3.241 - return 1;
3.242 + exit_block( pc + 2, pc+2 );
3.243 + return 2;
3.244 }
3.245 }
3.246 break;
3.247 @@ -2264,13 +2288,12 @@
3.248 if( sh4_x86.in_delay_slot ) {
3.249 SLOTILLEGAL();
3.250 } else {
3.251 - load_imm32( R_EDI, pc + 2 );
3.252 CMP_imm8s_sh4r( 0, R_T );
3.253 - JNE_rel8( 5, nottaken );
3.254 - load_imm32( R_EDI, disp + pc + 4 );
3.255 + JNE_rel8( 30, nottaken );
3.256 + exit_block( disp + pc + 4, pc+2 );
3.257 JMP_TARGET(nottaken);
3.258 - INC_r32(R_ESI);
3.259 - return 1;
3.260 + exit_block( pc + 2, pc + 2 );
3.261 + return 2;
3.262 }
3.263 }
3.264 break;
3.265 @@ -2280,13 +2303,16 @@
3.266 if( sh4_x86.in_delay_slot ) {
3.267 SLOTILLEGAL();
3.268 } else {
3.269 - load_imm32( R_EDI, pc + 4 );
3.270 + sh4_x86.in_delay_slot = TRUE;
3.271 CMP_imm8s_sh4r( 0, R_T );
3.272 - JE_rel8( 5, nottaken );
3.273 - load_imm32( R_EDI, disp + pc + 4 );
3.274 - JMP_TARGET(nottaken);
3.275 - sh4_x86.in_delay_slot = TRUE;
3.276 - return 0;
3.277 + OP(0x0F); OP(0x84); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JE rel32
3.278 + sh4_x86_translate_instruction(pc+2);
3.279 + exit_block( disp + pc + 4, pc+4 );
3.280 + // not taken
3.281 + *patch = (xlat_output - ((uint8_t *)patch)) - 4;
3.282 + sh4_x86_translate_instruction(pc+2);
3.283 + exit_block( pc + 4, pc+4 );
3.284 + return 4;
3.285 }
3.286 }
3.287 break;
3.288 @@ -2296,13 +2322,16 @@
3.289 if( sh4_x86.in_delay_slot ) {
3.290 SLOTILLEGAL();
3.291 } else {
3.292 - load_imm32( R_EDI, pc + 4 );
3.293 + sh4_x86.in_delay_slot = TRUE;
3.294 CMP_imm8s_sh4r( 0, R_T );
3.295 - JNE_rel8( 5, nottaken );
3.296 - load_imm32( R_EDI, disp + pc + 4 );
3.297 - JMP_TARGET(nottaken);
3.298 - sh4_x86.in_delay_slot = TRUE;
3.299 - return 0;
3.300 + OP(0x0F); OP(0x85); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JNE rel32
3.301 + sh4_x86_translate_instruction(pc+2);
3.302 + exit_block( disp + pc + 4, pc+4 );
3.303 + // not taken
3.304 + *patch = (xlat_output - ((uint8_t *)patch)) - 4;
3.305 + sh4_x86_translate_instruction(pc+2);
3.306 + exit_block( pc + 4, pc+4 );
3.307 + return 4;
3.308 }
3.309 }
3.310 break;
3.311 @@ -2329,9 +2358,10 @@
3.312 if( sh4_x86.in_delay_slot ) {
3.313 SLOTILLEGAL();
3.314 } else {
3.315 - load_imm32( R_EDI, disp + pc + 4 );
3.316 sh4_x86.in_delay_slot = TRUE;
3.317 - return 0;
3.318 + sh4_x86_translate_instruction( pc + 2 );
3.319 + exit_block( disp + pc + 4, pc+4 );
3.320 + return 4;
3.321 }
3.322 }
3.323 break;
3.324 @@ -2343,9 +2373,10 @@
3.325 } else {
3.326 load_imm32( R_EAX, pc + 4 );
3.327 store_spreg( R_EAX, R_PR );
3.328 - load_imm32( R_EDI, disp + pc + 4 );
3.329 sh4_x86.in_delay_slot = TRUE;
3.330 - return 0;
3.331 + sh4_x86_translate_instruction( pc + 2 );
3.332 + exit_block( disp + pc + 4, pc+4 );
3.333 + return 4;
3.334 }
3.335 }
3.336 break;
3.337 @@ -2389,6 +2420,8 @@
3.338 PUSH_imm32( imm );
3.339 call_func0( sh4_raise_trap );
3.340 ADD_imm8s_r32( 4, R_ESP );
3.341 + exit_block_pcset(pc);
3.342 + return 2;
3.343 }
3.344 }
3.345 break;
3.346 @@ -3207,7 +3240,7 @@
3.347 SLOTILLEGAL();
3.348 } else {
3.349 JMP_exit(EXIT_ILLEGAL);
3.350 - return 1;
3.351 + return 2;
3.352 }
3.353 }
3.354 break;
3.355 @@ -3260,7 +3293,6 @@
3.356 if( sh4_x86.in_delay_slot ) {
3.357 ADD_imm8s_r32(2,R_ESI);
3.358 sh4_x86.in_delay_slot = FALSE;
3.359 - return 1;
3.360 } else {
3.361 INC_r32(R_ESI);
3.362 }
4.1 --- a/src/sh4/sh4x86.in Fri Sep 28 07:26:35 2007 +0000
4.2 +++ b/src/sh4/sh4x86.in Fri Sep 28 07:27:20 2007 +0000
4.3 @@ -1,5 +1,5 @@
4.4 /**
4.5 - * $Id: sh4x86.in,v 1.15 2007-09-20 08:37:19 nkeynes Exp $
4.6 + * $Id: sh4x86.in,v 1.16 2007-09-28 07:27:20 nkeynes Exp $
4.7 *
4.8 * SH4 => x86 translation. This version does no real optimization, it just
4.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
4.10 @@ -42,7 +42,7 @@
4.11 gboolean in_delay_slot;
4.12 gboolean priv_checked; /* true if we've already checked the cpu mode. */
4.13 gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
4.14 - int exit_code;
4.15 + uint32_t block_start_pc;
4.16
4.17 /* Allocated memory for the (block-wide) back-patch list */
4.18 uint32_t **backpatch_list;
4.19 @@ -360,12 +360,11 @@
4.20 * Emit the 'start of block' assembly. Sets up the stack frame and save
4.21 * SI/DI as required
4.22 */
4.23 -void sh4_translate_begin_block()
4.24 +void sh4_translate_begin_block( sh4addr_t pc )
4.25 {
4.26 PUSH_r32(R_EBP);
4.27 /* mov &sh4r, ebp */
4.28 load_imm32( R_EBP, (uint32_t)&sh4r );
4.29 - PUSH_r32(R_EDI);
4.30 PUSH_r32(R_ESI);
4.31 XOR_r32_r32(R_ESI, R_ESI);
4.32
4.33 @@ -373,35 +372,44 @@
4.34 sh4_x86.priv_checked = FALSE;
4.35 sh4_x86.fpuen_checked = FALSE;
4.36 sh4_x86.backpatch_posn = 0;
4.37 - sh4_x86.exit_code = 1;
4.38 + sh4_x86.block_start_pc = pc;
4.39 }
4.40
4.41 /**
4.42 - * Exit the block early (ie branch out), conditionally or otherwise
4.43 + * Exit the block to an absolute PC
4.44 + * Bytes: 30
4.45 */
4.46 -void exit_block( )
4.47 +void exit_block( sh4addr_t pc, sh4addr_t endpc )
4.48 {
4.49 - store_spreg( R_EDI, REG_OFFSET(pc) );
4.50 - MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
4.51 - load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
4.52 - MUL_r32( R_ESI );
4.53 - ADD_r32_r32( R_EAX, R_ECX );
4.54 - store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
4.55 - load_imm32( R_EAX, sh4_x86.exit_code );
4.56 + load_imm32( R_ECX, pc ); // 5
4.57 + store_spreg( R_ECX, REG_OFFSET(pc) ); // 3
4.58 + MOV_moff32_EAX( (uint32_t)xlat_get_lut_entry(pc) ); // 5
4.59 + AND_imm8s_r32( 0xFC, R_EAX ); // 3
4.60 + load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
4.61 + ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
4.62 POP_r32(R_ESI);
4.63 - POP_r32(R_EDI);
4.64 POP_r32(R_EBP);
4.65 RET();
4.66 }
4.67
4.68 /**
4.69 - * Flush any open regs back to memory, restore SI/DI/, update PC, etc
4.70 + * Exit the block with sh4r.pc already written
4.71 + * Bytes: 16
4.72 + */
4.73 +void exit_block_pcset( pc )
4.74 +{
4.75 + XOR_r32_r32( R_EAX, R_EAX ); // 2
4.76 + load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
4.77 + ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
4.78 + POP_r32(R_ESI);
4.79 + POP_r32(R_EBP);
4.80 + RET();
4.81 +}
4.82 +
4.83 +/**
4.84 + * Write the block trailer (exception handling block)
4.85 */
4.86 void sh4_translate_end_block( sh4addr_t pc ) {
4.87 - assert( !sh4_x86.in_delay_slot ); // should never stop here
4.88 - // Normal termination - save PC, cycle count
4.89 - exit_block( );
4.90 -
4.91 if( sh4_x86.backpatch_posn != 0 ) {
4.92 uint8_t *end_ptr = xlat_output;
4.93 // Exception termination. Jump block for various exception codes:
4.94 @@ -435,8 +443,8 @@
4.95 load_imm32( R_EAX, (uint32_t)sh4_raise_exception ); // 6
4.96 CALL_r32( R_EAX ); // 2
4.97 ADD_imm8s_r32( 4, R_ESP );
4.98 + XOR_r32_r32( R_EAX, R_EAX );
4.99 POP_r32(R_ESI);
4.100 - POP_r32(R_EDI);
4.101 POP_r32(R_EBP);
4.102 RET();
4.103
4.104 @@ -457,7 +465,7 @@
4.105 * @return true if the instruction marks the end of a basic block
4.106 * (eg a branch or
4.107 */
4.108 -uint32_t sh4_x86_translate_instruction( uint32_t pc )
4.109 +uint32_t sh4_x86_translate_instruction( sh4addr_t pc )
4.110 {
4.111 uint32_t ir;
4.112 /* Read instruction */
4.113 @@ -1270,45 +1278,51 @@
4.114 if( sh4_x86.in_delay_slot ) {
4.115 SLOTILLEGAL();
4.116 } else {
4.117 - load_imm32( R_EDI, pc + 2 );
4.118 CMP_imm8s_sh4r( 0, R_T );
4.119 - JNE_rel8( 5, nottaken );
4.120 - load_imm32( R_EDI, disp + pc + 4 );
4.121 + JNE_rel8( 30, nottaken );
4.122 + exit_block( disp + pc + 4, pc+2 );
4.123 JMP_TARGET(nottaken);
4.124 - INC_r32(R_ESI);
4.125 - return 1;
4.126 + exit_block( pc + 2, pc + 2 );
4.127 + return 2;
4.128 }
4.129 :}
4.130 BF/S disp {:
4.131 if( sh4_x86.in_delay_slot ) {
4.132 SLOTILLEGAL();
4.133 } else {
4.134 - load_imm32( R_EDI, pc + 4 );
4.135 + sh4_x86.in_delay_slot = TRUE;
4.136 CMP_imm8s_sh4r( 0, R_T );
4.137 - JNE_rel8( 5, nottaken );
4.138 - load_imm32( R_EDI, disp + pc + 4 );
4.139 - JMP_TARGET(nottaken);
4.140 - sh4_x86.in_delay_slot = TRUE;
4.141 - return 0;
4.142 + OP(0x0F); OP(0x85); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JNE rel32
4.143 + sh4_x86_translate_instruction(pc+2);
4.144 + exit_block( disp + pc + 4, pc+4 );
4.145 + // not taken
4.146 + *patch = (xlat_output - ((uint8_t *)patch)) - 4;
4.147 + sh4_x86_translate_instruction(pc+2);
4.148 + exit_block( pc + 4, pc+4 );
4.149 + return 4;
4.150 }
4.151 :}
4.152 BRA disp {:
4.153 if( sh4_x86.in_delay_slot ) {
4.154 SLOTILLEGAL();
4.155 } else {
4.156 - load_imm32( R_EDI, disp + pc + 4 );
4.157 sh4_x86.in_delay_slot = TRUE;
4.158 - return 0;
4.159 + sh4_x86_translate_instruction( pc + 2 );
4.160 + exit_block( disp + pc + 4, pc+4 );
4.161 + return 4;
4.162 }
4.163 :}
4.164 BRAF Rn {:
4.165 if( sh4_x86.in_delay_slot ) {
4.166 SLOTILLEGAL();
4.167 } else {
4.168 - load_reg( R_EDI, Rn );
4.169 - ADD_imm32_r32( pc + 4, R_EDI );
4.170 + load_reg( R_EAX, Rn );
4.171 + ADD_imm32_r32( pc + 4, R_EAX );
4.172 + store_spreg( R_EAX, REG_OFFSET(pc) );
4.173 sh4_x86.in_delay_slot = TRUE;
4.174 - return 0;
4.175 + sh4_x86_translate_instruction( pc + 2 );
4.176 + exit_block_pcset(pc+2);
4.177 + return 4;
4.178 }
4.179 :}
4.180 BSR disp {:
4.181 @@ -1317,56 +1331,64 @@
4.182 } else {
4.183 load_imm32( R_EAX, pc + 4 );
4.184 store_spreg( R_EAX, R_PR );
4.185 - load_imm32( R_EDI, disp + pc + 4 );
4.186 sh4_x86.in_delay_slot = TRUE;
4.187 - return 0;
4.188 + sh4_x86_translate_instruction( pc + 2 );
4.189 + exit_block( disp + pc + 4, pc+4 );
4.190 + return 4;
4.191 }
4.192 :}
4.193 BSRF Rn {:
4.194 if( sh4_x86.in_delay_slot ) {
4.195 SLOTILLEGAL();
4.196 } else {
4.197 - load_imm32( R_EAX, pc + 4 );
4.198 - store_spreg( R_EAX, R_PR );
4.199 - load_reg( R_EDI, Rn );
4.200 - ADD_r32_r32( R_EAX, R_EDI );
4.201 + load_imm32( R_ECX, pc + 4 );
4.202 + store_spreg( R_ECX, R_PR );
4.203 + ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_ECX );
4.204 + store_spreg( R_ECX, REG_OFFSET(pc) );
4.205 sh4_x86.in_delay_slot = TRUE;
4.206 - return 0;
4.207 + sh4_x86_translate_instruction( pc + 2 );
4.208 + exit_block_pcset(pc+2);
4.209 + return 4;
4.210 }
4.211 :}
4.212 BT disp {:
4.213 if( sh4_x86.in_delay_slot ) {
4.214 SLOTILLEGAL();
4.215 } else {
4.216 - load_imm32( R_EDI, pc + 2 );
4.217 CMP_imm8s_sh4r( 0, R_T );
4.218 - JE_rel8( 5, nottaken );
4.219 - load_imm32( R_EDI, disp + pc + 4 );
4.220 + JE_rel8( 30, nottaken );
4.221 + exit_block( disp + pc + 4, pc+2 );
4.222 JMP_TARGET(nottaken);
4.223 - INC_r32(R_ESI);
4.224 - return 1;
4.225 + exit_block( pc + 2, pc+2 );
4.226 + return 2;
4.227 }
4.228 :}
4.229 BT/S disp {:
4.230 if( sh4_x86.in_delay_slot ) {
4.231 SLOTILLEGAL();
4.232 } else {
4.233 - load_imm32( R_EDI, pc + 4 );
4.234 + sh4_x86.in_delay_slot = TRUE;
4.235 CMP_imm8s_sh4r( 0, R_T );
4.236 - JE_rel8( 5, nottaken );
4.237 - load_imm32( R_EDI, disp + pc + 4 );
4.238 - JMP_TARGET(nottaken);
4.239 - sh4_x86.in_delay_slot = TRUE;
4.240 - return 0;
4.241 + OP(0x0F); OP(0x84); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JE rel32
4.242 + sh4_x86_translate_instruction(pc+2);
4.243 + exit_block( disp + pc + 4, pc+4 );
4.244 + // not taken
4.245 + *patch = (xlat_output - ((uint8_t *)patch)) - 4;
4.246 + sh4_x86_translate_instruction(pc+2);
4.247 + exit_block( pc + 4, pc+4 );
4.248 + return 4;
4.249 }
4.250 :}
4.251 JMP @Rn {:
4.252 if( sh4_x86.in_delay_slot ) {
4.253 SLOTILLEGAL();
4.254 } else {
4.255 - load_reg( R_EDI, Rn );
4.256 + load_reg( R_ECX, Rn );
4.257 + store_spreg( R_ECX, REG_OFFSET(pc) );
4.258 sh4_x86.in_delay_slot = TRUE;
4.259 - return 0;
4.260 + sh4_x86_translate_instruction(pc+2);
4.261 + exit_block_pcset(pc+2);
4.262 + return 4;
4.263 }
4.264 :}
4.265 JSR @Rn {:
4.266 @@ -1375,32 +1397,41 @@
4.267 } else {
4.268 load_imm32( R_EAX, pc + 4 );
4.269 store_spreg( R_EAX, R_PR );
4.270 - load_reg( R_EDI, Rn );
4.271 + load_reg( R_ECX, Rn );
4.272 + store_spreg( R_ECX, REG_OFFSET(pc) );
4.273 sh4_x86.in_delay_slot = TRUE;
4.274 - return 0;
4.275 + sh4_x86_translate_instruction(pc+2);
4.276 + exit_block_pcset(pc+2);
4.277 + return 4;
4.278 }
4.279 :}
4.280 RTE {:
4.281 - check_priv();
4.282 if( sh4_x86.in_delay_slot ) {
4.283 SLOTILLEGAL();
4.284 } else {
4.285 - load_spreg( R_EDI, R_SPC );
4.286 + check_priv();
4.287 + load_spreg( R_ECX, R_SPC );
4.288 + store_spreg( R_ECX, REG_OFFSET(pc) );
4.289 load_spreg( R_EAX, R_SSR );
4.290 call_func1( sh4_write_sr, R_EAX );
4.291 sh4_x86.in_delay_slot = TRUE;
4.292 sh4_x86.priv_checked = FALSE;
4.293 sh4_x86.fpuen_checked = FALSE;
4.294 - return 0;
4.295 + sh4_x86_translate_instruction(pc+2);
4.296 + exit_block_pcset(pc+2);
4.297 + return 4;
4.298 }
4.299 :}
4.300 RTS {:
4.301 if( sh4_x86.in_delay_slot ) {
4.302 SLOTILLEGAL();
4.303 } else {
4.304 - load_spreg( R_EDI, R_PR );
4.305 + load_spreg( R_ECX, R_PR );
4.306 + store_spreg( R_ECX, REG_OFFSET(pc) );
4.307 sh4_x86.in_delay_slot = TRUE;
4.308 - return 0;
4.309 + sh4_x86_translate_instruction(pc+2);
4.310 + exit_block_pcset(pc+2);
4.311 + return 4;
4.312 }
4.313 :}
4.314 TRAPA #imm {:
4.315 @@ -1410,6 +1441,8 @@
4.316 PUSH_imm32( imm );
4.317 call_func0( sh4_raise_trap );
4.318 ADD_imm8s_r32( 4, R_ESP );
4.319 + exit_block_pcset(pc);
4.320 + return 2;
4.321 }
4.322 :}
4.323 UNDEF {:
4.324 @@ -1417,7 +1450,7 @@
4.325 SLOTILLEGAL();
4.326 } else {
4.327 JMP_exit(EXIT_ILLEGAL);
4.328 - return 1;
4.329 + return 2;
4.330 }
4.331 :}
4.332
4.333 @@ -2248,10 +2281,10 @@
4.334 SLEEP {:
4.335 check_priv();
4.336 call_func0( sh4_sleep );
4.337 - sh4_x86.exit_code = 0;
4.338 sh4_x86.in_delay_slot = FALSE;
4.339 INC_r32(R_ESI);
4.340 - return 1;
4.341 + exit_block(pc+2, pc+2);
4.342 + return 2;
4.343 :}
4.344 STC SR, Rn {:
4.345 check_priv();
4.346 @@ -2429,7 +2462,6 @@
4.347 if( sh4_x86.in_delay_slot ) {
4.348 ADD_imm8s_r32(2,R_ESI);
4.349 sh4_x86.in_delay_slot = FALSE;
4.350 - return 1;
4.351 } else {
4.352 INC_r32(R_ESI);
4.353 }
.