Search
lxdream.org :: lxdream/src/sh4/sh4x86.in :: diff
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 408:af496b734734
prev 401:f79327f39818
next 409:549e00835448
author nkeynes
date Fri Sep 28 07:27:20 2007 +0000 (13 years ago)
permissions -rw-r--r--
last change Change block signature to return pointer to next block (if known)
Rewrite block-exit code
file annotate diff log raw
1.1 --- a/src/sh4/sh4x86.in Thu Sep 20 08:37:19 2007 +0000
1.2 +++ b/src/sh4/sh4x86.in Fri Sep 28 07:27:20 2007 +0000
1.3 @@ -1,5 +1,5 @@
1.4 /**
1.5 - * $Id: sh4x86.in,v 1.15 2007-09-20 08:37:19 nkeynes Exp $
1.6 + * $Id: sh4x86.in,v 1.16 2007-09-28 07:27:20 nkeynes Exp $
1.7 *
1.8 * SH4 => x86 translation. This version does no real optimization, it just
1.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
1.10 @@ -42,7 +42,7 @@
1.11 gboolean in_delay_slot;
1.12 gboolean priv_checked; /* true if we've already checked the cpu mode. */
1.13 gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
1.14 - int exit_code;
1.15 + uint32_t block_start_pc;
1.16
1.17 /* Allocated memory for the (block-wide) back-patch list */
1.18 uint32_t **backpatch_list;
1.19 @@ -360,12 +360,11 @@
1.20 * Emit the 'start of block' assembly. Sets up the stack frame and save
1.21 * SI/DI as required
1.22 */
1.23 -void sh4_translate_begin_block()
1.24 +void sh4_translate_begin_block( sh4addr_t pc )
1.25 {
1.26 PUSH_r32(R_EBP);
1.27 /* mov &sh4r, ebp */
1.28 load_imm32( R_EBP, (uint32_t)&sh4r );
1.29 - PUSH_r32(R_EDI);
1.30 PUSH_r32(R_ESI);
1.31 XOR_r32_r32(R_ESI, R_ESI);
1.32
1.33 @@ -373,35 +372,44 @@
1.34 sh4_x86.priv_checked = FALSE;
1.35 sh4_x86.fpuen_checked = FALSE;
1.36 sh4_x86.backpatch_posn = 0;
1.37 - sh4_x86.exit_code = 1;
1.38 + sh4_x86.block_start_pc = pc;
1.39 }
1.40
1.41 /**
1.42 - * Exit the block early (ie branch out), conditionally or otherwise
1.43 + * Exit the block to an absolute PC
1.44 + * Bytes: 30
1.45 */
1.46 -void exit_block( )
1.47 +void exit_block( sh4addr_t pc, sh4addr_t endpc )
1.48 {
1.49 - store_spreg( R_EDI, REG_OFFSET(pc) );
1.50 - MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
1.51 - load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.52 - MUL_r32( R_ESI );
1.53 - ADD_r32_r32( R_EAX, R_ECX );
1.54 - store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.55 - load_imm32( R_EAX, sh4_x86.exit_code );
1.56 + load_imm32( R_ECX, pc ); // 5
1.57 + store_spreg( R_ECX, REG_OFFSET(pc) ); // 3
1.58 + MOV_moff32_EAX( (uint32_t)xlat_get_lut_entry(pc) ); // 5
1.59 + AND_imm8s_r32( 0xFC, R_EAX ); // 3
1.60 + load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
1.61 + ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
1.62 POP_r32(R_ESI);
1.63 - POP_r32(R_EDI);
1.64 POP_r32(R_EBP);
1.65 RET();
1.66 }
1.67
1.68 /**
1.69 - * Flush any open regs back to memory, restore SI/DI/, update PC, etc
1.70 + * Exit the block with sh4r.pc already written
1.71 + * Bytes: 16
1.72 + */
1.73 +void exit_block_pcset( pc )
1.74 +{
1.75 + XOR_r32_r32( R_EAX, R_EAX ); // 2
1.76 + load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
1.77 + ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
1.78 + POP_r32(R_ESI);
1.79 + POP_r32(R_EBP);
1.80 + RET();
1.81 +}
1.82 +
1.83 +/**
1.84 + * Write the block trailer (exception handling block)
1.85 */
1.86 void sh4_translate_end_block( sh4addr_t pc ) {
1.87 - assert( !sh4_x86.in_delay_slot ); // should never stop here
1.88 - // Normal termination - save PC, cycle count
1.89 - exit_block( );
1.90 -
1.91 if( sh4_x86.backpatch_posn != 0 ) {
1.92 uint8_t *end_ptr = xlat_output;
1.93 // Exception termination. Jump block for various exception codes:
1.94 @@ -435,8 +443,8 @@
1.95 load_imm32( R_EAX, (uint32_t)sh4_raise_exception ); // 6
1.96 CALL_r32( R_EAX ); // 2
1.97 ADD_imm8s_r32( 4, R_ESP );
1.98 + XOR_r32_r32( R_EAX, R_EAX );
1.99 POP_r32(R_ESI);
1.100 - POP_r32(R_EDI);
1.101 POP_r32(R_EBP);
1.102 RET();
1.103
1.104 @@ -457,7 +465,7 @@
1.105 * @return true if the instruction marks the end of a basic block
1.106 * (eg a branch or
1.107 */
1.108 -uint32_t sh4_x86_translate_instruction( uint32_t pc )
1.109 +uint32_t sh4_x86_translate_instruction( sh4addr_t pc )
1.110 {
1.111 uint32_t ir;
1.112 /* Read instruction */
1.113 @@ -1270,45 +1278,51 @@
1.114 if( sh4_x86.in_delay_slot ) {
1.115 SLOTILLEGAL();
1.116 } else {
1.117 - load_imm32( R_EDI, pc + 2 );
1.118 CMP_imm8s_sh4r( 0, R_T );
1.119 - JNE_rel8( 5, nottaken );
1.120 - load_imm32( R_EDI, disp + pc + 4 );
1.121 + JNE_rel8( 30, nottaken );
1.122 + exit_block( disp + pc + 4, pc+2 );
1.123 JMP_TARGET(nottaken);
1.124 - INC_r32(R_ESI);
1.125 - return 1;
1.126 + exit_block( pc + 2, pc + 2 );
1.127 + return 2;
1.128 }
1.129 :}
1.130 BF/S disp {:
1.131 if( sh4_x86.in_delay_slot ) {
1.132 SLOTILLEGAL();
1.133 } else {
1.134 - load_imm32( R_EDI, pc + 4 );
1.135 + sh4_x86.in_delay_slot = TRUE;
1.136 CMP_imm8s_sh4r( 0, R_T );
1.137 - JNE_rel8( 5, nottaken );
1.138 - load_imm32( R_EDI, disp + pc + 4 );
1.139 - JMP_TARGET(nottaken);
1.140 - sh4_x86.in_delay_slot = TRUE;
1.141 - return 0;
1.142 + OP(0x0F); OP(0x85); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JNE rel32
1.143 + sh4_x86_translate_instruction(pc+2);
1.144 + exit_block( disp + pc + 4, pc+4 );
1.145 + // not taken
1.146 + *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1.147 + sh4_x86_translate_instruction(pc+2);
1.148 + exit_block( pc + 4, pc+4 );
1.149 + return 4;
1.150 }
1.151 :}
1.152 BRA disp {:
1.153 if( sh4_x86.in_delay_slot ) {
1.154 SLOTILLEGAL();
1.155 } else {
1.156 - load_imm32( R_EDI, disp + pc + 4 );
1.157 sh4_x86.in_delay_slot = TRUE;
1.158 - return 0;
1.159 + sh4_x86_translate_instruction( pc + 2 );
1.160 + exit_block( disp + pc + 4, pc+4 );
1.161 + return 4;
1.162 }
1.163 :}
1.164 BRAF Rn {:
1.165 if( sh4_x86.in_delay_slot ) {
1.166 SLOTILLEGAL();
1.167 } else {
1.168 - load_reg( R_EDI, Rn );
1.169 - ADD_imm32_r32( pc + 4, R_EDI );
1.170 + load_reg( R_EAX, Rn );
1.171 + ADD_imm32_r32( pc + 4, R_EAX );
1.172 + store_spreg( R_EAX, REG_OFFSET(pc) );
1.173 sh4_x86.in_delay_slot = TRUE;
1.174 - return 0;
1.175 + sh4_x86_translate_instruction( pc + 2 );
1.176 + exit_block_pcset(pc+2);
1.177 + return 4;
1.178 }
1.179 :}
1.180 BSR disp {:
1.181 @@ -1317,56 +1331,64 @@
1.182 } else {
1.183 load_imm32( R_EAX, pc + 4 );
1.184 store_spreg( R_EAX, R_PR );
1.185 - load_imm32( R_EDI, disp + pc + 4 );
1.186 sh4_x86.in_delay_slot = TRUE;
1.187 - return 0;
1.188 + sh4_x86_translate_instruction( pc + 2 );
1.189 + exit_block( disp + pc + 4, pc+4 );
1.190 + return 4;
1.191 }
1.192 :}
1.193 BSRF Rn {:
1.194 if( sh4_x86.in_delay_slot ) {
1.195 SLOTILLEGAL();
1.196 } else {
1.197 - load_imm32( R_EAX, pc + 4 );
1.198 - store_spreg( R_EAX, R_PR );
1.199 - load_reg( R_EDI, Rn );
1.200 - ADD_r32_r32( R_EAX, R_EDI );
1.201 + load_imm32( R_ECX, pc + 4 );
1.202 + store_spreg( R_ECX, R_PR );
1.203 + ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_ECX );
1.204 + store_spreg( R_ECX, REG_OFFSET(pc) );
1.205 sh4_x86.in_delay_slot = TRUE;
1.206 - return 0;
1.207 + sh4_x86_translate_instruction( pc + 2 );
1.208 + exit_block_pcset(pc+2);
1.209 + return 4;
1.210 }
1.211 :}
1.212 BT disp {:
1.213 if( sh4_x86.in_delay_slot ) {
1.214 SLOTILLEGAL();
1.215 } else {
1.216 - load_imm32( R_EDI, pc + 2 );
1.217 CMP_imm8s_sh4r( 0, R_T );
1.218 - JE_rel8( 5, nottaken );
1.219 - load_imm32( R_EDI, disp + pc + 4 );
1.220 + JE_rel8( 30, nottaken );
1.221 + exit_block( disp + pc + 4, pc+2 );
1.222 JMP_TARGET(nottaken);
1.223 - INC_r32(R_ESI);
1.224 - return 1;
1.225 + exit_block( pc + 2, pc+2 );
1.226 + return 2;
1.227 }
1.228 :}
1.229 BT/S disp {:
1.230 if( sh4_x86.in_delay_slot ) {
1.231 SLOTILLEGAL();
1.232 } else {
1.233 - load_imm32( R_EDI, pc + 4 );
1.234 + sh4_x86.in_delay_slot = TRUE;
1.235 CMP_imm8s_sh4r( 0, R_T );
1.236 - JE_rel8( 5, nottaken );
1.237 - load_imm32( R_EDI, disp + pc + 4 );
1.238 - JMP_TARGET(nottaken);
1.239 - sh4_x86.in_delay_slot = TRUE;
1.240 - return 0;
1.241 + OP(0x0F); OP(0x84); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JE rel32
1.242 + sh4_x86_translate_instruction(pc+2);
1.243 + exit_block( disp + pc + 4, pc+4 );
1.244 + // not taken
1.245 + *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1.246 + sh4_x86_translate_instruction(pc+2);
1.247 + exit_block( pc + 4, pc+4 );
1.248 + return 4;
1.249 }
1.250 :}
1.251 JMP @Rn {:
1.252 if( sh4_x86.in_delay_slot ) {
1.253 SLOTILLEGAL();
1.254 } else {
1.255 - load_reg( R_EDI, Rn );
1.256 + load_reg( R_ECX, Rn );
1.257 + store_spreg( R_ECX, REG_OFFSET(pc) );
1.258 sh4_x86.in_delay_slot = TRUE;
1.259 - return 0;
1.260 + sh4_x86_translate_instruction(pc+2);
1.261 + exit_block_pcset(pc+2);
1.262 + return 4;
1.263 }
1.264 :}
1.265 JSR @Rn {:
1.266 @@ -1375,32 +1397,41 @@
1.267 } else {
1.268 load_imm32( R_EAX, pc + 4 );
1.269 store_spreg( R_EAX, R_PR );
1.270 - load_reg( R_EDI, Rn );
1.271 + load_reg( R_ECX, Rn );
1.272 + store_spreg( R_ECX, REG_OFFSET(pc) );
1.273 sh4_x86.in_delay_slot = TRUE;
1.274 - return 0;
1.275 + sh4_x86_translate_instruction(pc+2);
1.276 + exit_block_pcset(pc+2);
1.277 + return 4;
1.278 }
1.279 :}
1.280 RTE {:
1.281 - check_priv();
1.282 if( sh4_x86.in_delay_slot ) {
1.283 SLOTILLEGAL();
1.284 } else {
1.285 - load_spreg( R_EDI, R_SPC );
1.286 + check_priv();
1.287 + load_spreg( R_ECX, R_SPC );
1.288 + store_spreg( R_ECX, REG_OFFSET(pc) );
1.289 load_spreg( R_EAX, R_SSR );
1.290 call_func1( sh4_write_sr, R_EAX );
1.291 sh4_x86.in_delay_slot = TRUE;
1.292 sh4_x86.priv_checked = FALSE;
1.293 sh4_x86.fpuen_checked = FALSE;
1.294 - return 0;
1.295 + sh4_x86_translate_instruction(pc+2);
1.296 + exit_block_pcset(pc+2);
1.297 + return 4;
1.298 }
1.299 :}
1.300 RTS {:
1.301 if( sh4_x86.in_delay_slot ) {
1.302 SLOTILLEGAL();
1.303 } else {
1.304 - load_spreg( R_EDI, R_PR );
1.305 + load_spreg( R_ECX, R_PR );
1.306 + store_spreg( R_ECX, REG_OFFSET(pc) );
1.307 sh4_x86.in_delay_slot = TRUE;
1.308 - return 0;
1.309 + sh4_x86_translate_instruction(pc+2);
1.310 + exit_block_pcset(pc+2);
1.311 + return 4;
1.312 }
1.313 :}
1.314 TRAPA #imm {:
1.315 @@ -1410,6 +1441,8 @@
1.316 PUSH_imm32( imm );
1.317 call_func0( sh4_raise_trap );
1.318 ADD_imm8s_r32( 4, R_ESP );
1.319 + exit_block_pcset(pc);
1.320 + return 2;
1.321 }
1.322 :}
1.323 UNDEF {:
1.324 @@ -1417,7 +1450,7 @@
1.325 SLOTILLEGAL();
1.326 } else {
1.327 JMP_exit(EXIT_ILLEGAL);
1.328 - return 1;
1.329 + return 2;
1.330 }
1.331 :}
1.332
1.333 @@ -2248,10 +2281,10 @@
1.334 SLEEP {:
1.335 check_priv();
1.336 call_func0( sh4_sleep );
1.337 - sh4_x86.exit_code = 0;
1.338 sh4_x86.in_delay_slot = FALSE;
1.339 INC_r32(R_ESI);
1.340 - return 1;
1.341 + exit_block(pc+2, pc+2);
1.342 + return 2;
1.343 :}
1.344 STC SR, Rn {:
1.345 check_priv();
1.346 @@ -2429,7 +2462,6 @@
1.347 if( sh4_x86.in_delay_slot ) {
1.348 ADD_imm8s_r32(2,R_ESI);
1.349 sh4_x86.in_delay_slot = FALSE;
1.350 - return 1;
1.351 } else {
1.352 INC_r32(R_ESI);
1.353 }
.