filename | src/sh4/sh4x86.c |
changeset | 374:8f80a795513e |
prev | 368:36fac4c42322 |
next | 375:4627600f7f8e |
author | nkeynes |
date | Tue Sep 11 02:14:46 2007 +0000 (16 years ago) |
permissions | -rw-r--r-- |
last change | Cache the pointer to the last FR bank (speeds fp ops up by about 10%) Implement experimental fix for FLOAT/FTRC Make read/write sr functions non-static (share with translator) Much more translator WIP |
file | annotate | diff | log | raw |
1.1 --- a/src/sh4/sh4x86.c Tue Sep 04 08:40:23 2007 +00001.2 +++ b/src/sh4/sh4x86.c Tue Sep 11 02:14:46 2007 +00001.3 @@ -1,5 +1,5 @@1.4 /**1.5 - * $Id: sh4x86.c,v 1.3 2007-09-04 08:40:23 nkeynes Exp $1.6 + * $Id: sh4x86.c,v 1.4 2007-09-11 02:14:46 nkeynes Exp $1.7 *1.8 * SH4 => x86 translation. This version does no real optimization, it just1.9 * outputs straight-line x86 code - it mainly exists to provide a baseline1.10 @@ -73,7 +73,7 @@1.11 {1.12 unsigned int i;1.13 for( i=0; i<sh4_x86.backpatch_posn; i++ ) {1.14 - *sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]));1.15 + *sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]) - 4);1.16 }1.17 }1.19 @@ -97,35 +97,20 @@1.20 OP(REG_OFFSET(r[sh4reg]));1.21 }1.23 -/**1.24 - * Load the SR register into an x86 register1.25 - */1.26 -static inline void read_sr( int x86reg )1.27 +static inline void load_reg16s( int x86reg, int sh4reg )1.28 {1.29 - MOV_ebp_r32( R_M, x86reg );1.30 - SHL1_r32( x86reg );1.31 - OR_ebp_r32( R_Q, x86reg );1.32 - SHL_imm8_r32( 7, x86reg );1.33 - OR_ebp_r32( R_S, x86reg );1.34 - SHL1_r32( x86reg );1.35 - OR_ebp_r32( R_T, x86reg );1.36 - OR_ebp_r32( R_SR, x86reg );1.37 + OP(0x0F);1.38 + OP(0xBF);1.39 + MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));1.40 }1.42 -static inline void write_sr( int x86reg )1.43 +static inline void load_reg16u( int x86reg, int sh4reg )1.44 {1.45 - TEST_imm32_r32( SR_M, x86reg );1.46 - SETNE_ebp(R_M);1.47 - TEST_imm32_r32( SR_Q, x86reg );1.48 - SETNE_ebp(R_Q);1.49 - TEST_imm32_r32( SR_S, x86reg );1.50 - SETNE_ebp(R_S);1.51 - TEST_imm32_r32( SR_T, x86reg );1.52 - SETNE_ebp(R_T);1.53 - AND_imm32_r32( SR_MQSTMASK, x86reg );1.54 - MOV_r32_ebp( x86reg, R_SR );1.55 + OP(0x0F);1.56 + OP(0xB7);1.57 + MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));1.58 +1.59 }1.60 -1.62 static inline void load_spreg( int x86reg, int regoffset )1.63 {1.64 @@ -160,6 +145,49 @@1.65 OP(regoffset);1.66 }1.68 +1.69 +#define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))1.70 +1.71 +static inline void load_xf_bank( int bankreg )1.72 +{1.73 + load_spreg( bankreg, R_FPSCR );1.74 + SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size1.75 + AND_imm8s_r32( 0x40, bankreg ); // Complete extraction1.76 + OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg1.77 +}1.78 +1.79 +static inline void push_fr( int bankreg, int frm )1.80 +{1.81 + OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2); // FLD.S [bankreg + frm^1*4]1.82 +}1.83 +1.84 +static inline void pop_fr( int bankreg, int frm )1.85 +{1.86 + OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]1.87 +}1.88 +1.89 +static inline void push_dr( int bankreg, int frm )1.90 +{1.91 + if( frm&1 ) {1.92 + // this is technically undefined, but it seems to work consistently - high 32 bits1.93 + // loaded from FRm (32-bits), low 32bits are 0.1.94 + OP(0xFF); OP(0x70 + bankreg); OP((frm^1)<<2); // PUSH [bankreg + frm^1]1.95 + PUSH_imm32(0);1.96 +1.97 +1.98 + } else {1.99 + OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]1.100 + }1.101 +}1.102 +1.103 +static inline void pop_dr( int bankreg, int frm )1.104 +{1.105 + if( frm&1 ) {1.106 + } else {1.107 + OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]1.108 + }1.109 +}1.110 +1.111 /**1.112 * Note: clobbers EAX to make the indirect call - this isn't usually1.113 * a problem since the callee will usually clobber it anyway.1.114 @@ -248,7 +276,7 @@1.115 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)1.117 #define RAISE_EXCEPTION( exc ) call_func1(sh4_raise_exception, exc);1.118 -#define CHECKSLOTILLEGAL() if(sh4_x86.in_delay_slot) RAISE_EXCEPTION(EXC_SLOT_ILLEGAL)1.119 +#define SLOTILLEGAL() RAISE_EXCEPTION(EXC_SLOT_ILLEGAL); return 11.123 @@ -259,9 +287,9 @@1.124 void sh4_translate_begin_block()1.125 {1.126 PUSH_r32(R_EBP);1.127 - PUSH_r32(R_ESI);1.128 /* mov &sh4r, ebp */1.129 load_imm32( R_EBP, (uint32_t)&sh4r );1.130 + PUSH_r32(R_EDI);1.131 PUSH_r32(R_ESI);1.133 sh4_x86.in_delay_slot = FALSE;1.134 @@ -273,16 +301,18 @@1.135 /**1.136 * Exit the block early (ie branch out), conditionally or otherwise1.137 */1.138 -void exit_block( uint32_t pc )1.139 +void exit_block( )1.140 {1.141 - load_imm32( R_ECX, pc );1.142 - store_spreg( R_ECX, REG_OFFSET(pc) );1.143 + store_spreg( R_EDI, REG_OFFSET(pc) );1.144 MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );1.145 load_spreg( R_ECX, REG_OFFSET(slice_cycle) );1.146 MUL_r32( R_ESI );1.147 ADD_r32_r32( R_EAX, R_ECX );1.148 store_spreg( R_ECX, REG_OFFSET(slice_cycle) );1.149 XOR_r32_r32( R_EAX, R_EAX );1.150 + POP_r32(R_ESI);1.151 + POP_r32(R_EDI);1.152 + POP_r32(R_EBP);1.153 RET();1.154 }1.156 @@ -292,7 +322,7 @@1.157 void sh4_translate_end_block( sh4addr_t pc ) {1.158 assert( !sh4_x86.in_delay_slot ); // should never stop here1.159 // Normal termination - save PC, cycle count1.160 - exit_block( pc );1.161 + exit_block( );1.163 uint8_t *end_ptr = xlat_output;1.164 // Exception termination. Jump block for various exception codes:1.165 @@ -348,7 +378,7 @@1.166 case 0x0:1.167 { /* STC SR, Rn */1.168 uint32_t Rn = ((ir>>8)&0xF);1.169 - read_sr( R_EAX );1.170 + call_func0(sh4_read_sr);1.171 store_reg( R_EAX, Rn );1.172 }1.173 break;1.174 @@ -388,7 +418,8 @@1.175 case 0x1:1.176 { /* STC Rm_BANK, Rn */1.177 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm_BANK = ((ir>>4)&0x7);1.178 - /* TODO */1.179 + load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );1.180 + store_reg( R_EAX, Rn );1.181 }1.182 break;1.183 }1.184 @@ -398,16 +429,42 @@1.185 case 0x0:1.186 { /* BSRF Rn */1.187 uint32_t Rn = ((ir>>8)&0xF);1.188 + if( sh4_x86.in_delay_slot ) {1.189 + SLOTILLEGAL();1.190 + } else {1.191 + load_imm32( R_EAX, pc + 4 );1.192 + store_spreg( R_EAX, R_PR );1.193 + load_reg( R_EDI, Rn );1.194 + ADD_r32_r32( R_EAX, R_EDI );1.195 + sh4_x86.in_delay_slot = TRUE;1.196 + INC_r32(R_ESI);1.197 + return 0;1.198 + }1.199 }1.200 break;1.201 case 0x2:1.202 { /* BRAF Rn */1.203 uint32_t Rn = ((ir>>8)&0xF);1.204 + if( sh4_x86.in_delay_slot ) {1.205 + SLOTILLEGAL();1.206 + } else {1.207 + load_reg( R_EDI, Rn );1.208 + sh4_x86.in_delay_slot = TRUE;1.209 + INC_r32(R_ESI);1.210 + return 0;1.211 + }1.212 }1.213 break;1.214 case 0x8:1.215 { /* PREF @Rn */1.216 uint32_t Rn = ((ir>>8)&0xF);1.217 + load_reg( R_EAX, Rn );1.218 + PUSH_r32( R_EAX );1.219 + AND_imm32_r32( 0xFC000000, R_EAX );1.220 + CMP_imm32_r32( 0xE0000000, R_EAX );1.221 + JNE_rel8(8);1.222 + call_func0( sh4_flush_store_queue );1.223 + ADD_imm8s_r32( -4, R_ESP );1.224 }1.225 break;1.226 case 0x9:1.227 @@ -430,6 +487,7 @@1.228 uint32_t Rn = ((ir>>8)&0xF);1.229 load_reg( R_EAX, 0 );1.230 load_reg( R_ECX, Rn );1.231 + check_walign32( R_ECX );1.232 MEM_WRITE_LONG( R_ECX, R_EAX );1.233 }1.234 break;1.235 @@ -454,6 +512,7 @@1.236 load_reg( R_EAX, 0 );1.237 load_reg( R_ECX, Rn );1.238 ADD_r32_r32( R_EAX, R_ECX );1.239 + check_walign16( R_ECX );1.240 load_reg( R_EAX, Rm );1.241 MEM_WRITE_WORD( R_ECX, R_EAX );1.242 }1.243 @@ -464,6 +523,7 @@1.244 load_reg( R_EAX, 0 );1.245 load_reg( R_ECX, Rn );1.246 ADD_r32_r32( R_EAX, R_ECX );1.247 + check_walign32( R_ECX );1.248 load_reg( R_EAX, Rm );1.249 MEM_WRITE_LONG( R_ECX, R_EAX );1.250 }1.251 @@ -481,14 +541,21 @@1.252 switch( (ir&0xFF0) >> 4 ) {1.253 case 0x0:1.254 { /* CLRT */1.255 + CLC();1.256 + SETC_t();1.257 }1.258 break;1.259 case 0x1:1.260 { /* SETT */1.261 + STC();1.262 + SETC_t();1.263 }1.264 break;1.265 case 0x2:1.266 { /* CLRMAC */1.267 + XOR_r32_r32(R_EAX, R_EAX);1.268 + store_spreg( R_EAX, R_MACL );1.269 + store_spreg( R_EAX, R_MACH );1.270 }1.271 break;1.272 case 0x3:1.273 @@ -497,10 +564,14 @@1.274 break;1.275 case 0x4:1.276 { /* CLRS */1.277 + CLC();1.278 + SETC_sh4r(R_S);1.279 }1.280 break;1.281 case 0x5:1.282 { /* SETS */1.283 + STC();1.284 + SETC_sh4r(R_S);1.285 }1.286 break;1.287 default:1.288 @@ -595,14 +666,34 @@1.289 switch( (ir&0xFF0) >> 4 ) {1.290 case 0x0:1.291 { /* RTS */1.292 + if( sh4_x86.in_delay_slot ) {1.293 + SLOTILLEGAL();1.294 + } else {1.295 + load_spreg( R_EDI, R_PR );1.296 + sh4_x86.in_delay_slot = TRUE;1.297 + INC_r32(R_ESI);1.298 + return 0;1.299 + }1.300 }1.301 break;1.302 case 0x1:1.303 { /* SLEEP */1.304 + /* TODO */1.305 }1.306 break;1.307 case 0x2:1.308 { /* RTE */1.309 + check_priv();1.310 + if( sh4_x86.in_delay_slot ) {1.311 + SLOTILLEGAL();1.312 + } else {1.313 + load_spreg( R_EDI, R_PR );1.314 + load_spreg( R_EAX, R_SSR );1.315 + call_func1( sh4_write_sr, R_EAX );1.316 + sh4_x86.in_delay_slot = TRUE;1.317 + INC_r32(R_ESI);1.318 + return 0;1.319 + }1.320 }1.321 break;1.322 default:1.323 @@ -626,6 +717,7 @@1.324 load_reg( R_EAX, 0 );1.325 load_reg( R_ECX, Rm );1.326 ADD_r32_r32( R_EAX, R_ECX );1.327 + check_ralign16( R_ECX );1.328 MEM_READ_WORD( R_ECX, R_EAX );1.329 store_reg( R_EAX, Rn );1.330 }1.331 @@ -636,6 +728,7 @@1.332 load_reg( R_EAX, 0 );1.333 load_reg( R_ECX, Rm );1.334 ADD_r32_r32( R_EAX, R_ECX );1.335 + check_ralign32( R_ECX );1.336 MEM_READ_LONG( R_ECX, R_EAX );1.337 store_reg( R_EAX, Rn );1.338 }1.339 @@ -656,6 +749,7 @@1.340 load_reg( R_ECX, Rn );1.341 load_reg( R_EAX, Rm );1.342 ADD_imm32_r32( disp, R_ECX );1.343 + check_walign32( R_ECX );1.344 MEM_WRITE_LONG( R_ECX, R_EAX );1.345 }1.346 break;1.347 @@ -673,6 +767,7 @@1.348 { /* MOV.W Rm, @Rn */1.349 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);1.350 load_reg( R_ECX, Rn );1.351 + check_walign16( R_ECX );1.352 MEM_READ_WORD( R_ECX, R_EAX );1.353 store_reg( R_EAX, Rn );1.354 }1.355 @@ -682,6 +777,7 @@1.356 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);1.357 load_reg( R_EAX, Rm );1.358 load_reg( R_ECX, Rn );1.359 + check_walign32(R_ECX);1.360 MEM_WRITE_LONG( R_ECX, R_EAX );1.361 }1.362 break;1.363 @@ -699,6 +795,7 @@1.364 { /* MOV.W Rm, @-Rn */1.365 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);1.366 load_reg( R_ECX, Rn );1.367 + check_walign16( R_ECX );1.368 load_reg( R_EAX, Rm );1.369 ADD_imm8s_r32( -2, R_ECX );1.370 MEM_WRITE_WORD( R_ECX, R_EAX );1.371 @@ -709,6 +806,7 @@1.372 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);1.373 load_reg( R_EAX, Rm );1.374 load_reg( R_ECX, Rn );1.375 + check_walign32( R_ECX );1.376 ADD_imm8s_r32( -4, R_ECX );1.377 store_reg( R_ECX, Rn );1.378 MEM_WRITE_LONG( R_ECX, R_EAX );1.379 @@ -794,11 +892,19 @@1.380 case 0xE:1.381 { /* MULU.W Rm, Rn */1.382 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);1.383 + load_reg16u( R_EAX, Rm );1.384 + load_reg16u( R_ECX, Rn );1.385 + MUL_r32( R_ECX );1.386 + store_spreg( R_EAX, R_MACL );1.387 }1.388 break;1.389 case 0xF:1.390 { /* MULS.W Rm, Rn */1.391 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);1.392 + load_reg16s( R_EAX, Rm );1.393 + load_reg16s( R_ECX, Rn );1.394 + MUL_r32( R_ECX );1.395 + store_spreg( R_EAX, R_MACL );1.396 }1.397 break;1.398 default:1.399 @@ -838,6 +944,17 @@1.400 case 0x4:1.401 { /* DIV1 Rm, Rn */1.402 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);1.403 + load_reg( R_ECX, Rn );1.404 + LDC_t();1.405 + RCL1_r32( R_ECX ); // OP21.406 + SETC_r32( R_EDX ); // Q1.407 + load_spreg( R_EAX, R_Q );1.408 + CMP_sh4r_r32( R_M, R_EAX );1.409 + JE_rel8(8);1.410 + ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_ECX );1.411 + JMP_rel8(3);1.412 + SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_ECX );1.413 + // TODO1.414 }1.415 break;1.416 case 0x5:1.417 @@ -1091,12 +1208,11 @@1.418 case 0x0:1.419 { /* STC.L SR, @-Rn */1.420 uint32_t Rn = ((ir>>8)&0xF);1.421 - /* TODO */1.422 - load_reg( R_ECX, Rn );1.423 - ADD_imm8s_r32( -4, Rn );1.424 - store_reg( R_ECX, Rn );1.425 - read_sr( R_EAX );1.426 - MEM_WRITE_LONG( R_ECX, R_EAX );1.427 + load_reg( R_ECX, Rn );1.428 + ADD_imm8s_r32( -4, Rn );1.429 + store_reg( R_ECX, Rn );1.430 + call_func0( sh4_read_sr );1.431 + MEM_WRITE_LONG( R_ECX, R_EAX );1.432 }1.433 break;1.434 case 0x1:1.435 @@ -1147,6 +1263,11 @@1.436 case 0x1:1.437 { /* STC.L Rm_BANK, @-Rn */1.438 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm_BANK = ((ir>>4)&0x7);1.439 + load_reg( R_ECX, Rn );1.440 + ADD_imm8s_r32( -4, Rn );1.441 + store_reg( R_ECX, Rn );1.442 + load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );1.443 + MEM_WRITE_LONG( R_ECX, R_EAX );1.444 }1.445 break;1.446 }1.447 @@ -1307,7 +1428,7 @@1.448 ADD_imm8s_r32( 4, R_EAX );1.449 store_reg( R_EAX, Rm );1.450 MEM_READ_LONG( R_ECX, R_EAX );1.451 - write_sr( R_EAX );1.452 + call_func1( sh4_write_sr, R_EAX );1.453 }1.454 break;1.455 case 0x1:1.456 @@ -1362,6 +1483,12 @@1.457 case 0x1:1.458 { /* LDC.L @Rm+, Rn_BANK */1.459 uint32_t Rm = ((ir>>8)&0xF); uint32_t Rn_BANK = ((ir>>4)&0x7);1.460 + load_reg( R_EAX, Rm );1.461 + MOV_r32_r32( R_EAX, R_ECX );1.462 + ADD_imm8s_r32( 4, R_EAX );1.463 + store_reg( R_EAX, Rm );1.464 + MEM_READ_LONG( R_ECX, R_EAX );1.465 + store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );1.466 }1.467 break;1.468 }1.469 @@ -1489,6 +1616,16 @@1.470 case 0x0:1.471 { /* JSR @Rn */1.472 uint32_t Rn = ((ir>>8)&0xF);1.473 + if( sh4_x86.in_delay_slot ) {1.474 + SLOTILLEGAL();1.475 + } else {1.476 + load_imm32( R_EAX, pc + 4 );1.477 + store_spreg( R_EAX, R_PR );1.478 + load_reg( R_EDI, Rn );1.479 + sh4_x86.in_delay_slot = TRUE;1.480 + INC_r32(R_ESI);1.481 + return 0;1.482 + }1.483 }1.484 break;1.485 case 0x1:1.486 @@ -1505,6 +1642,14 @@1.487 case 0x2:1.488 { /* JMP @Rn */1.489 uint32_t Rn = ((ir>>8)&0xF);1.490 + if( sh4_x86.in_delay_slot ) {1.491 + SLOTILLEGAL();1.492 + } else {1.493 + load_reg( R_EDI, Rn );1.494 + sh4_x86.in_delay_slot = TRUE;1.495 + INC_r32(R_ESI);1.496 + return 0;1.497 + }1.498 }1.499 break;1.500 default:1.501 @@ -1555,7 +1700,7 @@1.502 { /* LDC Rm, SR */1.503 uint32_t Rm = ((ir>>8)&0xF);1.504 load_reg( R_EAX, Rm );1.505 - write_sr( R_EAX );1.506 + call_func1( sh4_write_sr, R_EAX );1.507 }1.508 break;1.509 case 0x1:1.510 @@ -1594,6 +1739,8 @@1.511 case 0x1:1.512 { /* LDC Rm, Rn_BANK */1.513 uint32_t Rm = ((ir>>8)&0xF); uint32_t Rn_BANK = ((ir>>4)&0x7);1.514 + load_reg( R_EAX, Rm );1.515 + store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );1.516 }1.517 break;1.518 }1.519 @@ -1610,6 +1757,7 @@1.520 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF); uint32_t disp = (ir&0xF)<<2;1.521 load_reg( R_ECX, Rm );1.522 ADD_imm8s_r32( disp, R_ECX );1.523 + check_ralign32( R_ECX );1.524 MEM_READ_LONG( R_ECX, R_EAX );1.525 store_reg( R_EAX, Rn );1.526 }1.527 @@ -1628,6 +1776,7 @@1.528 { /* MOV.W @Rm, Rn */1.529 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);1.530 load_reg( R_ECX, Rm );1.531 + check_ralign16( R_ECX );1.532 MEM_READ_WORD( R_ECX, R_EAX );1.533 store_reg( R_EAX, Rn );1.534 }1.535 @@ -1636,6 +1785,7 @@1.536 { /* MOV.L @Rm, Rn */1.537 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);1.538 load_reg( R_ECX, Rm );1.539 + check_ralign32( R_ECX );1.540 MEM_READ_LONG( R_ECX, R_EAX );1.541 store_reg( R_EAX, Rn );1.542 }1.543 @@ -1662,6 +1812,7 @@1.544 { /* MOV.W @Rm+, Rn */1.545 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);1.546 load_reg( R_EAX, Rm );1.547 + check_ralign16( R_EAX );1.548 MOV_r32_r32( R_EAX, R_ECX );1.549 ADD_imm8s_r32( 2, R_EAX );1.550 store_reg( R_EAX, Rm );1.551 @@ -1673,6 +1824,7 @@1.552 { /* MOV.L @Rm+, Rn */1.553 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);1.554 load_reg( R_EAX, Rm );1.555 + check_ralign32( R_ECX );1.556 MOV_r32_r32( R_EAX, R_ECX );1.557 ADD_imm8s_r32( 4, R_EAX );1.558 store_reg( R_EAX, Rm );1.559 @@ -1785,6 +1937,7 @@1.560 load_reg( R_ECX, Rn );1.561 load_reg( R_EAX, 0 );1.562 ADD_imm32_r32( disp, R_ECX );1.563 + check_walign16( R_ECX );1.564 MEM_WRITE_WORD( R_ECX, R_EAX );1.565 }1.566 break;1.567 @@ -1802,6 +1955,7 @@1.568 uint32_t Rm = ((ir>>4)&0xF); uint32_t disp = (ir&0xF)<<1;1.569 load_reg( R_ECX, Rm );1.570 ADD_imm32_r32( disp, R_ECX );1.571 + check_ralign16( R_ECX );1.572 MEM_READ_WORD( R_ECX, R_EAX );1.573 store_reg( R_EAX, 0 );1.574 }1.575 @@ -1817,32 +1971,63 @@1.576 case 0x9:1.577 { /* BT disp */1.578 int32_t disp = SIGNEXT8(ir&0xFF)<<1;1.579 - /* If true, result PC += 4 + disp. else result PC = pc+2 */1.580 - return pc + 2;1.581 + if( sh4_x86.in_delay_slot ) {1.582 + SLOTILLEGAL();1.583 + } else {1.584 + load_imm32( R_EDI, pc + 2 );1.585 + CMP_imm8s_sh4r( 0, R_T );1.586 + JE_rel8( 5 );1.587 + load_imm32( R_EDI, disp + pc + 4 );1.588 + INC_r32(R_ESI);1.589 + return 1;1.590 + }1.591 }1.592 break;1.593 case 0xB:1.594 { /* BF disp */1.595 int32_t disp = SIGNEXT8(ir&0xFF)<<1;1.596 - CMP_imm8s_ebp( 0, R_T );1.597 - JNE_rel8( 1 );1.598 - exit_block( disp + pc + 4 );1.599 - return 1;1.600 + if( sh4_x86.in_delay_slot ) {1.601 + SLOTILLEGAL();1.602 + } else {1.603 + load_imm32( R_EDI, pc + 2 );1.604 + CMP_imm8s_sh4r( 0, R_T );1.605 + JNE_rel8( 5 );1.606 + load_imm32( R_EDI, disp + pc + 4 );1.607 + INC_r32(R_ESI);1.608 + return 1;1.609 + }1.610 }1.611 break;1.612 case 0xD:1.613 { /* BT/S disp */1.614 int32_t disp = SIGNEXT8(ir&0xFF)<<1;1.615 - return pc + 4;1.616 + if( sh4_x86.in_delay_slot ) {1.617 + SLOTILLEGAL();1.618 + } else {1.619 + load_imm32( R_EDI, pc + 2 );1.620 + CMP_imm8s_sh4r( 0, R_T );1.621 + JE_rel8( 5 );1.622 + load_imm32( R_EDI, disp + pc + 4 );1.623 + sh4_x86.in_delay_slot = TRUE;1.624 + INC_r32(R_ESI);1.625 + return 0;1.626 + }1.627 }1.628 break;1.629 case 0xF:1.630 { /* BF/S disp */1.631 int32_t disp = SIGNEXT8(ir&0xFF)<<1;1.632 - CMP_imm8s_ebp( 0, R_T );1.633 - JNE_rel8( 1 );1.634 - exit_block( disp + pc + 4 );1.635 - sh4_x86.in_delay_slot = TRUE;1.636 + if( sh4_x86.in_delay_slot ) {1.637 + SLOTILLEGAL();1.638 + } else {1.639 + load_imm32( R_EDI, pc + 2 );1.640 + CMP_imm8s_sh4r( 0, R_T );1.641 + JNE_rel8( 5 );1.642 + load_imm32( R_EDI, disp + pc + 4 );1.643 + sh4_x86.in_delay_slot = TRUE;1.644 + INC_r32(R_ESI);1.645 + return 0;1.646 + }1.647 }1.648 break;1.649 default:1.650 @@ -1853,20 +2038,41 @@1.651 case 0x9:1.652 { /* MOV.W @(disp, PC), Rn */1.653 uint32_t Rn = ((ir>>8)&0xF); uint32_t disp = (ir&0xFF)<<1;1.654 - load_imm32( R_ECX, pc + disp + 4 );1.655 - MEM_READ_WORD( R_ECX, R_EAX );1.656 - store_reg( R_EAX, Rn );1.657 + if( sh4_x86.in_delay_slot ) {1.658 + SLOTILLEGAL();1.659 + } else {1.660 + load_imm32( R_ECX, pc + disp + 4 );1.661 + MEM_READ_WORD( R_ECX, R_EAX );1.662 + store_reg( R_EAX, Rn );1.663 + }1.664 }1.665 break;1.666 case 0xA:1.667 { /* BRA disp */1.668 int32_t disp = SIGNEXT12(ir&0xFFF)<<1;1.669 - exit_block( disp + pc + 4 );1.670 + if( sh4_x86.in_delay_slot ) {1.671 + SLOTILLEGAL();1.672 + } else {1.673 + load_imm32( R_EDI, disp + pc + 4 );1.674 + sh4_x86.in_delay_slot = TRUE;1.675 + INC_r32(R_ESI);1.676 + return 0;1.677 + }1.678 }1.679 break;1.680 case 0xB:1.681 { /* BSR disp */1.682 int32_t disp = SIGNEXT12(ir&0xFFF)<<1;1.683 + if( sh4_x86.in_delay_slot ) {1.684 + SLOTILLEGAL();1.685 + } else {1.686 + load_imm32( R_EAX, pc + 4 );1.687 + store_spreg( R_EAX, R_PR );1.688 + load_imm32( R_EDI, disp + pc + 4 );1.689 + sh4_x86.in_delay_slot = TRUE;1.690 + INC_r32(R_ESI);1.691 + return 0;1.692 + }1.693 }1.694 break;1.695 case 0xC:1.696 @@ -1886,6 +2092,7 @@1.697 load_spreg( R_ECX, R_GBR );1.698 load_reg( R_EAX, 0 );1.699 ADD_imm32_r32( disp, R_ECX );1.700 + check_walign16( R_ECX );1.701 MEM_WRITE_WORD( R_ECX, R_EAX );1.702 }1.703 break;1.704 @@ -1895,12 +2102,19 @@1.705 load_spreg( R_ECX, R_GBR );1.706 load_reg( R_EAX, 0 );1.707 ADD_imm32_r32( disp, R_ECX );1.708 + check_walign32( R_ECX );1.709 MEM_WRITE_LONG( R_ECX, R_EAX );1.710 }1.711 break;1.712 case 0x3:1.713 { /* TRAPA #imm */1.714 uint32_t imm = (ir&0xFF);1.715 + if( sh4_x86.in_delay_slot ) {1.716 + SLOTILLEGAL();1.717 + } else {1.718 + // TODO: Write TRA1.719 + RAISE_EXCEPTION(EXC_TRAP);1.720 + }1.721 }1.722 break;1.723 case 0x4:1.724 @@ -1917,6 +2131,7 @@1.725 uint32_t disp = (ir&0xFF)<<1;1.726 load_spreg( R_ECX, R_GBR );1.727 ADD_imm32_r32( disp, R_ECX );1.728 + check_ralign16( R_ECX );1.729 MEM_READ_WORD( R_ECX, R_EAX );1.730 store_reg( R_EAX, 0 );1.731 }1.732 @@ -1926,6 +2141,7 @@1.733 uint32_t disp = (ir&0xFF)<<2;1.734 load_spreg( R_ECX, R_GBR );1.735 ADD_imm32_r32( disp, R_ECX );1.736 + check_ralign32( R_ECX );1.737 MEM_READ_LONG( R_ECX, R_EAX );1.738 store_reg( R_EAX, 0 );1.739 }1.740 @@ -1933,8 +2149,12 @@1.741 case 0x7:1.742 { /* MOVA @(disp, PC), R0 */1.743 uint32_t disp = (ir&0xFF)<<2;1.744 - load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );1.745 - store_reg( R_ECX, 0 );1.746 + if( sh4_x86.in_delay_slot ) {1.747 + SLOTILLEGAL();1.748 + } else {1.749 + load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );1.750 + store_reg( R_ECX, 0 );1.751 + }1.752 }1.753 break;1.754 case 0x8:1.755 @@ -1985,7 +2205,7 @@1.756 uint32_t imm = (ir&0xFF);1.757 load_reg( R_EAX, 0 );1.758 load_spreg( R_ECX, R_GBR );1.759 - ADD_r32_r32( R_EAX, R_EBX );1.760 + ADD_r32_r32( R_EAX, R_ECX );1.761 MEM_READ_BYTE( R_ECX, R_EAX );1.762 AND_imm32_r32(imm, R_ECX );1.763 MEM_WRITE_BYTE( R_ECX, R_EAX );1.764 @@ -2005,6 +2225,12 @@1.765 case 0xF:1.766 { /* OR.B #imm, @(R0, GBR) */1.767 uint32_t imm = (ir&0xFF);1.768 + load_reg( R_EAX, 0 );1.769 + load_spreg( R_ECX, R_GBR );1.770 + ADD_r32_r32( R_EAX, R_ECX );1.771 + MEM_READ_BYTE( R_ECX, R_EAX );1.772 + OR_imm32_r32(imm, R_ECX );1.773 + MEM_WRITE_BYTE( R_ECX, R_EAX );1.774 }1.775 break;1.776 }1.777 @@ -2012,9 +2238,13 @@1.778 case 0xD:1.779 { /* MOV.L @(disp, PC), Rn */1.780 uint32_t Rn = ((ir>>8)&0xF); uint32_t disp = (ir&0xFF)<<2;1.781 - load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );1.782 - MEM_READ_LONG( R_ECX, R_EAX );1.783 - store_reg( R_EAX, 0 );1.784 + if( sh4_x86.in_delay_slot ) {1.785 + SLOTILLEGAL();1.786 + } else {1.787 + load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );1.788 + MEM_READ_LONG( R_ECX, R_EAX );1.789 + store_reg( R_EAX, 0 );1.790 + }1.791 }1.792 break;1.793 case 0xE:1.794 @@ -2121,6 +2351,17 @@1.795 case 0x5:1.796 { /* FABS FRn */1.797 uint32_t FRn = ((ir>>8)&0xF);1.798 + load_spreg( R_ECX, R_FPSCR );1.799 + load_spreg( R_EDX, REG_OFFSET(fr_bank) );1.800 + TEST_imm32_r32( FPSCR_PR, R_ECX );1.801 + JNE_rel8(10);1.802 + push_fr(R_EDX, FRn); // 31.803 + FABS_st0(); // 21.804 + pop_fr( R_EDX, FRn); //31.805 + JMP_rel8(8); // 21.806 + push_dr(R_EDX, FRn);1.807 + FABS_st0();1.808 + pop_dr(R_EDX, FRn);1.809 }1.810 break;1.811 case 0x6:1.812 @@ -2184,6 +2425,12 @@1.813 break;1.814 case 0x3:1.815 { /* UNDEF */1.816 + if( sh4_x86.in_delay_slot ) {1.817 + RAISE_EXCEPTION(EXC_SLOT_ILLEGAL);1.818 + } else {1.819 + RAISE_EXCEPTION(EXC_ILLEGAL);1.820 + }1.821 + return 1;1.822 }1.823 break;1.824 default:1.825 @@ -2213,6 +2460,9 @@1.826 }1.828 INC_r32(R_ESI);1.829 -1.830 + if( sh4_x86.in_delay_slot ) {1.831 + sh4_x86.in_delay_slot = FALSE;1.832 + return 1;1.833 + }1.834 return 0;1.835 }
.