Search
lxdream.org :: lxdream/src/sh4/sh4x86.c :: diff
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.c
changeset 374:8f80a795513e
prev368:36fac4c42322
next375:4627600f7f8e
author nkeynes
date Tue Sep 11 02:14:46 2007 +0000 (16 years ago)
permissions -rw-r--r--
last change Cache the pointer to the last FR bank (speeds fp ops up by about 10%)
Implement experimental fix for FLOAT/FTRC
Make read/write sr functions non-static (share with translator)
Much more translator WIP
file annotate diff log raw
1.1 --- a/src/sh4/sh4x86.c Tue Sep 04 08:40:23 2007 +0000
1.2 +++ b/src/sh4/sh4x86.c Tue Sep 11 02:14:46 2007 +0000
1.3 @@ -1,5 +1,5 @@
1.4 /**
1.5 - * $Id: sh4x86.c,v 1.3 2007-09-04 08:40:23 nkeynes Exp $
1.6 + * $Id: sh4x86.c,v 1.4 2007-09-11 02:14:46 nkeynes Exp $
1.7 *
1.8 * SH4 => x86 translation. This version does no real optimization, it just
1.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
1.10 @@ -73,7 +73,7 @@
1.11 {
1.12 unsigned int i;
1.13 for( i=0; i<sh4_x86.backpatch_posn; i++ ) {
1.14 - *sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]));
1.15 + *sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]) - 4);
1.16 }
1.17 }
1.18
1.19 @@ -97,35 +97,20 @@
1.20 OP(REG_OFFSET(r[sh4reg]));
1.21 }
1.22
1.23 -/**
1.24 - * Load the SR register into an x86 register
1.25 - */
1.26 -static inline void read_sr( int x86reg )
1.27 +static inline void load_reg16s( int x86reg, int sh4reg )
1.28 {
1.29 - MOV_ebp_r32( R_M, x86reg );
1.30 - SHL1_r32( x86reg );
1.31 - OR_ebp_r32( R_Q, x86reg );
1.32 - SHL_imm8_r32( 7, x86reg );
1.33 - OR_ebp_r32( R_S, x86reg );
1.34 - SHL1_r32( x86reg );
1.35 - OR_ebp_r32( R_T, x86reg );
1.36 - OR_ebp_r32( R_SR, x86reg );
1.37 + OP(0x0F);
1.38 + OP(0xBF);
1.39 + MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
1.40 }
1.41
1.42 -static inline void write_sr( int x86reg )
1.43 +static inline void load_reg16u( int x86reg, int sh4reg )
1.44 {
1.45 - TEST_imm32_r32( SR_M, x86reg );
1.46 - SETNE_ebp(R_M);
1.47 - TEST_imm32_r32( SR_Q, x86reg );
1.48 - SETNE_ebp(R_Q);
1.49 - TEST_imm32_r32( SR_S, x86reg );
1.50 - SETNE_ebp(R_S);
1.51 - TEST_imm32_r32( SR_T, x86reg );
1.52 - SETNE_ebp(R_T);
1.53 - AND_imm32_r32( SR_MQSTMASK, x86reg );
1.54 - MOV_r32_ebp( x86reg, R_SR );
1.55 + OP(0x0F);
1.56 + OP(0xB7);
1.57 + MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
1.58 +
1.59 }
1.60 -
1.61
1.62 static inline void load_spreg( int x86reg, int regoffset )
1.63 {
1.64 @@ -160,6 +145,49 @@
1.65 OP(regoffset);
1.66 }
1.67
1.68 +
1.69 +#define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
1.70 +
1.71 +static inline void load_xf_bank( int bankreg ) // Emit code leaving ebp + REG_OFFSET(fr) + (FPSCR bit 21 ? 0x40 : 0) in bankreg
1.72 +{
1.73 + load_spreg( bankreg, R_FPSCR );
1.74 + SHR_imm8_r32( (21 - 6), bankreg ); // Move FPSCR bit 21 down to bit 6, i.e. bit * 64 (the bank size)
1.75 + AND_imm8s_r32( 0x40, bankreg ); // Mask everything else off: bankreg is now 0 or 0x40
1.76 + OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA bankreg, [bankreg + ebp + disp8]: ModRM mod=01 + SIB(index=ebp, base=bankreg); NOTE(review): REG_OFFSET(fr) must fit in a signed byte
1.77 +}
1.78 +
1.79 +static inline void push_fr( int bankreg, int frm ) // Emit an x87 load of the 32-bit float at [bankreg + (frm^1)*4]
1.80 +{
1.81 + OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2); // FLD.S [bankreg + (frm^1)*4] (D9 /0, disp8); frm^1 presumably reflects pair-swapped storage - TODO confirm
1.82 +}
1.83 +
1.84 +static inline void pop_fr( int bankreg, int frm ) // Emit an x87 store-and-pop of ST0 to the 32-bit float at [bankreg + (frm^1)*4]
1.85 +{
1.86 + OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FSTP.S [bankreg + (frm^1)*4] (D9 /3: stores ST0 and pops it, unlike FST which is /2)
1.87 +}
1.88 +
1.89 +static inline void push_dr( int bankreg, int frm ) // Emit code to fetch a 64-bit operand starting at register index frm
1.90 +{
1.91 + if( frm&1 ) {
1.92 + // An odd register index is technically undefined for a double operand, but it seems
1.93 + // to behave consistently: high 32 bits come from FRm (32 bits), low 32 bits are 0.
1.94 + OP(0xFF); OP(0x70 + bankreg); OP((frm^1)<<2); // PUSH dword [bankreg + (frm^1)*4] (FF /6, disp8)
1.95 + PUSH_imm32(0);
1.96 + // NOTE(review): this path only pushes 8 bytes onto the CPU stack; nothing here loads
1.97 + // them into the x87 stack or releases them again - looks unfinished, confirm.
1.98 + } else {
1.99 + OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4] (DD /0, disp8)
1.100 + }
1.101 +}
1.102 +
1.103 +static inline void pop_dr( int bankreg, int frm ) // Emit an x87 store-and-pop of ST0 to the 64-bit value at [bankreg + frm*4]
1.104 +{
1.105 + if( frm&1 ) { // odd register index: no code is emitted, so ST0 is neither stored nor popped
1.106 + } else {
1.107 + OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FSTP.D [bankreg + frm*4] (DD /3: stores ST0 and pops it)
1.108 + }
1.109 +}
1.110 +
1.111 /**
1.112 * Note: clobbers EAX to make the indirect call - this isn't usually
1.113 * a problem since the callee will usually clobber it anyway.
1.114 @@ -248,7 +276,7 @@
1.115 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
1.116
1.117 #define RAISE_EXCEPTION( exc ) call_func1(sh4_raise_exception, exc);
1.118 -#define CHECKSLOTILLEGAL() if(sh4_x86.in_delay_slot) RAISE_EXCEPTION(EXC_SLOT_ILLEGAL)
1.119 +#define SLOTILLEGAL() RAISE_EXCEPTION(EXC_SLOT_ILLEGAL); return 1
1.120
1.121
1.122
1.123 @@ -259,9 +287,9 @@
1.124 void sh4_translate_begin_block()
1.125 {
1.126 PUSH_r32(R_EBP);
1.127 - PUSH_r32(R_ESI);
1.128 /* mov &sh4r, ebp */
1.129 load_imm32( R_EBP, (uint32_t)&sh4r );
1.130 + PUSH_r32(R_EDI);
1.131 PUSH_r32(R_ESI);
1.132
1.133 sh4_x86.in_delay_slot = FALSE;
1.134 @@ -273,16 +301,18 @@
1.135 /**
1.136 * Exit the block early (ie branch out), conditionally or otherwise
1.137 */
1.138 -void exit_block( uint32_t pc )
1.139 +void exit_block( )
1.140 {
1.141 - load_imm32( R_ECX, pc );
1.142 - store_spreg( R_ECX, REG_OFFSET(pc) );
1.143 + store_spreg( R_EDI, REG_OFFSET(pc) );
1.144 MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
1.145 load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.146 MUL_r32( R_ESI );
1.147 ADD_r32_r32( R_EAX, R_ECX );
1.148 store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.149 XOR_r32_r32( R_EAX, R_EAX );
1.150 + POP_r32(R_ESI);
1.151 + POP_r32(R_EDI);
1.152 + POP_r32(R_EBP);
1.153 RET();
1.154 }
1.155
1.156 @@ -292,7 +322,7 @@
1.157 void sh4_translate_end_block( sh4addr_t pc ) {
1.158 assert( !sh4_x86.in_delay_slot ); // should never stop here
1.159 // Normal termination - save PC, cycle count
1.160 - exit_block( pc );
1.161 + exit_block( );
1.162
1.163 uint8_t *end_ptr = xlat_output;
1.164 // Exception termination. Jump block for various exception codes:
1.165 @@ -348,7 +378,7 @@
1.166 case 0x0:
1.167 { /* STC SR, Rn */
1.168 uint32_t Rn = ((ir>>8)&0xF);
1.169 - read_sr( R_EAX );
1.170 + call_func0(sh4_read_sr);
1.171 store_reg( R_EAX, Rn );
1.172 }
1.173 break;
1.174 @@ -388,7 +418,8 @@
1.175 case 0x1:
1.176 { /* STC Rm_BANK, Rn */
1.177 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm_BANK = ((ir>>4)&0x7);
1.178 - /* TODO */
1.179 + load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
1.180 + store_reg( R_EAX, Rn );
1.181 }
1.182 break;
1.183 }
1.184 @@ -398,16 +429,42 @@
1.185 case 0x0:
1.186 { /* BSRF Rn */
1.187 uint32_t Rn = ((ir>>8)&0xF);
1.188 + if( sh4_x86.in_delay_slot ) {
1.189 + SLOTILLEGAL();
1.190 + } else {
1.191 + load_imm32( R_EAX, pc + 4 );
1.192 + store_spreg( R_EAX, R_PR );
1.193 + load_reg( R_EDI, Rn );
1.194 + ADD_r32_r32( R_EAX, R_EDI );
1.195 + sh4_x86.in_delay_slot = TRUE;
1.196 + INC_r32(R_ESI);
1.197 + return 0;
1.198 + }
1.199 }
1.200 break;
1.201 case 0x2:
1.202 { /* BRAF Rn - delayed branch to PC + 4 + Rn */
1.203 uint32_t Rn = ((ir>>8)&0xF);
1.204 + if( sh4_x86.in_delay_slot ) {
1.205 + SLOTILLEGAL();
1.206 + } else {
1.207 + load_reg( R_EDI, Rn ); ADD_imm32_r32( pc + 4, R_EDI ); // EDI = branch target; Rn is PC-relative, as in BSRF
1.208 + sh4_x86.in_delay_slot = TRUE;
1.209 + INC_r32(R_ESI);
1.210 + return 0;
1.211 + }
1.212 }
1.213 break;
1.214 case 0x8:
1.215 { /* PREF @Rn - flush the store queue when Rn addresses the 0xE0000000 region */
1.216 uint32_t Rn = ((ir>>8)&0xF);
1.217 + load_reg( R_EAX, Rn );
1.218 + PUSH_r32( R_EAX ); // unmasked address, left on the stack as the call argument
1.219 + AND_imm32_r32( 0xFC000000, R_EAX );
1.220 + CMP_imm32_r32( 0xE0000000, R_EAX );
1.221 + JNE_rel8(8); // NOTE(review): must skip exactly the bytes emitted by call_func0 so the ADD below still runs - verify the distance
1.222 + call_func0( sh4_flush_store_queue );
1.223 + ADD_imm8s_r32( 4, R_ESP ); // release the pushed argument; the previous -4 grew the stack instead
1.224 }
1.225 break;
1.226 case 0x9:
1.227 @@ -430,6 +487,7 @@
1.228 uint32_t Rn = ((ir>>8)&0xF);
1.229 load_reg( R_EAX, 0 );
1.230 load_reg( R_ECX, Rn );
1.231 + check_walign32( R_ECX );
1.232 MEM_WRITE_LONG( R_ECX, R_EAX );
1.233 }
1.234 break;
1.235 @@ -454,6 +512,7 @@
1.236 load_reg( R_EAX, 0 );
1.237 load_reg( R_ECX, Rn );
1.238 ADD_r32_r32( R_EAX, R_ECX );
1.239 + check_walign16( R_ECX );
1.240 load_reg( R_EAX, Rm );
1.241 MEM_WRITE_WORD( R_ECX, R_EAX );
1.242 }
1.243 @@ -464,6 +523,7 @@
1.244 load_reg( R_EAX, 0 );
1.245 load_reg( R_ECX, Rn );
1.246 ADD_r32_r32( R_EAX, R_ECX );
1.247 + check_walign32( R_ECX );
1.248 load_reg( R_EAX, Rm );
1.249 MEM_WRITE_LONG( R_ECX, R_EAX );
1.250 }
1.251 @@ -481,14 +541,21 @@
1.252 switch( (ir&0xFF0) >> 4 ) {
1.253 case 0x0:
1.254 { /* CLRT */
1.255 + CLC();
1.256 + SETC_t();
1.257 }
1.258 break;
1.259 case 0x1:
1.260 { /* SETT */
1.261 + STC();
1.262 + SETC_t();
1.263 }
1.264 break;
1.265 case 0x2:
1.266 { /* CLRMAC */
1.267 + XOR_r32_r32(R_EAX, R_EAX);
1.268 + store_spreg( R_EAX, R_MACL );
1.269 + store_spreg( R_EAX, R_MACH );
1.270 }
1.271 break;
1.272 case 0x3:
1.273 @@ -497,10 +564,14 @@
1.274 break;
1.275 case 0x4:
1.276 { /* CLRS */
1.277 + CLC();
1.278 + SETC_sh4r(R_S);
1.279 }
1.280 break;
1.281 case 0x5:
1.282 { /* SETS */
1.283 + STC();
1.284 + SETC_sh4r(R_S);
1.285 }
1.286 break;
1.287 default:
1.288 @@ -595,14 +666,34 @@
1.289 switch( (ir&0xFF0) >> 4 ) {
1.290 case 0x0:
1.291 { /* RTS */
1.292 + if( sh4_x86.in_delay_slot ) {
1.293 + SLOTILLEGAL();
1.294 + } else {
1.295 + load_spreg( R_EDI, R_PR );
1.296 + sh4_x86.in_delay_slot = TRUE;
1.297 + INC_r32(R_ESI);
1.298 + return 0;
1.299 + }
1.300 }
1.301 break;
1.302 case 0x1:
1.303 { /* SLEEP */
1.304 + /* TODO */
1.305 }
1.306 break;
1.307 case 0x2:
1.308 { /* RTE - delayed return from exception: SSR -> SR, SPC -> PC */
1.309 + check_priv();
1.310 + if( sh4_x86.in_delay_slot ) {
1.311 + SLOTILLEGAL();
1.312 + } else {
1.313 + load_spreg( R_EDI, R_SPC ); // return address comes from SPC (PR is what RTS uses); assumes R_SPC is defined next to R_SSR
1.314 + load_spreg( R_EAX, R_SSR );
1.315 + call_func1( sh4_write_sr, R_EAX );
1.316 + sh4_x86.in_delay_slot = TRUE;
1.317 + INC_r32(R_ESI);
1.318 + return 0;
1.319 + }
1.320 }
1.321 break;
1.322 default:
1.323 @@ -626,6 +717,7 @@
1.324 load_reg( R_EAX, 0 );
1.325 load_reg( R_ECX, Rm );
1.326 ADD_r32_r32( R_EAX, R_ECX );
1.327 + check_ralign16( R_ECX );
1.328 MEM_READ_WORD( R_ECX, R_EAX );
1.329 store_reg( R_EAX, Rn );
1.330 }
1.331 @@ -636,6 +728,7 @@
1.332 load_reg( R_EAX, 0 );
1.333 load_reg( R_ECX, Rm );
1.334 ADD_r32_r32( R_EAX, R_ECX );
1.335 + check_ralign32( R_ECX );
1.336 MEM_READ_LONG( R_ECX, R_EAX );
1.337 store_reg( R_EAX, Rn );
1.338 }
1.339 @@ -656,6 +749,7 @@
1.340 load_reg( R_ECX, Rn );
1.341 load_reg( R_EAX, Rm );
1.342 ADD_imm32_r32( disp, R_ECX );
1.343 + check_walign32( R_ECX );
1.344 MEM_WRITE_LONG( R_ECX, R_EAX );
1.345 }
1.346 break;
1.347 @@ -673,6 +767,7 @@
1.348 { /* MOV.W Rm, @Rn - store the low word of Rm at address Rn */
1.349 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.350 load_reg( R_ECX, Rn );
1.351 + check_walign16( R_ECX );
1.352 load_reg( R_EAX, Rm ); // value to store, loaded after the alignment check like the other word stores
1.353 MEM_WRITE_WORD( R_ECX, R_EAX ); // this is a store: the old code read memory and overwrote Rn
1.354 }
1.355 @@ -682,6 +777,7 @@
1.356 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.357 load_reg( R_EAX, Rm );
1.358 load_reg( R_ECX, Rn );
1.359 + check_walign32(R_ECX);
1.360 MEM_WRITE_LONG( R_ECX, R_EAX );
1.361 }
1.362 break;
1.363 @@ -699,6 +795,7 @@
1.364 { /* MOV.W Rm, @-Rn */
1.365 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.366 load_reg( R_ECX, Rn );
1.367 + check_walign16( R_ECX );
1.368 load_reg( R_EAX, Rm );
1.369 ADD_imm8s_r32( -2, R_ECX );
1.370 MEM_WRITE_WORD( R_ECX, R_EAX );
1.371 @@ -709,6 +806,7 @@
1.372 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.373 load_reg( R_EAX, Rm );
1.374 load_reg( R_ECX, Rn );
1.375 + check_walign32( R_ECX );
1.376 ADD_imm8s_r32( -4, R_ECX );
1.377 store_reg( R_ECX, Rn );
1.378 MEM_WRITE_LONG( R_ECX, R_EAX );
1.379 @@ -794,11 +892,19 @@
1.380 case 0xE:
1.381 { /* MULU.W Rm, Rn */
1.382 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.383 + load_reg16u( R_EAX, Rm );
1.384 + load_reg16u( R_ECX, Rn );
1.385 + MUL_r32( R_ECX );
1.386 + store_spreg( R_EAX, R_MACL );
1.387 }
1.388 break;
1.389 case 0xF:
1.390 { /* MULS.W Rm, Rn */
1.391 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.392 + load_reg16s( R_EAX, Rm );
1.393 + load_reg16s( R_ECX, Rn );
1.394 + MUL_r32( R_ECX );
1.395 + store_spreg( R_EAX, R_MACL );
1.396 }
1.397 break;
1.398 default:
1.399 @@ -838,6 +944,17 @@
1.400 case 0x4:
1.401 { /* DIV1 Rm, Rn */
1.402 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.403 + load_reg( R_ECX, Rn );
1.404 + LDC_t();
1.405 + RCL1_r32( R_ECX ); // OP2
1.406 + SETC_r32( R_EDX ); // Q
1.407 + load_spreg( R_EAX, R_Q );
1.408 + CMP_sh4r_r32( R_M, R_EAX );
1.409 + JE_rel8(8);
1.410 + ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_ECX );
1.411 + JMP_rel8(3);
1.412 + SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_ECX );
1.413 + // TODO
1.414 }
1.415 break;
1.416 case 0x5:
1.417 @@ -1091,12 +1208,11 @@
1.418 case 0x0:
1.419 { /* STC.L SR, @-Rn - pre-decrement Rn by 4, then store SR there */
1.420 uint32_t Rn = ((ir>>8)&0xF);
1.421 - /* TODO */
1.422 - load_reg( R_ECX, Rn );
1.423 - ADD_imm8s_r32( -4, Rn );
1.424 - store_reg( R_ECX, Rn );
1.425 - read_sr( R_EAX );
1.426 - MEM_WRITE_LONG( R_ECX, R_EAX );
1.427 + call_func0( sh4_read_sr ); // SR -> EAX; done first because ECX is caller-saved and would not survive the call
1.428 + load_reg( R_ECX, Rn );
1.429 + ADD_imm8s_r32( -4, R_ECX ); // operand is the x86 register holding the address, not the SH4 register index
1.430 + store_reg( R_ECX, Rn );
1.431 + MEM_WRITE_LONG( R_ECX, R_EAX );
1.432 }
1.433 break;
1.434 case 0x1:
1.435 @@ -1147,6 +1263,11 @@
1.436 case 0x1:
1.437 { /* STC.L Rm_BANK, @-Rn - pre-decrement Rn by 4, then store the banked register there */
1.438 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm_BANK = ((ir>>4)&0x7);
1.439 + load_reg( R_ECX, Rn );
1.440 + ADD_imm8s_r32( -4, R_ECX ); // operand is the x86 register holding the address, not the SH4 register index
1.441 + store_reg( R_ECX, Rn );
1.442 + load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
1.443 + MEM_WRITE_LONG( R_ECX, R_EAX );
1.444 }
1.445 break;
1.446 }
1.447 @@ -1307,7 +1428,7 @@
1.448 ADD_imm8s_r32( 4, R_EAX );
1.449 store_reg( R_EAX, Rm );
1.450 MEM_READ_LONG( R_ECX, R_EAX );
1.451 - write_sr( R_EAX );
1.452 + call_func1( sh4_write_sr, R_EAX );
1.453 }
1.454 break;
1.455 case 0x1:
1.456 @@ -1362,6 +1483,12 @@
1.457 case 0x1:
1.458 { /* LDC.L @Rm+, Rn_BANK */
1.459 uint32_t Rm = ((ir>>8)&0xF); uint32_t Rn_BANK = ((ir>>4)&0x7);
1.460 + load_reg( R_EAX, Rm );
1.461 + MOV_r32_r32( R_EAX, R_ECX );
1.462 + ADD_imm8s_r32( 4, R_EAX );
1.463 + store_reg( R_EAX, Rm );
1.464 + MEM_READ_LONG( R_ECX, R_EAX );
1.465 + store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
1.466 }
1.467 break;
1.468 }
1.469 @@ -1489,6 +1616,16 @@
1.470 case 0x0:
1.471 { /* JSR @Rn */
1.472 uint32_t Rn = ((ir>>8)&0xF);
1.473 + if( sh4_x86.in_delay_slot ) {
1.474 + SLOTILLEGAL();
1.475 + } else {
1.476 + load_imm32( R_EAX, pc + 4 );
1.477 + store_spreg( R_EAX, R_PR );
1.478 + load_reg( R_EDI, Rn );
1.479 + sh4_x86.in_delay_slot = TRUE;
1.480 + INC_r32(R_ESI);
1.481 + return 0;
1.482 + }
1.483 }
1.484 break;
1.485 case 0x1:
1.486 @@ -1505,6 +1642,14 @@
1.487 case 0x2:
1.488 { /* JMP @Rn */
1.489 uint32_t Rn = ((ir>>8)&0xF);
1.490 + if( sh4_x86.in_delay_slot ) {
1.491 + SLOTILLEGAL();
1.492 + } else {
1.493 + load_reg( R_EDI, Rn );
1.494 + sh4_x86.in_delay_slot = TRUE;
1.495 + INC_r32(R_ESI);
1.496 + return 0;
1.497 + }
1.498 }
1.499 break;
1.500 default:
1.501 @@ -1555,7 +1700,7 @@
1.502 { /* LDC Rm, SR */
1.503 uint32_t Rm = ((ir>>8)&0xF);
1.504 load_reg( R_EAX, Rm );
1.505 - write_sr( R_EAX );
1.506 + call_func1( sh4_write_sr, R_EAX );
1.507 }
1.508 break;
1.509 case 0x1:
1.510 @@ -1594,6 +1739,8 @@
1.511 case 0x1:
1.512 { /* LDC Rm, Rn_BANK */
1.513 uint32_t Rm = ((ir>>8)&0xF); uint32_t Rn_BANK = ((ir>>4)&0x7);
1.514 + load_reg( R_EAX, Rm );
1.515 + store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
1.516 }
1.517 break;
1.518 }
1.519 @@ -1610,6 +1757,7 @@
1.520 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF); uint32_t disp = (ir&0xF)<<2;
1.521 load_reg( R_ECX, Rm );
1.522 ADD_imm8s_r32( disp, R_ECX );
1.523 + check_ralign32( R_ECX );
1.524 MEM_READ_LONG( R_ECX, R_EAX );
1.525 store_reg( R_EAX, Rn );
1.526 }
1.527 @@ -1628,6 +1776,7 @@
1.528 { /* MOV.W @Rm, Rn */
1.529 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.530 load_reg( R_ECX, Rm );
1.531 + check_ralign16( R_ECX );
1.532 MEM_READ_WORD( R_ECX, R_EAX );
1.533 store_reg( R_EAX, Rn );
1.534 }
1.535 @@ -1636,6 +1785,7 @@
1.536 { /* MOV.L @Rm, Rn */
1.537 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.538 load_reg( R_ECX, Rm );
1.539 + check_ralign32( R_ECX );
1.540 MEM_READ_LONG( R_ECX, R_EAX );
1.541 store_reg( R_EAX, Rn );
1.542 }
1.543 @@ -1662,6 +1812,7 @@
1.544 { /* MOV.W @Rm+, Rn */
1.545 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.546 load_reg( R_EAX, Rm );
1.547 + check_ralign16( R_EAX );
1.548 MOV_r32_r32( R_EAX, R_ECX );
1.549 ADD_imm8s_r32( 2, R_EAX );
1.550 store_reg( R_EAX, Rm );
1.551 @@ -1673,6 +1824,7 @@
1.552 { /* MOV.L @Rm+, Rn - load a long from address Rm, then post-increment Rm by 4 */
1.553 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.554 load_reg( R_EAX, Rm );
1.555 + check_ralign32( R_EAX ); // the address is still only in EAX here; ECX is not loaded until the next line
1.556 MOV_r32_r32( R_EAX, R_ECX );
1.557 ADD_imm8s_r32( 4, R_EAX );
1.558 store_reg( R_EAX, Rm );
1.559 @@ -1785,6 +1937,7 @@
1.560 load_reg( R_ECX, Rn );
1.561 load_reg( R_EAX, 0 );
1.562 ADD_imm32_r32( disp, R_ECX );
1.563 + check_walign16( R_ECX );
1.564 MEM_WRITE_WORD( R_ECX, R_EAX );
1.565 }
1.566 break;
1.567 @@ -1802,6 +1955,7 @@
1.568 uint32_t Rm = ((ir>>4)&0xF); uint32_t disp = (ir&0xF)<<1;
1.569 load_reg( R_ECX, Rm );
1.570 ADD_imm32_r32( disp, R_ECX );
1.571 + check_ralign16( R_ECX );
1.572 MEM_READ_WORD( R_ECX, R_EAX );
1.573 store_reg( R_EAX, 0 );
1.574 }
1.575 @@ -1817,32 +1971,63 @@
1.576 case 0x9:
1.577 { /* BT disp */
1.578 int32_t disp = SIGNEXT8(ir&0xFF)<<1;
1.579 - /* If true, result PC += 4 + disp. else result PC = pc+2 */
1.580 - return pc + 2;
1.581 + if( sh4_x86.in_delay_slot ) {
1.582 + SLOTILLEGAL();
1.583 + } else {
1.584 + load_imm32( R_EDI, pc + 2 );
1.585 + CMP_imm8s_sh4r( 0, R_T );
1.586 + JE_rel8( 5 );
1.587 + load_imm32( R_EDI, disp + pc + 4 );
1.588 + INC_r32(R_ESI);
1.589 + return 1;
1.590 + }
1.591 }
1.592 break;
1.593 case 0xB:
1.594 { /* BF disp */
1.595 int32_t disp = SIGNEXT8(ir&0xFF)<<1;
1.596 - CMP_imm8s_ebp( 0, R_T );
1.597 - JNE_rel8( 1 );
1.598 - exit_block( disp + pc + 4 );
1.599 - return 1;
1.600 + if( sh4_x86.in_delay_slot ) {
1.601 + SLOTILLEGAL();
1.602 + } else {
1.603 + load_imm32( R_EDI, pc + 2 );
1.604 + CMP_imm8s_sh4r( 0, R_T );
1.605 + JNE_rel8( 5 );
1.606 + load_imm32( R_EDI, disp + pc + 4 );
1.607 + INC_r32(R_ESI);
1.608 + return 1;
1.609 + }
1.610 }
1.611 break;
1.612 case 0xD:
1.613 { /* BT/S disp - delayed branch to pc + 4 + disp when T is set */
1.614 int32_t disp = SIGNEXT8(ir&0xFF)<<1;
1.615 - return pc + 4;
1.616 + if( sh4_x86.in_delay_slot ) {
1.617 + SLOTILLEGAL();
1.618 + } else {
1.619 + load_imm32( R_EDI, pc + 4 ); // not-taken PC: the delay slot at pc + 2 always executes, so fall-through resumes at pc + 4
1.620 + CMP_imm8s_sh4r( 0, R_T );
1.621 + JE_rel8( 5 ); // T clear: skip the 5-byte load below and keep the fall-through PC
1.622 + load_imm32( R_EDI, disp + pc + 4 );
1.623 + sh4_x86.in_delay_slot = TRUE;
1.624 + INC_r32(R_ESI);
1.625 + return 0;
1.626 + }
1.627 }
1.628 break;
1.629 case 0xF:
1.630 { /* BF/S disp - delayed branch to pc + 4 + disp when T is clear */
1.631 int32_t disp = SIGNEXT8(ir&0xFF)<<1;
1.632 - CMP_imm8s_ebp( 0, R_T );
1.633 - JNE_rel8( 1 );
1.634 - exit_block( disp + pc + 4 );
1.635 - sh4_x86.in_delay_slot = TRUE;
1.636 + if( sh4_x86.in_delay_slot ) {
1.637 + SLOTILLEGAL();
1.638 + } else {
1.639 + load_imm32( R_EDI, pc + 4 ); // not-taken PC: the delay slot at pc + 2 always executes, so fall-through resumes at pc + 4
1.640 + CMP_imm8s_sh4r( 0, R_T );
1.641 + JNE_rel8( 5 ); // T set: skip the 5-byte load below and keep the fall-through PC
1.642 + load_imm32( R_EDI, disp + pc + 4 );
1.643 + sh4_x86.in_delay_slot = TRUE;
1.644 + INC_r32(R_ESI);
1.645 + return 0;
1.646 + }
1.647 }
1.648 break;
1.649 default:
1.650 @@ -1853,20 +2038,41 @@
1.651 case 0x9:
1.652 { /* MOV.W @(disp, PC), Rn */
1.653 uint32_t Rn = ((ir>>8)&0xF); uint32_t disp = (ir&0xFF)<<1;
1.654 - load_imm32( R_ECX, pc + disp + 4 );
1.655 - MEM_READ_WORD( R_ECX, R_EAX );
1.656 - store_reg( R_EAX, Rn );
1.657 + if( sh4_x86.in_delay_slot ) {
1.658 + SLOTILLEGAL();
1.659 + } else {
1.660 + load_imm32( R_ECX, pc + disp + 4 );
1.661 + MEM_READ_WORD( R_ECX, R_EAX );
1.662 + store_reg( R_EAX, Rn );
1.663 + }
1.664 }
1.665 break;
1.666 case 0xA:
1.667 { /* BRA disp */
1.668 int32_t disp = SIGNEXT12(ir&0xFFF)<<1;
1.669 - exit_block( disp + pc + 4 );
1.670 + if( sh4_x86.in_delay_slot ) {
1.671 + SLOTILLEGAL();
1.672 + } else {
1.673 + load_imm32( R_EDI, disp + pc + 4 );
1.674 + sh4_x86.in_delay_slot = TRUE;
1.675 + INC_r32(R_ESI);
1.676 + return 0;
1.677 + }
1.678 }
1.679 break;
1.680 case 0xB:
1.681 { /* BSR disp */
1.682 int32_t disp = SIGNEXT12(ir&0xFFF)<<1;
1.683 + if( sh4_x86.in_delay_slot ) {
1.684 + SLOTILLEGAL();
1.685 + } else {
1.686 + load_imm32( R_EAX, pc + 4 );
1.687 + store_spreg( R_EAX, R_PR );
1.688 + load_imm32( R_EDI, disp + pc + 4 );
1.689 + sh4_x86.in_delay_slot = TRUE;
1.690 + INC_r32(R_ESI);
1.691 + return 0;
1.692 + }
1.693 }
1.694 break;
1.695 case 0xC:
1.696 @@ -1886,6 +2092,7 @@
1.697 load_spreg( R_ECX, R_GBR );
1.698 load_reg( R_EAX, 0 );
1.699 ADD_imm32_r32( disp, R_ECX );
1.700 + check_walign16( R_ECX );
1.701 MEM_WRITE_WORD( R_ECX, R_EAX );
1.702 }
1.703 break;
1.704 @@ -1895,12 +2102,19 @@
1.705 load_spreg( R_ECX, R_GBR );
1.706 load_reg( R_EAX, 0 );
1.707 ADD_imm32_r32( disp, R_ECX );
1.708 + check_walign32( R_ECX );
1.709 MEM_WRITE_LONG( R_ECX, R_EAX );
1.710 }
1.711 break;
1.712 case 0x3:
1.713 { /* TRAPA #imm */
1.714 uint32_t imm = (ir&0xFF);
1.715 + if( sh4_x86.in_delay_slot ) {
1.716 + SLOTILLEGAL();
1.717 + } else {
1.718 + // TODO: Write TRA
1.719 + RAISE_EXCEPTION(EXC_TRAP);
1.720 + }
1.721 }
1.722 break;
1.723 case 0x4:
1.724 @@ -1917,6 +2131,7 @@
1.725 uint32_t disp = (ir&0xFF)<<1;
1.726 load_spreg( R_ECX, R_GBR );
1.727 ADD_imm32_r32( disp, R_ECX );
1.728 + check_ralign16( R_ECX );
1.729 MEM_READ_WORD( R_ECX, R_EAX );
1.730 store_reg( R_EAX, 0 );
1.731 }
1.732 @@ -1926,6 +2141,7 @@
1.733 uint32_t disp = (ir&0xFF)<<2;
1.734 load_spreg( R_ECX, R_GBR );
1.735 ADD_imm32_r32( disp, R_ECX );
1.736 + check_ralign32( R_ECX );
1.737 MEM_READ_LONG( R_ECX, R_EAX );
1.738 store_reg( R_EAX, 0 );
1.739 }
1.740 @@ -1933,8 +2149,12 @@
1.741 case 0x7:
1.742 { /* MOVA @(disp, PC), R0 */
1.743 uint32_t disp = (ir&0xFF)<<2;
1.744 - load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
1.745 - store_reg( R_ECX, 0 );
1.746 + if( sh4_x86.in_delay_slot ) {
1.747 + SLOTILLEGAL();
1.748 + } else {
1.749 + load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
1.750 + store_reg( R_ECX, 0 );
1.751 + }
1.752 }
1.753 break;
1.754 case 0x8:
1.755 @@ -1985,7 +2205,7 @@
1.756 uint32_t imm = (ir&0xFF);
1.757 load_reg( R_EAX, 0 );
1.758 load_spreg( R_ECX, R_GBR );
1.759 - ADD_r32_r32( R_EAX, R_EBX );
1.760 + ADD_r32_r32( R_EAX, R_ECX );
1.761 MEM_READ_BYTE( R_ECX, R_EAX ); // NOTE(review): ECX is caller-saved; confirm the address survives this call for the write below
1.762 AND_imm32_r32(imm, R_EAX ); // mask the byte that was read (EAX), not the address register
1.763 MEM_WRITE_BYTE( R_ECX, R_EAX );
1.764 @@ -2005,6 +2225,12 @@
1.765 case 0xF:
1.766 { /* OR.B #imm, @(R0, GBR) - read-modify-write of the byte at R0 + GBR */
1.767 uint32_t imm = (ir&0xFF);
1.768 + load_reg( R_EAX, 0 );
1.769 + load_spreg( R_ECX, R_GBR );
1.770 + ADD_r32_r32( R_EAX, R_ECX );
1.771 + MEM_READ_BYTE( R_ECX, R_EAX ); // NOTE(review): ECX is caller-saved; confirm the address survives this call for the write below
1.772 + OR_imm32_r32(imm, R_EAX ); // combine with the byte that was read (EAX), not the address register
1.773 + MEM_WRITE_BYTE( R_ECX, R_EAX );
1.774 }
1.775 break;
1.776 }
1.777 @@ -2012,9 +2238,13 @@
1.778 case 0xD:
1.779 { /* MOV.L @(disp, PC), Rn - load a long from (pc & ~3) + disp + 4 into Rn */
1.780 uint32_t Rn = ((ir>>8)&0xF); uint32_t disp = (ir&0xFF)<<2;
1.781 - load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
1.782 - MEM_READ_LONG( R_ECX, R_EAX );
1.783 - store_reg( R_EAX, 0 );
1.784 + if( sh4_x86.in_delay_slot ) {
1.785 + SLOTILLEGAL();
1.786 + } else {
1.787 + load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
1.788 + MEM_READ_LONG( R_ECX, R_EAX );
1.789 + store_reg( R_EAX, Rn ); // destination is the decoded Rn (as in MOV.W @(disp, PC), Rn), not R0
1.790 + }
1.791 }
1.792 break;
1.793 case 0xE:
1.794 @@ -2121,6 +2351,17 @@
1.795 case 0x5:
1.796 { /* FABS FRn */
1.797 uint32_t FRn = ((ir>>8)&0xF);
1.798 + load_spreg( R_ECX, R_FPSCR );
1.799 + load_spreg( R_EDX, REG_OFFSET(fr_bank) );
1.800 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.801 + JNE_rel8(10);
1.802 + push_fr(R_EDX, FRn); // 3
1.803 + FABS_st0(); // 2
1.804 + pop_fr( R_EDX, FRn); //3
1.805 + JMP_rel8(8); // 2
1.806 + push_dr(R_EDX, FRn);
1.807 + FABS_st0();
1.808 + pop_dr(R_EDX, FRn);
1.809 }
1.810 break;
1.811 case 0x6:
1.812 @@ -2184,6 +2425,12 @@
1.813 break;
1.814 case 0x3:
1.815 { /* UNDEF */
1.816 + if( sh4_x86.in_delay_slot ) {
1.817 + RAISE_EXCEPTION(EXC_SLOT_ILLEGAL);
1.818 + } else {
1.819 + RAISE_EXCEPTION(EXC_ILLEGAL);
1.820 + }
1.821 + return 1;
1.822 }
1.823 break;
1.824 default:
1.825 @@ -2213,6 +2460,9 @@
1.826 }
1.827
1.828 INC_r32(R_ESI);
1.829 -
1.830 + if( sh4_x86.in_delay_slot ) {
1.831 + sh4_x86.in_delay_slot = FALSE;
1.832 + return 1;
1.833 + }
1.834 return 0;
1.835 }
.