Search
lxdream.org :: lxdream/src/sh4/sh4x86.in :: diff
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 374:8f80a795513e
prev368:36fac4c42322
next375:4627600f7f8e
author nkeynes
date Tue Sep 11 02:14:46 2007 +0000 (16 years ago)
permissions -rw-r--r--
last change Cache the pointer to the last FR bank (speeds fp ops up by about 10%)
Implement experimental fix for FLOAT/FTRC
Make read/write sr functions non-static (share with translator)
Much more translator WIP
file annotate diff log raw
1.1 --- a/src/sh4/sh4x86.in Tue Sep 04 08:40:23 2007 +0000
1.2 +++ b/src/sh4/sh4x86.in Tue Sep 11 02:14:46 2007 +0000
1.3 @@ -1,5 +1,5 @@
1.4 /**
1.5 - * $Id: sh4x86.in,v 1.3 2007-09-04 08:40:23 nkeynes Exp $
1.6 + * $Id: sh4x86.in,v 1.4 2007-09-11 02:14:46 nkeynes Exp $
1.7 *
1.8 * SH4 => x86 translation. This version does no real optimization, it just
1.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
1.10 @@ -73,7 +73,7 @@
1.11 {
1.12 unsigned int i;
1.13 for( i=0; i<sh4_x86.backpatch_posn; i++ ) {
1.14 - *sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]));
1.15 + *sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]) - 4);
1.16 }
1.17 }
1.18
1.19 @@ -97,35 +97,20 @@
1.20 OP(REG_OFFSET(r[sh4reg]));
1.21 }
1.22
1.23 -/**
1.24 - * Load the SR register into an x86 register
1.25 - */
1.26 -static inline void read_sr( int x86reg )
1.27 +static inline void load_reg16s( int x86reg, int sh4reg )
1.28 {
1.29 - MOV_ebp_r32( R_M, x86reg );
1.30 - SHL1_r32( x86reg );
1.31 - OR_ebp_r32( R_Q, x86reg );
1.32 - SHL_imm8_r32( 7, x86reg );
1.33 - OR_ebp_r32( R_S, x86reg );
1.34 - SHL1_r32( x86reg );
1.35 - OR_ebp_r32( R_T, x86reg );
1.36 - OR_ebp_r32( R_SR, x86reg );
1.37 + OP(0x0F);
1.38 + OP(0xBF);
1.39 + MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
1.40 }
1.41
1.42 -static inline void write_sr( int x86reg )
1.43 +static inline void load_reg16u( int x86reg, int sh4reg )
1.44 {
1.45 - TEST_imm32_r32( SR_M, x86reg );
1.46 - SETNE_ebp(R_M);
1.47 - TEST_imm32_r32( SR_Q, x86reg );
1.48 - SETNE_ebp(R_Q);
1.49 - TEST_imm32_r32( SR_S, x86reg );
1.50 - SETNE_ebp(R_S);
1.51 - TEST_imm32_r32( SR_T, x86reg );
1.52 - SETNE_ebp(R_T);
1.53 - AND_imm32_r32( SR_MQSTMASK, x86reg );
1.54 - MOV_r32_ebp( x86reg, R_SR );
1.55 + OP(0x0F);
1.56 + OP(0xB7);
1.57 + MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
1.58 +
1.59 }
1.60 -
1.61
1.62 static inline void load_spreg( int x86reg, int regoffset )
1.63 {
1.64 @@ -160,6 +145,49 @@
1.65 OP(regoffset);
1.66 }
1.67
1.68 +
1.69 +#define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
1.70 +
1.71 +static inline void load_xf_bank( int bankreg )
1.72 +{
1.73 + load_spreg( bankreg, R_FPSCR );
1.74 + SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
1.75 + AND_imm8s_r32( 0x40, bankreg ); // Complete extraction
1.76 + OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
1.77 +}
1.78 +
1.79 +static inline void push_fr( int bankreg, int frm )
1.80 +{
1.81 + OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2); // FLD.S [bankreg + frm^1*4]
1.82 +}
1.83 +
1.84 +static inline void pop_fr( int bankreg, int frm )
1.85 +{
1.86 + OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FSTP.S [bankreg + frm^1*4]
1.87 +}
1.88 +
1.89 +static inline void push_dr( int bankreg, int frm )
1.90 +{
1.91 + if( frm&1 ) {
1.92 + // An odd FRm is technically undefined here, but it seems to behave consistently:
1.93 + // the high 32 bits are loaded from FRm (32 bits) and the low 32 bits are 0.
1.94 + OP(0xFF); OP(0x70 + bankreg); OP((frm^1)<<2); // PUSH [bankreg + frm^1*4]
1.95 + PUSH_imm32(0);
1.96 +
1.97 +
1.98 + } else {
1.99 + OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
1.100 + }
1.101 +}
1.102 +
1.103 +static inline void pop_dr( int bankreg, int frm )
1.104 +{
1.105 + if( frm&1 ) {
1.106 + } else {
1.107 + OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FSTP.D [bankreg + frm*4]
1.108 + }
1.109 +}
1.110 +
1.111 /**
1.112 * Note: clobbers EAX to make the indirect call - this isn't usually
1.113 * a problem since the callee will usually clobber it anyway.
1.114 @@ -248,7 +276,7 @@
1.115 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
1.116
1.117 #define RAISE_EXCEPTION( exc ) call_func1(sh4_raise_exception, exc);
1.118 -#define CHECKSLOTILLEGAL() if(sh4_x86.in_delay_slot) RAISE_EXCEPTION(EXC_SLOT_ILLEGAL)
1.119 +#define SLOTILLEGAL() RAISE_EXCEPTION(EXC_SLOT_ILLEGAL); return 1
1.120
1.121
1.122
1.123 @@ -259,9 +287,9 @@
1.124 void sh4_translate_begin_block()
1.125 {
1.126 PUSH_r32(R_EBP);
1.127 - PUSH_r32(R_ESI);
1.128 /* mov &sh4r, ebp */
1.129 load_imm32( R_EBP, (uint32_t)&sh4r );
1.130 + PUSH_r32(R_EDI);
1.131 PUSH_r32(R_ESI);
1.132
1.133 sh4_x86.in_delay_slot = FALSE;
1.134 @@ -273,16 +301,18 @@
1.135 /**
1.136 * Exit the block early (ie branch out), conditionally or otherwise
1.137 */
1.138 -void exit_block( uint32_t pc )
1.139 +void exit_block( )
1.140 {
1.141 - load_imm32( R_ECX, pc );
1.142 - store_spreg( R_ECX, REG_OFFSET(pc) );
1.143 + store_spreg( R_EDI, REG_OFFSET(pc) );
1.144 MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
1.145 load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.146 MUL_r32( R_ESI );
1.147 ADD_r32_r32( R_EAX, R_ECX );
1.148 store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.149 XOR_r32_r32( R_EAX, R_EAX );
1.150 + POP_r32(R_ESI);
1.151 + POP_r32(R_EDI);
1.152 + POP_r32(R_EBP);
1.153 RET();
1.154 }
1.155
1.156 @@ -292,7 +322,7 @@
1.157 void sh4_translate_end_block( sh4addr_t pc ) {
1.158 assert( !sh4_x86.in_delay_slot ); // should never stop here
1.159 // Normal termination - save PC, cycle count
1.160 - exit_block( pc );
1.161 + exit_block( );
1.162
1.163 uint8_t *end_ptr = xlat_output;
1.164 // Exception termination. Jump block for various exception codes:
1.165 @@ -380,7 +410,7 @@
1.166 AND.B #imm, @(R0, GBR) {:
1.167 load_reg( R_EAX, 0 );
1.168 load_spreg( R_ECX, R_GBR );
1.169 - ADD_r32_r32( R_EAX, R_EBX );
1.170 + ADD_r32_r32( R_EAX, R_ECX );
1.171 MEM_READ_BYTE( R_ECX, R_EAX );
1.172 AND_imm32_r32(imm, R_ECX );
1.173 MEM_WRITE_BYTE( R_ECX, R_EAX );
1.174 @@ -460,7 +490,19 @@
1.175 store_spreg( R_EAX, R_M );
1.176 store_spreg( R_EAX, R_T );
1.177 :}
1.178 -DIV1 Rm, Rn {: :}
1.179 +DIV1 Rm, Rn {:
1.180 + load_reg( R_ECX, Rn );
1.181 + LDC_t();
1.182 + RCL1_r32( R_ECX ); // OP2
1.183 + SETC_r32( R_EDX ); // Q
1.184 + load_spreg( R_EAX, R_Q );
1.185 + CMP_sh4r_r32( R_M, R_EAX );
1.186 + JE_rel8(8);
1.187 + ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_ECX );
1.188 + JMP_rel8(3);
1.189 + SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_ECX );
1.190 + // TODO
1.191 +:}
1.192 DMULS.L Rm, Rn {:
1.193 load_reg( R_EAX, Rm );
1.194 load_reg( R_ECX, Rn );
1.195 @@ -513,9 +555,18 @@
1.196 MUL_r32( R_ECX );
1.197 store_spreg( R_EAX, R_MACL );
1.198 :}
1.199 -MULS.W Rm, Rn {:
1.200 +MULS.W Rm, Rn {:
1.201 + load_reg16s( R_EAX, Rm );
1.202 + load_reg16s( R_ECX, Rn );
1.203 + MUL_r32( R_ECX );
1.204 + store_spreg( R_EAX, R_MACL );
1.205 :}
1.206 -MULU.W Rm, Rn {: :}
1.207 +MULU.W Rm, Rn {:
1.208 + load_reg16u( R_EAX, Rm );
1.209 + load_reg16u( R_ECX, Rn );
1.210 + MUL_r32( R_ECX );
1.211 + store_spreg( R_EAX, R_MACL );
1.212 +:}
1.213 NEG Rm, Rn {:
1.214 load_reg( R_EAX, Rm );
1.215 NEG_r32( R_EAX );
1.216 @@ -545,7 +596,14 @@
1.217 OR_imm32_r32(imm, R_EAX);
1.218 store_reg( R_EAX, 0 );
1.219 :}
1.220 -OR.B #imm, @(R0, GBR) {: :}
1.221 +OR.B #imm, @(R0, GBR) {:
1.222 + load_reg( R_EAX, 0 );
1.223 + load_spreg( R_ECX, R_GBR );
1.224 + ADD_r32_r32( R_EAX, R_ECX );
1.225 + MEM_READ_BYTE( R_ECX, R_EAX );
1.226 + OR_imm32_r32(imm, R_ECX );
1.227 + MEM_WRITE_BYTE( R_ECX, R_EAX );
1.228 +:}
1.229 ROTCL Rn {:
1.230 load_reg( R_EAX, Rn );
1.231 LDC_t();
1.232 @@ -811,14 +869,16 @@
1.233 MEM_READ_BYTE( R_ECX, R_EAX );
1.234 store_reg( R_EAX, 0 );
1.235 :}
1.236 -MOV.L Rm, @Rn {:
1.237 +MOV.L Rm, @Rn {:
1.238 load_reg( R_EAX, Rm );
1.239 load_reg( R_ECX, Rn );
1.240 + check_walign32(R_ECX);
1.241 MEM_WRITE_LONG( R_ECX, R_EAX );
1.242 :}
1.243 MOV.L Rm, @-Rn {:
1.244 load_reg( R_EAX, Rm );
1.245 load_reg( R_ECX, Rn );
1.246 + check_walign32( R_ECX );
1.247 ADD_imm8s_r32( -4, R_ECX );
1.248 store_reg( R_ECX, Rn );
1.249 MEM_WRITE_LONG( R_ECX, R_EAX );
1.250 @@ -827,6 +887,7 @@
1.251 load_reg( R_EAX, 0 );
1.252 load_reg( R_ECX, Rn );
1.253 ADD_r32_r32( R_EAX, R_ECX );
1.254 + check_walign32( R_ECX );
1.255 load_reg( R_EAX, Rm );
1.256 MEM_WRITE_LONG( R_ECX, R_EAX );
1.257 :}
1.258 @@ -834,21 +895,25 @@
1.259 load_spreg( R_ECX, R_GBR );
1.260 load_reg( R_EAX, 0 );
1.261 ADD_imm32_r32( disp, R_ECX );
1.262 + check_walign32( R_ECX );
1.263 MEM_WRITE_LONG( R_ECX, R_EAX );
1.264 :}
1.265 MOV.L Rm, @(disp, Rn) {:
1.266 load_reg( R_ECX, Rn );
1.267 load_reg( R_EAX, Rm );
1.268 ADD_imm32_r32( disp, R_ECX );
1.269 + check_walign32( R_ECX );
1.270 MEM_WRITE_LONG( R_ECX, R_EAX );
1.271 :}
1.272 MOV.L @Rm, Rn {:
1.273 load_reg( R_ECX, Rm );
1.274 + check_ralign32( R_ECX );
1.275 MEM_READ_LONG( R_ECX, R_EAX );
1.276 store_reg( R_EAX, Rn );
1.277 :}
1.278 MOV.L @Rm+, Rn {:
1.279 load_reg( R_EAX, Rm );
1.280 + check_ralign32( R_ECX );
1.281 MOV_r32_r32( R_EAX, R_ECX );
1.282 ADD_imm8s_r32( 4, R_EAX );
1.283 store_reg( R_EAX, Rm );
1.284 @@ -859,33 +924,42 @@
1.285 load_reg( R_EAX, 0 );
1.286 load_reg( R_ECX, Rm );
1.287 ADD_r32_r32( R_EAX, R_ECX );
1.288 + check_ralign32( R_ECX );
1.289 MEM_READ_LONG( R_ECX, R_EAX );
1.290 store_reg( R_EAX, Rn );
1.291 :}
1.292 MOV.L @(disp, GBR), R0 {:
1.293 load_spreg( R_ECX, R_GBR );
1.294 ADD_imm32_r32( disp, R_ECX );
1.295 + check_ralign32( R_ECX );
1.296 MEM_READ_LONG( R_ECX, R_EAX );
1.297 store_reg( R_EAX, 0 );
1.298 :}
1.299 MOV.L @(disp, PC), Rn {:
1.300 - load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
1.301 - MEM_READ_LONG( R_ECX, R_EAX );
1.302 - store_reg( R_EAX, 0 );
1.303 + if( sh4_x86.in_delay_slot ) {
1.304 + SLOTILLEGAL();
1.305 + } else {
1.306 + load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
1.307 + MEM_READ_LONG( R_ECX, R_EAX );
1.308 + store_reg( R_EAX, 0 );
1.309 + }
1.310 :}
1.311 MOV.L @(disp, Rm), Rn {:
1.312 load_reg( R_ECX, Rm );
1.313 ADD_imm8s_r32( disp, R_ECX );
1.314 + check_ralign32( R_ECX );
1.315 MEM_READ_LONG( R_ECX, R_EAX );
1.316 store_reg( R_EAX, Rn );
1.317 :}
1.318 MOV.W Rm, @Rn {:
1.319 load_reg( R_ECX, Rn );
1.320 + check_walign16( R_ECX );
1.321 MEM_READ_WORD( R_ECX, R_EAX );
1.322 store_reg( R_EAX, Rn );
1.323 :}
1.324 MOV.W Rm, @-Rn {:
1.325 load_reg( R_ECX, Rn );
1.326 + check_walign16( R_ECX );
1.327 load_reg( R_EAX, Rm );
1.328 ADD_imm8s_r32( -2, R_ECX );
1.329 MEM_WRITE_WORD( R_ECX, R_EAX );
1.330 @@ -894,6 +968,7 @@
1.331 load_reg( R_EAX, 0 );
1.332 load_reg( R_ECX, Rn );
1.333 ADD_r32_r32( R_EAX, R_ECX );
1.334 + check_walign16( R_ECX );
1.335 load_reg( R_EAX, Rm );
1.336 MEM_WRITE_WORD( R_ECX, R_EAX );
1.337 :}
1.338 @@ -901,21 +976,25 @@
1.339 load_spreg( R_ECX, R_GBR );
1.340 load_reg( R_EAX, 0 );
1.341 ADD_imm32_r32( disp, R_ECX );
1.342 + check_walign16( R_ECX );
1.343 MEM_WRITE_WORD( R_ECX, R_EAX );
1.344 :}
1.345 MOV.W R0, @(disp, Rn) {:
1.346 load_reg( R_ECX, Rn );
1.347 load_reg( R_EAX, 0 );
1.348 ADD_imm32_r32( disp, R_ECX );
1.349 + check_walign16( R_ECX );
1.350 MEM_WRITE_WORD( R_ECX, R_EAX );
1.351 :}
1.352 MOV.W @Rm, Rn {:
1.353 load_reg( R_ECX, Rm );
1.354 + check_ralign16( R_ECX );
1.355 MEM_READ_WORD( R_ECX, R_EAX );
1.356 store_reg( R_EAX, Rn );
1.357 :}
1.358 MOV.W @Rm+, Rn {:
1.359 load_reg( R_EAX, Rm );
1.360 + check_ralign16( R_EAX );
1.361 MOV_r32_r32( R_EAX, R_ECX );
1.362 ADD_imm8s_r32( 2, R_EAX );
1.363 store_reg( R_EAX, Rm );
1.364 @@ -926,77 +1005,242 @@
1.365 load_reg( R_EAX, 0 );
1.366 load_reg( R_ECX, Rm );
1.367 ADD_r32_r32( R_EAX, R_ECX );
1.368 + check_ralign16( R_ECX );
1.369 MEM_READ_WORD( R_ECX, R_EAX );
1.370 store_reg( R_EAX, Rn );
1.371 :}
1.372 MOV.W @(disp, GBR), R0 {:
1.373 load_spreg( R_ECX, R_GBR );
1.374 ADD_imm32_r32( disp, R_ECX );
1.375 + check_ralign16( R_ECX );
1.376 MEM_READ_WORD( R_ECX, R_EAX );
1.377 store_reg( R_EAX, 0 );
1.378 :}
1.379 MOV.W @(disp, PC), Rn {:
1.380 - load_imm32( R_ECX, pc + disp + 4 );
1.381 - MEM_READ_WORD( R_ECX, R_EAX );
1.382 - store_reg( R_EAX, Rn );
1.383 + if( sh4_x86.in_delay_slot ) {
1.384 + SLOTILLEGAL();
1.385 + } else {
1.386 + load_imm32( R_ECX, pc + disp + 4 );
1.387 + MEM_READ_WORD( R_ECX, R_EAX );
1.388 + store_reg( R_EAX, Rn );
1.389 + }
1.390 :}
1.391 MOV.W @(disp, Rm), R0 {:
1.392 load_reg( R_ECX, Rm );
1.393 ADD_imm32_r32( disp, R_ECX );
1.394 + check_ralign16( R_ECX );
1.395 MEM_READ_WORD( R_ECX, R_EAX );
1.396 store_reg( R_EAX, 0 );
1.397 :}
1.398 MOVA @(disp, PC), R0 {:
1.399 - load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
1.400 - store_reg( R_ECX, 0 );
1.401 + if( sh4_x86.in_delay_slot ) {
1.402 + SLOTILLEGAL();
1.403 + } else {
1.404 + load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
1.405 + store_reg( R_ECX, 0 );
1.406 + }
1.407 :}
1.408 MOVCA.L R0, @Rn {:
1.409 load_reg( R_EAX, 0 );
1.410 load_reg( R_ECX, Rn );
1.411 + check_walign32( R_ECX );
1.412 MEM_WRITE_LONG( R_ECX, R_EAX );
1.413 :}
1.414
1.415 /* Control transfer instructions */
1.416 -BF disp {:
1.417 - CMP_imm8s_ebp( 0, R_T );
1.418 - JNE_rel8( 1 );
1.419 - exit_block( disp + pc + 4 );
1.420 +BF disp {:
1.421 + if( sh4_x86.in_delay_slot ) {
1.422 + SLOTILLEGAL();
1.423 + } else {
1.424 + load_imm32( R_EDI, pc + 2 );
1.425 + CMP_imm8s_sh4r( 0, R_T );
1.426 + JNE_rel8( 5 );
1.427 + load_imm32( R_EDI, disp + pc + 4 );
1.428 + INC_r32(R_ESI);
1.429 + return 1;
1.430 + }
1.431 +:}
1.432 +BF/S disp {:
1.433 + if( sh4_x86.in_delay_slot ) {
1.434 + SLOTILLEGAL();
1.435 + } else {
1.436 + load_imm32( R_EDI, pc + 2 );
1.437 + CMP_imm8s_sh4r( 0, R_T );
1.438 + JNE_rel8( 5 );
1.439 + load_imm32( R_EDI, disp + pc + 4 );
1.440 + sh4_x86.in_delay_slot = TRUE;
1.441 + INC_r32(R_ESI);
1.442 + return 0;
1.443 + }
1.444 +:}
1.445 +BRA disp {:
1.446 + if( sh4_x86.in_delay_slot ) {
1.447 + SLOTILLEGAL();
1.448 + } else {
1.449 + load_imm32( R_EDI, disp + pc + 4 );
1.450 + sh4_x86.in_delay_slot = TRUE;
1.451 + INC_r32(R_ESI);
1.452 + return 0;
1.453 + }
1.454 +:}
1.455 +BRAF Rn {:
1.456 + if( sh4_x86.in_delay_slot ) {
1.457 + SLOTILLEGAL();
1.458 + } else {
1.459 + load_reg( R_EDI, Rn );
1.460 + sh4_x86.in_delay_slot = TRUE;
1.461 + INC_r32(R_ESI);
1.462 + return 0;
1.463 + }
1.464 +:}
1.465 +BSR disp {:
1.466 + if( sh4_x86.in_delay_slot ) {
1.467 + SLOTILLEGAL();
1.468 + } else {
1.469 + load_imm32( R_EAX, pc + 4 );
1.470 + store_spreg( R_EAX, R_PR );
1.471 + load_imm32( R_EDI, disp + pc + 4 );
1.472 + sh4_x86.in_delay_slot = TRUE;
1.473 + INC_r32(R_ESI);
1.474 + return 0;
1.475 + }
1.476 +:}
1.477 +BSRF Rn {:
1.478 + if( sh4_x86.in_delay_slot ) {
1.479 + SLOTILLEGAL();
1.480 + } else {
1.481 + load_imm32( R_EAX, pc + 4 );
1.482 + store_spreg( R_EAX, R_PR );
1.483 + load_reg( R_EDI, Rn );
1.484 + ADD_r32_r32( R_EAX, R_EDI );
1.485 + sh4_x86.in_delay_slot = TRUE;
1.486 + INC_r32(R_ESI);
1.487 + return 0;
1.488 + }
1.489 +:}
1.490 +BT disp {:
1.491 + if( sh4_x86.in_delay_slot ) {
1.492 + SLOTILLEGAL();
1.493 + } else {
1.494 + load_imm32( R_EDI, pc + 2 );
1.495 + CMP_imm8s_sh4r( 0, R_T );
1.496 + JE_rel8( 5 );
1.497 + load_imm32( R_EDI, disp + pc + 4 );
1.498 + INC_r32(R_ESI);
1.499 + return 1;
1.500 + }
1.501 +:}
1.502 +BT/S disp {:
1.503 + if( sh4_x86.in_delay_slot ) {
1.504 + SLOTILLEGAL();
1.505 + } else {
1.506 + load_imm32( R_EDI, pc + 2 );
1.507 + CMP_imm8s_sh4r( 0, R_T );
1.508 + JE_rel8( 5 );
1.509 + load_imm32( R_EDI, disp + pc + 4 );
1.510 + sh4_x86.in_delay_slot = TRUE;
1.511 + INC_r32(R_ESI);
1.512 + return 0;
1.513 + }
1.514 +:}
1.515 +JMP @Rn {:
1.516 + if( sh4_x86.in_delay_slot ) {
1.517 + SLOTILLEGAL();
1.518 + } else {
1.519 + load_reg( R_EDI, Rn );
1.520 + sh4_x86.in_delay_slot = TRUE;
1.521 + INC_r32(R_ESI);
1.522 + return 0;
1.523 + }
1.524 +:}
1.525 +JSR @Rn {:
1.526 + if( sh4_x86.in_delay_slot ) {
1.527 + SLOTILLEGAL();
1.528 + } else {
1.529 + load_imm32( R_EAX, pc + 4 );
1.530 + store_spreg( R_EAX, R_PR );
1.531 + load_reg( R_EDI, Rn );
1.532 + sh4_x86.in_delay_slot = TRUE;
1.533 + INC_r32(R_ESI);
1.534 + return 0;
1.535 + }
1.536 +:}
1.537 +RTE {:
1.538 + check_priv();
1.539 + if( sh4_x86.in_delay_slot ) {
1.540 + SLOTILLEGAL();
1.541 + } else {
1.542 + load_spreg( R_EDI, R_PR );
1.543 + load_spreg( R_EAX, R_SSR );
1.544 + call_func1( sh4_write_sr, R_EAX );
1.545 + sh4_x86.in_delay_slot = TRUE;
1.546 + INC_r32(R_ESI);
1.547 + return 0;
1.548 + }
1.549 +:}
1.550 +RTS {:
1.551 + if( sh4_x86.in_delay_slot ) {
1.552 + SLOTILLEGAL();
1.553 + } else {
1.554 + load_spreg( R_EDI, R_PR );
1.555 + sh4_x86.in_delay_slot = TRUE;
1.556 + INC_r32(R_ESI);
1.557 + return 0;
1.558 + }
1.559 +:}
1.560 +TRAPA #imm {:
1.561 + if( sh4_x86.in_delay_slot ) {
1.562 + SLOTILLEGAL();
1.563 + } else {
1.564 + // TODO: Write TRA
1.565 + RAISE_EXCEPTION(EXC_TRAP);
1.566 + }
1.567 +:}
1.568 +UNDEF {:
1.569 + if( sh4_x86.in_delay_slot ) {
1.570 + RAISE_EXCEPTION(EXC_SLOT_ILLEGAL);
1.571 + } else {
1.572 + RAISE_EXCEPTION(EXC_ILLEGAL);
1.573 + }
1.574 return 1;
1.575 :}
1.576 -BF/S disp {:
1.577 - CMP_imm8s_ebp( 0, R_T );
1.578 - JNE_rel8( 1 );
1.579 - exit_block( disp + pc + 4 );
1.580 - sh4_x86.in_delay_slot = TRUE;
1.581 +
1.582 +CLRMAC {:
1.583 + XOR_r32_r32(R_EAX, R_EAX);
1.584 + store_spreg( R_EAX, R_MACL );
1.585 + store_spreg( R_EAX, R_MACH );
1.586 :}
1.587 -BRA disp {:
1.588 - exit_block( disp + pc + 4 );
1.589 +CLRS {:
1.590 + CLC();
1.591 + SETC_sh4r(R_S);
1.592 :}
1.593 -BRAF Rn {: :}
1.594 -BSR disp {: :}
1.595 -BSRF Rn {: :}
1.596 -BT disp {: /* If true, result PC += 4 + disp. else result PC = pc+2 */
1.597 - return pc + 2;
1.598 +CLRT {:
1.599 + CLC();
1.600 + SETC_t();
1.601 :}
1.602 -BT/S disp {:
1.603 -
1.604 - return pc + 4;
1.605 +SETS {:
1.606 + STC();
1.607 + SETC_sh4r(R_S);
1.608 :}
1.609 -JMP @Rn {: :}
1.610 -JSR @Rn {: :}
1.611 -RTE {: :}
1.612 -RTS {: :}
1.613 -TRAPA #imm {: :}
1.614 -UNDEF {: :}
1.615 -
1.616 -CLRMAC {: :}
1.617 -CLRS {: :}
1.618 -CLRT {: :}
1.619 -SETS {: :}
1.620 -SETT {: :}
1.621 +SETT {:
1.622 + STC();
1.623 + SETC_t();
1.624 +:}
1.625
1.626 /* Floating point instructions */
1.627 -FABS FRn {: :}
1.628 +FABS FRn {:
1.629 + load_spreg( R_ECX, R_FPSCR );
1.630 + load_spreg( R_EDX, REG_OFFSET(fr_bank) );
1.631 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.632 + JNE_rel8(10);
1.633 + push_fr(R_EDX, FRn); // 3
1.634 + FABS_st0(); // 2
1.635 + pop_fr( R_EDX, FRn); //3
1.636 + JMP_rel8(8); // 2
1.637 + push_dr(R_EDX, FRn);
1.638 + FABS_st0();
1.639 + pop_dr(R_EDX, FRn);
1.640 +:}
1.641 FADD FRm, FRn {: :}
1.642 FCMP/EQ FRm, FRn {: :}
1.643 FCMP/GT FRm, FRn {: :}
1.644 @@ -1031,7 +1275,7 @@
1.645 /* Processor control instructions */
1.646 LDC Rm, SR {:
1.647 load_reg( R_EAX, Rm );
1.648 - write_sr( R_EAX );
1.649 + call_func1( sh4_write_sr, R_EAX );
1.650 :}
1.651 LDC Rm, GBR {:
1.652 load_reg( R_EAX, Rm );
1.653 @@ -1057,7 +1301,10 @@
1.654 load_reg( R_EAX, Rm );
1.655 store_spreg( R_EAX, R_DBR );
1.656 :}
1.657 -LDC Rm, Rn_BANK {: :}
1.658 +LDC Rm, Rn_BANK {:
1.659 + load_reg( R_EAX, Rm );
1.660 + store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
1.661 +:}
1.662 LDC.L @Rm+, GBR {:
1.663 load_reg( R_EAX, Rm );
1.664 MOV_r32_r32( R_EAX, R_ECX );
1.665 @@ -1072,7 +1319,7 @@
1.666 ADD_imm8s_r32( 4, R_EAX );
1.667 store_reg( R_EAX, Rm );
1.668 MEM_READ_LONG( R_ECX, R_EAX );
1.669 - write_sr( R_EAX );
1.670 + call_func1( sh4_write_sr, R_EAX );
1.671 :}
1.672 LDC.L @Rm+, VBR {:
1.673 load_reg( R_EAX, Rm );
1.674 @@ -1115,6 +1362,12 @@
1.675 store_spreg( R_EAX, R_DBR );
1.676 :}
1.677 LDC.L @Rm+, Rn_BANK {:
1.678 + load_reg( R_EAX, Rm );
1.679 + MOV_r32_r32( R_EAX, R_ECX );
1.680 + ADD_imm8s_r32( 4, R_EAX );
1.681 + store_reg( R_EAX, Rm );
1.682 + MEM_READ_LONG( R_ECX, R_EAX );
1.683 + store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
1.684 :}
1.685 LDS Rm, FPSCR {:
1.686 load_reg( R_EAX, Rm );
1.687 @@ -1180,10 +1433,18 @@
1.688 OCBI @Rn {: :}
1.689 OCBP @Rn {: :}
1.690 OCBWB @Rn {: :}
1.691 -PREF @Rn {: :}
1.692 -SLEEP {: :}
1.693 +PREF @Rn {:
1.694 + load_reg( R_EAX, Rn );
1.695 + PUSH_r32( R_EAX );
1.696 + AND_imm32_r32( 0xFC000000, R_EAX );
1.697 + CMP_imm32_r32( 0xE0000000, R_EAX );
1.698 + JNE_rel8(8);
1.699 + call_func0( sh4_flush_store_queue );
1.700 + ADD_imm8s_r32( -4, R_ESP );
1.701 +:}
1.702 + SLEEP {: /* TODO */ :}
1.703 STC SR, Rn {:
1.704 - read_sr( R_EAX );
1.705 + call_func0(sh4_read_sr);
1.706 store_reg( R_EAX, Rn );
1.707 :}
1.708 STC GBR, Rn {:
1.709 @@ -1210,13 +1471,15 @@
1.710 load_spreg( R_EAX, R_DBR );
1.711 store_reg( R_EAX, Rn );
1.712 :}
1.713 -STC Rm_BANK, Rn {: /* TODO */
1.714 +STC Rm_BANK, Rn {:
1.715 + load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
1.716 + store_reg( R_EAX, Rn );
1.717 :}
1.718 -STC.L SR, @-Rn {: /* TODO */
1.719 +STC.L SR, @-Rn {:
1.720 load_reg( R_ECX, Rn );
1.721 ADD_imm8s_r32( -4, Rn );
1.722 store_reg( R_ECX, Rn );
1.723 - read_sr( R_EAX );
1.724 + call_func0( sh4_read_sr );
1.725 MEM_WRITE_LONG( R_ECX, R_EAX );
1.726 :}
1.727 STC.L VBR, @-Rn {:
1.728 @@ -1254,7 +1517,13 @@
1.729 load_spreg( R_EAX, R_DBR );
1.730 MEM_WRITE_LONG( R_ECX, R_EAX );
1.731 :}
1.732 -STC.L Rm_BANK, @-Rn {: :}
1.733 +STC.L Rm_BANK, @-Rn {:
1.734 + load_reg( R_ECX, Rn );
1.735 + ADD_imm8s_r32( -4, Rn );
1.736 + store_reg( R_ECX, Rn );
1.737 + load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
1.738 + MEM_WRITE_LONG( R_ECX, R_EAX );
1.739 +:}
1.740 STC.L GBR, @-Rn {:
1.741 load_reg( R_ECX, Rn );
1.742 ADD_imm8s_r32( -4, Rn );
1.743 @@ -1321,6 +1590,9 @@
1.744 NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
1.745 %%
1.746 INC_r32(R_ESI);
1.747 -
1.748 + if( sh4_x86.in_delay_slot ) {
1.749 + sh4_x86.in_delay_slot = FALSE;
1.750 + return 1;
1.751 + }
1.752 return 0;
1.753 }
.