1.1 --- a/src/sh4/sh4x86.c Tue Sep 11 21:23:48 2007 +0000
1.2 +++ b/src/sh4/sh4x86.c Wed Sep 12 09:17:52 2007 +0000
1.5 - * $Id: sh4x86.c,v 1.5 2007-09-11 21:23:48 nkeynes Exp $
1.6 + * $Id: sh4x86.c,v 1.6 2007-09-12 09:17:52 nkeynes Exp $
1.8 * SH4 => x86 translation. This version does no real optimization, it just
1.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
1.10 @@ -180,6 +180,22 @@
1.14 + * Push FPUL (as a 32-bit float) onto the FPU stack
1.16 +static inline void push_fpul( )
1.18 + OP(0xD9); OP(0x45); OP(R_FPUL);
1.22 + * Pop FPUL (as a 32-bit float) from the FPU stack
1.24 +static inline void pop_fpul( )
1.26 + OP(0xD9); OP(0x5D); OP(R_FPUL);
1.30 * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
1.31 * with the location of the current fp bank.
1.33 @@ -203,24 +219,12 @@
1.35 static inline void push_dr( int bankreg, int frm )
1.38 - // this is technically undefined, but it seems to work consistently - high 32 bits
1.39 - // loaded from FRm (32-bits), low 32bits are 0.
1.40 - OP(0xFF); OP(0x70 + bankreg); OP((frm^1)<<2); // PUSH [bankreg + frm^1]
1.45 - OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
1.47 + OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
1.50 static inline void pop_dr( int bankreg, int frm )
1.54 - OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
1.56 + OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
1.64 - ADD_imm8s_r32( -4, R_ESP );
1.65 + ADD_imm8s_r32( 4, R_ESP );
1.68 static inline void call_func2( void *ptr, int arg1, int arg2 )
1.73 - ADD_imm8s_r32( -8, R_ESP );
1.74 + ADD_imm8s_r32( 8, R_ESP );
1.81 call_func0(sh4_write_long);
1.82 - ADD_imm8s_r32( -8, R_ESP );
1.83 + ADD_imm8s_r32( 8, R_ESP );
1.84 call_func0(sh4_write_long);
1.85 - ADD_imm8s_r32( -8, R_ESP );
1.86 + ADD_imm8s_r32( 8, R_ESP );
1.91 ADD_imm8s_r32( 4, addr );
1.93 call_func0(sh4_read_long);
1.94 - ADD_imm8s_r32( -4, R_ESP );
1.95 + ADD_imm8s_r32( 4, R_ESP );
1.96 MOV_r32_r32( R_EAX, arg2b );
1.100 CMP_imm32_r32( 0xE0000000, R_EAX );
1.102 call_func0( sh4_flush_store_queue );
1.103 - ADD_imm8s_r32( -4, R_ESP );
1.104 + ADD_imm8s_r32( 4, R_ESP );
1.108 @@ -764,6 +768,8 @@
1.109 load_spreg( R_EAX, R_SSR );
1.110 call_func1( sh4_write_sr, R_EAX );
1.111 sh4_x86.in_delay_slot = TRUE;
1.112 + sh4_x86.priv_checked = FALSE;
1.113 + sh4_x86.fpuen_checked = FALSE;
1.117 @@ -1502,6 +1508,8 @@
1.118 store_reg( R_EAX, Rm );
1.119 MEM_READ_LONG( R_ECX, R_EAX );
1.120 call_func1( sh4_write_sr, R_EAX );
1.121 + sh4_x86.priv_checked = FALSE;
1.122 + sh4_x86.fpuen_checked = FALSE;
1.126 @@ -1774,6 +1782,8 @@
1.127 uint32_t Rm = ((ir>>8)&0xF);
1.128 load_reg( R_EAX, Rm );
1.129 call_func1( sh4_write_sr, R_EAX );
1.130 + sh4_x86.priv_checked = FALSE;
1.131 + sh4_x86.fpuen_checked = FALSE;
1.135 @@ -2332,53 +2342,127 @@
1.137 { /* FADD FRm, FRn */
1.138 uint32_t FRn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.140 + load_spreg( R_ECX, R_FPSCR );
1.141 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.142 + load_fr_bank( R_EDX );
1.144 + push_fr(R_EDX, FRm);
1.145 + push_fr(R_EDX, FRn);
1.147 + pop_fr(R_EDX, FRn);
1.149 + push_dr(R_EDX, FRm);
1.150 + push_dr(R_EDX, FRn);
1.152 + pop_dr(R_EDX, FRn);
1.156 { /* FSUB FRm, FRn */
1.157 uint32_t FRn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.159 + load_spreg( R_ECX, R_FPSCR );
1.160 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.161 + load_fr_bank( R_EDX );
1.163 + push_fr(R_EDX, FRn);
1.164 + push_fr(R_EDX, FRm);
1.166 + pop_fr(R_EDX, FRn);
1.168 + push_dr(R_EDX, FRn);
1.169 + push_dr(R_EDX, FRm);
1.171 + pop_dr(R_EDX, FRn);
1.175 { /* FMUL FRm, FRn */
1.176 uint32_t FRn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.178 + load_spreg( R_ECX, R_FPSCR );
1.179 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.180 + load_fr_bank( R_EDX );
1.182 + push_fr(R_EDX, FRm);
1.183 + push_fr(R_EDX, FRn);
1.185 + pop_fr(R_EDX, FRn);
1.187 + push_dr(R_EDX, FRm);
1.188 + push_dr(R_EDX, FRn);
1.190 + pop_dr(R_EDX, FRn);
1.194 { /* FDIV FRm, FRn */
1.195 uint32_t FRn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.197 + load_spreg( R_ECX, R_FPSCR );
1.198 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.199 + load_fr_bank( R_EDX );
1.201 + push_fr(R_EDX, FRn);
1.202 + push_fr(R_EDX, FRm);
1.204 + pop_fr(R_EDX, FRn);
1.206 + push_dr(R_EDX, FRn);
1.207 + push_dr(R_EDX, FRm);
1.209 + pop_dr(R_EDX, FRn);
1.213 { /* FCMP/EQ FRm, FRn */
1.214 uint32_t FRn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.216 + load_spreg( R_ECX, R_FPSCR );
1.217 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.218 + load_fr_bank( R_EDX );
1.220 + push_fr(R_EDX, FRm);
1.221 + push_fr(R_EDX, FRn);
1.223 + push_dr(R_EDX, FRm);
1.224 + push_dr(R_EDX, FRn);
1.231 { /* FCMP/GT FRm, FRn */
1.232 uint32_t FRn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.234 + load_spreg( R_ECX, R_FPSCR );
1.235 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.236 + load_fr_bank( R_EDX );
1.238 + push_fr(R_EDX, FRm);
1.239 + push_fr(R_EDX, FRn);
1.241 + push_dr(R_EDX, FRm);
1.242 + push_dr(R_EDX, FRn);
1.249 { /* FMOV @(R0, Rm), FRn */
1.250 uint32_t FRn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.254 - { /* FMOV FRm, @(R0, Rn) */
1.255 - uint32_t Rn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.259 - { /* FMOV @Rm, FRn */
1.260 - uint32_t FRn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.262 load_reg( R_EDX, Rm );
1.263 + ADD_sh4r_r32( REG_OFFSET(r[0]), R_EDX );
1.264 check_ralign32( R_EDX );
1.265 load_spreg( R_ECX, R_FPSCR );
1.266 TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.268 MEM_READ_LONG( R_EDX, R_EAX );
1.269 - load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.270 + load_fr_bank( R_ECX );
1.271 store_fr( R_ECX, R_EAX, FRn );
1.274 @@ -2388,7 +2472,58 @@
1.277 MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.278 - load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.279 + load_fr_bank( R_ECX );
1.281 + store_fr( R_ECX, R_EAX, FRn&0x0E );
1.282 + store_fr( R_ECX, R_EDX, FRn|0x01 );
1.286 + { /* FMOV FRm, @(R0, Rn) */
1.287 + uint32_t Rn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.289 + load_reg( R_EDX, Rn );
1.290 + ADD_sh4r_r32( REG_OFFSET(r[0]), R_EDX );
1.291 + check_walign32( R_EDX );
1.292 + load_spreg( R_ECX, R_FPSCR );
1.293 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.295 + load_fr_bank( R_ECX );
1.296 + load_fr( R_ECX, R_EAX, FRm );
1.297 + MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
1.300 + load_xf_bank( R_ECX );
1.303 + load_fr_bank( R_ECX );
1.305 + load_fr( R_ECX, R_EAX, FRm&0x0E );
1.306 + load_fr( R_ECX, R_ECX, FRm|0x01 );
1.307 + MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
1.311 + { /* FMOV @Rm, FRn */
1.312 + uint32_t FRn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.314 + load_reg( R_EDX, Rm );
1.315 + check_ralign32( R_EDX );
1.316 + load_spreg( R_ECX, R_FPSCR );
1.317 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.319 + MEM_READ_LONG( R_EDX, R_EAX );
1.320 + load_fr_bank( R_ECX );
1.321 + store_fr( R_ECX, R_EAX, FRn );
1.324 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.325 + load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
1.326 + load_xf_bank( R_ECX );
1.329 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.330 + load_fr_bank( R_ECX );
1.332 store_fr( R_ECX, R_EAX, FRn&0x0E );
1.333 store_fr( R_ECX, R_EDX, FRn|0x01 );
1.334 @@ -2397,17 +2532,46 @@
1.336 { /* FMOV @Rm+, FRn */
1.337 uint32_t FRn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.339 + load_reg( R_EDX, Rm );
1.340 + check_ralign32( R_EDX );
1.341 + MOV_r32_r32( R_EDX, R_EAX );
1.342 + load_spreg( R_ECX, R_FPSCR );
1.343 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.345 + ADD_imm8s_r32( 4, R_EAX );
1.346 + store_reg( R_EAX, Rm );
1.347 + MEM_READ_LONG( R_EDX, R_EAX );
1.348 + load_fr_bank( R_ECX );
1.349 + store_fr( R_ECX, R_EAX, FRn );
1.352 + ADD_imm8s_r32( 8, R_EAX );
1.353 + store_reg(R_EAX, Rm);
1.354 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.355 + load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
1.356 + load_xf_bank( R_ECX );
1.359 + ADD_imm8s_r32( 8, R_EAX );
1.360 + store_reg(R_EAX, Rm);
1.361 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.362 + load_fr_bank( R_ECX );
1.364 + store_fr( R_ECX, R_EAX, FRn&0x0E );
1.365 + store_fr( R_ECX, R_EDX, FRn|0x01 );
1.369 { /* FMOV FRm, @Rn */
1.370 uint32_t Rn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.372 load_reg( R_EDX, Rn );
1.373 check_walign32( R_EDX );
1.374 load_spreg( R_ECX, R_FPSCR );
1.375 TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.377 - load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.378 + load_fr_bank( R_ECX );
1.379 load_fr( R_ECX, R_EAX, FRm );
1.380 MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
1.382 @@ -2415,7 +2579,7 @@
1.383 load_xf_bank( R_ECX );
1.386 - load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.387 + load_fr_bank( R_ECX );
1.389 load_fr( R_ECX, R_EAX, FRm&0x0E );
1.390 load_fr( R_ECX, R_ECX, FRm|0x01 );
1.391 @@ -2425,6 +2589,29 @@
1.393 { /* FMOV FRm, @-Rn */
1.394 uint32_t Rn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.396 + load_reg( R_EDX, Rn );
1.397 + check_walign32( R_EDX );
1.398 + load_spreg( R_ECX, R_FPSCR );
1.399 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.401 + load_fr_bank( R_ECX );
1.402 + load_fr( R_ECX, R_EAX, FRm );
1.403 + ADD_imm8s_r32(-4,R_EDX);
1.404 + store_reg( R_EDX, Rn );
1.405 + MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
1.408 + load_xf_bank( R_ECX );
1.411 + load_fr_bank( R_ECX );
1.413 + load_fr( R_ECX, R_EAX, FRm&0x0E );
1.414 + load_fr( R_ECX, R_ECX, FRm|0x01 );
1.415 + ADD_imm8s_r32(-8,R_EDX);
1.416 + store_reg( R_EDX, Rn );
1.417 + MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
1.421 @@ -2437,8 +2624,9 @@
1.422 * 4. 64-bit xd-to-dr (SZ=1, FRm&1 == 1, FRn&1 == 0 )
1.423 * 5. 64-bit xd-to-xd (SZ=1, FRm&1 == 1, FRn&1 == 1 )
1.426 load_spreg( R_ECX, R_FPSCR );
1.427 - load_spreg( R_EDX, REG_OFFSET(fr_bank) );
1.428 + load_fr_bank( R_EDX );
1.429 TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.431 load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
1.432 @@ -2479,33 +2667,65 @@
1.434 { /* FSTS FPUL, FRn */
1.435 uint32_t FRn = ((ir>>8)&0xF);
1.437 + load_fr_bank( R_ECX );
1.438 + load_spreg( R_EAX, R_FPUL );
1.439 + store_fr( R_ECX, R_EAX, FRn );
1.443 { /* FLDS FRm, FPUL */
1.444 uint32_t FRm = ((ir>>8)&0xF);
1.446 + load_fr_bank( R_ECX );
1.447 + load_fr( R_ECX, R_EAX, FRm );
1.448 + store_spreg( R_EAX, R_FPUL );
1.452 { /* FLOAT FPUL, FRn */
1.453 uint32_t FRn = ((ir>>8)&0xF);
1.455 + load_spreg( R_ECX, R_FPSCR );
1.456 + load_spreg(R_EDX, REG_OFFSET(fr_bank));
1.457 + FILD_sh4r(R_FPUL);
1.458 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.460 + pop_fr( R_EDX, FRn );
1.462 + pop_dr( R_EDX, FRn );
1.466 { /* FTRC FRm, FPUL */
1.467 uint32_t FRm = ((ir>>8)&0xF);
1.474 uint32_t FRn = ((ir>>8)&0xF);
1.476 + load_spreg( R_ECX, R_FPSCR );
1.477 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.478 + load_fr_bank( R_EDX );
1.480 + push_fr(R_EDX, FRn);
1.482 + pop_fr(R_EDX, FRn);
1.484 + push_dr(R_EDX, FRn);
1.486 + pop_dr(R_EDX, FRn);
1.491 uint32_t FRn = ((ir>>8)&0xF);
1.493 load_spreg( R_ECX, R_FPSCR );
1.494 - load_spreg( R_EDX, REG_OFFSET(fr_bank) );
1.495 + load_fr_bank( R_EDX );
1.496 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.498 push_fr(R_EDX, FRn); // 3
1.499 @@ -2520,36 +2740,90 @@
1.502 uint32_t FRn = ((ir>>8)&0xF);
1.504 + load_spreg( R_ECX, R_FPSCR );
1.505 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.506 + load_fr_bank( R_EDX );
1.508 + push_fr(R_EDX, FRn);
1.510 + pop_fr(R_EDX, FRn);
1.512 + push_dr(R_EDX, FRn);
1.514 + pop_dr(R_EDX, FRn);
1.519 uint32_t FRn = ((ir>>8)&0xF);
1.521 + load_spreg( R_ECX, R_FPSCR );
1.522 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.523 + load_fr_bank( R_EDX );
1.524 + JNE_rel8(12); // PR=0 only
1.526 + push_fr(R_EDX, FRn);
1.529 + pop_fr(R_EDX, FRn);
1.534 uint32_t FRn = ((ir>>8)&0xF);
1.537 + load_spreg( R_ECX, R_FPSCR );
1.538 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.540 + XOR_r32_r32( R_EAX, R_EAX );
1.541 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.542 + store_fr( R_ECX, R_EAX, FRn );
1.547 uint32_t FRn = ((ir>>8)&0xF);
1.550 + load_spreg( R_ECX, R_FPSCR );
1.551 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.553 + load_imm32(R_EAX, 0x3F800000);
1.554 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.555 + store_fr( R_ECX, R_EAX, FRn );
1.559 { /* FCNVSD FPUL, FRn */
1.560 uint32_t FRn = ((ir>>8)&0xF);
1.563 + load_spreg( R_ECX, R_FPSCR );
1.564 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.565 + JE_rel8(9); // only when PR=1
1.566 + load_fr_bank( R_ECX );
1.568 + pop_dr( R_ECX, FRn );
1.572 { /* FCNVDS FRm, FPUL */
1.573 uint32_t FRm = ((ir>>8)&0xF);
1.575 + load_spreg( R_ECX, R_FPSCR );
1.576 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.577 + JE_rel8(9); // only when PR=1
1.578 + load_fr_bank( R_ECX );
1.579 + push_dr( R_ECX, FRm );
1.584 { /* FIPR FVm, FVn */
1.585 uint32_t FVn = ((ir>>10)&0x3); uint32_t FVm = ((ir>>8)&0x3);
1.590 @@ -2557,6 +2831,7 @@
1.592 { /* FSCA FPUL, FRn */
1.593 uint32_t FRn = ((ir>>9)&0x7)<<1;
1.598 @@ -2564,16 +2839,25 @@
1.600 { /* FTRV XMTRX, FVn */
1.601 uint32_t FVn = ((ir>>10)&0x3);
1.606 switch( (ir&0xC00) >> 10 ) {
1.610 + load_spreg( R_ECX, R_FPSCR );
1.611 + XOR_imm32_r32( FPSCR_SZ, R_ECX );
1.612 + store_spreg( R_ECX, R_FPSCR );
1.618 + load_spreg( R_ECX, R_FPSCR );
1.619 + XOR_imm32_r32( FPSCR_FR, R_ECX );
1.620 + store_spreg( R_ECX, R_FPSCR );
1.624 @@ -2603,6 +2887,24 @@
1.626 { /* FMAC FR0, FRm, FRn */
1.627 uint32_t FRn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.629 + load_spreg( R_ECX, R_FPSCR );
1.630 + load_spreg( R_EDX, REG_OFFSET(fr_bank));
1.631 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.633 + push_fr( R_EDX, 0 );
1.634 + push_fr( R_EDX, FRm );
1.636 + push_fr( R_EDX, FRn );
1.638 + pop_fr( R_EDX, FRn );
1.640 + push_dr( R_EDX, 0 );
1.641 + push_dr( R_EDX, FRm );
1.643 + push_dr( R_EDX, FRn );
1.645 + pop_dr( R_EDX, FRn );