Search
lxdream.org :: lxdream :: r377:fa18743f6905
lxdream 0.9.1
released Jun 29
Download Now
changeset 377:fa18743f6905
parent 376:8c7587af5a5d
child 378:f10fbdd4e24b
author nkeynes
date Wed Sep 12 09:17:52 2007 +0000 (16 years ago)
Fill in most of the FP operations and fix the stack adjustments
src/sh4/sh4x86.c
src/sh4/sh4x86.in
src/sh4/x86op.h
1.1 --- a/src/sh4/sh4x86.c Wed Sep 12 09:16:47 2007 +0000
1.2 +++ b/src/sh4/sh4x86.c Wed Sep 12 09:17:52 2007 +0000
1.3 @@ -1,5 +1,5 @@
1.4 /**
1.5 - * $Id: sh4x86.c,v 1.5 2007-09-11 21:23:48 nkeynes Exp $
1.6 + * $Id: sh4x86.c,v 1.6 2007-09-12 09:17:52 nkeynes Exp $
1.7 *
1.8 * SH4 => x86 translation. This version does no real optimization, it just
1.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
1.10 @@ -180,6 +180,22 @@
1.11 }
1.12
1.13 /**
1.14 + * Push FPUL (as a 32-bit float) onto the FPU stack
1.15 + */
1.16 +static inline void push_fpul( )
1.17 +{
1.18 + OP(0xD9); OP(0x45); OP(R_FPUL);
1.19 +}
1.20 +
1.21 +/**
1.22 + * Pop FPUL (as a 32-bit float) from the FPU stack
1.23 + */
1.24 +static inline void pop_fpul( )
1.25 +{
1.26 + OP(0xD9); OP(0x5D); OP(R_FPUL);
1.27 +}
1.28 +
1.29 +/**
1.30 * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
1.31 * with the location of the current fp bank.
1.32 */
1.33 @@ -203,24 +219,12 @@
1.34 */
1.35 static inline void push_dr( int bankreg, int frm )
1.36 {
1.37 - if( frm&1 ) {
1.38 - // this is technically undefined, but it seems to work consistently - high 32 bits
1.39 - // loaded from FRm (32-bits), low 32bits are 0.
1.40 - OP(0xFF); OP(0x70 + bankreg); OP((frm^1)<<2); // PUSH [bankreg + frm^1]
1.41 - PUSH_imm32(0);
1.42 -
1.43 -
1.44 - } else {
1.45 - OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
1.46 - }
1.47 + OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
1.48 }
1.49
1.50 static inline void pop_dr( int bankreg, int frm )
1.51 {
1.52 - if( frm&1 ) {
1.53 - } else {
1.54 - OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
1.55 - }
1.56 + OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
1.57 }
1.58
1.59 /**
1.60 @@ -237,7 +241,7 @@
1.61 {
1.62 PUSH_r32(arg1);
1.63 call_func0(ptr);
1.64 - ADD_imm8s_r32( -4, R_ESP );
1.65 + ADD_imm8s_r32( 4, R_ESP );
1.66 }
1.67
1.68 static inline void call_func2( void *ptr, int arg1, int arg2 )
1.69 @@ -245,7 +249,7 @@
1.70 PUSH_r32(arg2);
1.71 PUSH_r32(arg1);
1.72 call_func0(ptr);
1.73 - ADD_imm8s_r32( -8, R_ESP );
1.74 + ADD_imm8s_r32( 8, R_ESP );
1.75 }
1.76
1.77 /**
1.78 @@ -262,9 +266,9 @@
1.79 PUSH_r32(addr);
1.80 PUSH_r32(arg2a);
1.81 call_func0(sh4_write_long);
1.82 - ADD_imm8s_r32( -8, R_ESP );
1.83 + ADD_imm8s_r32( 8, R_ESP );
1.84 call_func0(sh4_write_long);
1.85 - ADD_imm8s_r32( -8, R_ESP );
1.86 + ADD_imm8s_r32( 8, R_ESP );
1.87 }
1.88
1.89 /**
1.90 @@ -281,7 +285,7 @@
1.91 ADD_imm8s_r32( 4, addr );
1.92 PUSH_r32(addr);
1.93 call_func0(sh4_read_long);
1.94 - ADD_imm8s_r32( -4, R_ESP );
1.95 + ADD_imm8s_r32( 4, R_ESP );
1.96 MOV_r32_r32( R_EAX, arg2b );
1.97 POP_r32(arg2a);
1.98 }
1.99 @@ -537,7 +541,7 @@
1.100 CMP_imm32_r32( 0xE0000000, R_EAX );
1.101 JNE_rel8(8);
1.102 call_func0( sh4_flush_store_queue );
1.103 - ADD_imm8s_r32( -4, R_ESP );
1.104 + ADD_imm8s_r32( 4, R_ESP );
1.105 }
1.106 break;
1.107 case 0x9:
1.108 @@ -764,6 +768,8 @@
1.109 load_spreg( R_EAX, R_SSR );
1.110 call_func1( sh4_write_sr, R_EAX );
1.111 sh4_x86.in_delay_slot = TRUE;
1.112 + sh4_x86.priv_checked = FALSE;
1.113 + sh4_x86.fpuen_checked = FALSE;
1.114 INC_r32(R_ESI);
1.115 return 0;
1.116 }
1.117 @@ -1502,6 +1508,8 @@
1.118 store_reg( R_EAX, Rm );
1.119 MEM_READ_LONG( R_ECX, R_EAX );
1.120 call_func1( sh4_write_sr, R_EAX );
1.121 + sh4_x86.priv_checked = FALSE;
1.122 + sh4_x86.fpuen_checked = FALSE;
1.123 }
1.124 break;
1.125 case 0x1:
1.126 @@ -1774,6 +1782,8 @@
1.127 uint32_t Rm = ((ir>>8)&0xF);
1.128 load_reg( R_EAX, Rm );
1.129 call_func1( sh4_write_sr, R_EAX );
1.130 + sh4_x86.priv_checked = FALSE;
1.131 + sh4_x86.fpuen_checked = FALSE;
1.132 }
1.133 break;
1.134 case 0x1:
1.135 @@ -2332,53 +2342,127 @@
1.136 case 0x0:
1.137 { /* FADD FRm, FRn */
1.138 uint32_t FRn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.139 + check_fpuen();
1.140 + load_spreg( R_ECX, R_FPSCR );
1.141 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.142 + load_fr_bank( R_EDX );
1.143 + JNE_rel8(13);
1.144 + push_fr(R_EDX, FRm);
1.145 + push_fr(R_EDX, FRn);
1.146 + FADDP_st(1);
1.147 + pop_fr(R_EDX, FRn);
1.148 + JMP_rel8(11);
1.149 + push_dr(R_EDX, FRm);
1.150 + push_dr(R_EDX, FRn);
1.151 + FADDP_st(1);
1.152 + pop_dr(R_EDX, FRn);
1.153 }
1.154 break;
1.155 case 0x1:
1.156 { /* FSUB FRm, FRn */
1.157 uint32_t FRn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.158 + check_fpuen();
1.159 + load_spreg( R_ECX, R_FPSCR );
1.160 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.161 + load_fr_bank( R_EDX );
1.162 + JNE_rel8(13);
1.163 + push_fr(R_EDX, FRn);
1.164 + push_fr(R_EDX, FRm);
1.165 + FMULP_st(1);
1.166 + pop_fr(R_EDX, FRn);
1.167 + JMP_rel8(11);
1.168 + push_dr(R_EDX, FRn);
1.169 + push_dr(R_EDX, FRm);
1.170 + FMULP_st(1);
1.171 + pop_dr(R_EDX, FRn);
1.172 }
1.173 break;
1.174 case 0x2:
1.175 { /* FMUL FRm, FRn */
1.176 uint32_t FRn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.177 + check_fpuen();
1.178 + load_spreg( R_ECX, R_FPSCR );
1.179 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.180 + load_fr_bank( R_EDX );
1.181 + JNE_rel8(13);
1.182 + push_fr(R_EDX, FRm);
1.183 + push_fr(R_EDX, FRn);
1.184 + FMULP_st(1);
1.185 + pop_fr(R_EDX, FRn);
1.186 + JMP_rel8(11);
1.187 + push_dr(R_EDX, FRm);
1.188 + push_dr(R_EDX, FRn);
1.189 + FMULP_st(1);
1.190 + pop_dr(R_EDX, FRn);
1.191 }
1.192 break;
1.193 case 0x3:
1.194 { /* FDIV FRm, FRn */
1.195 uint32_t FRn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.196 + check_fpuen();
1.197 + load_spreg( R_ECX, R_FPSCR );
1.198 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.199 + load_fr_bank( R_EDX );
1.200 + JNE_rel8(13);
1.201 + push_fr(R_EDX, FRn);
1.202 + push_fr(R_EDX, FRm);
1.203 + FDIVP_st(1);
1.204 + pop_fr(R_EDX, FRn);
1.205 + JMP_rel8(11);
1.206 + push_dr(R_EDX, FRn);
1.207 + push_dr(R_EDX, FRm);
1.208 + FDIVP_st(1);
1.209 + pop_dr(R_EDX, FRn);
1.210 }
1.211 break;
1.212 case 0x4:
1.213 { /* FCMP/EQ FRm, FRn */
1.214 uint32_t FRn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.215 + check_fpuen();
1.216 + load_spreg( R_ECX, R_FPSCR );
1.217 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.218 + load_fr_bank( R_EDX );
1.219 + JNE_rel8(8);
1.220 + push_fr(R_EDX, FRm);
1.221 + push_fr(R_EDX, FRn);
1.222 + JMP_rel8(6);
1.223 + push_dr(R_EDX, FRm);
1.224 + push_dr(R_EDX, FRn);
1.225 + FCOMIP_st(1);
1.226 + SETE_t();
1.227 + FPOP_st();
1.228 }
1.229 break;
1.230 case 0x5:
1.231 { /* FCMP/GT FRm, FRn */
1.232 uint32_t FRn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.233 + check_fpuen();
1.234 + load_spreg( R_ECX, R_FPSCR );
1.235 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.236 + load_fr_bank( R_EDX );
1.237 + JNE_rel8(8);
1.238 + push_fr(R_EDX, FRm);
1.239 + push_fr(R_EDX, FRn);
1.240 + JMP_rel8(6);
1.241 + push_dr(R_EDX, FRm);
1.242 + push_dr(R_EDX, FRn);
1.243 + FCOMIP_st(1);
1.244 + SETA_t();
1.245 + FPOP_st();
1.246 }
1.247 break;
1.248 case 0x6:
1.249 { /* FMOV @(R0, Rm), FRn */
1.250 uint32_t FRn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.251 - }
1.252 - break;
1.253 - case 0x7:
1.254 - { /* FMOV FRm, @(R0, Rn) */
1.255 - uint32_t Rn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.256 - }
1.257 - break;
1.258 - case 0x8:
1.259 - { /* FMOV @Rm, FRn */
1.260 - uint32_t FRn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.261 + check_fpuen();
1.262 load_reg( R_EDX, Rm );
1.263 + ADD_sh4r_r32( REG_OFFSET(r[0]), R_EDX );
1.264 check_ralign32( R_EDX );
1.265 load_spreg( R_ECX, R_FPSCR );
1.266 TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.267 JNE_rel8(19);
1.268 MEM_READ_LONG( R_EDX, R_EAX );
1.269 - load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.270 + load_fr_bank( R_ECX );
1.271 store_fr( R_ECX, R_EAX, FRn );
1.272 if( FRn&1 ) {
1.273 JMP_rel8(46);
1.274 @@ -2388,7 +2472,58 @@
1.275 } else {
1.276 JMP_rel8(36);
1.277 MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.278 - load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.279 + load_fr_bank( R_ECX );
1.280 + }
1.281 + store_fr( R_ECX, R_EAX, FRn&0x0E );
1.282 + store_fr( R_ECX, R_EDX, FRn|0x01 );
1.283 + }
1.284 + break;
1.285 + case 0x7:
1.286 + { /* FMOV FRm, @(R0, Rn) */
1.287 + uint32_t Rn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.288 + check_fpuen();
1.289 + load_reg( R_EDX, Rn );
1.290 + ADD_sh4r_r32( REG_OFFSET(r[0]), R_EDX );
1.291 + check_walign32( R_EDX );
1.292 + load_spreg( R_ECX, R_FPSCR );
1.293 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.294 + JNE_rel8(20);
1.295 + load_fr_bank( R_ECX );
1.296 + load_fr( R_ECX, R_EAX, FRm );
1.297 + MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
1.298 + if( FRm&1 ) {
1.299 + JMP_rel8( 46 );
1.300 + load_xf_bank( R_ECX );
1.301 + } else {
1.302 + JMP_rel8( 39 );
1.303 + load_fr_bank( R_ECX );
1.304 + }
1.305 + load_fr( R_ECX, R_EAX, FRm&0x0E );
1.306 + load_fr( R_ECX, R_ECX, FRm|0x01 );
1.307 + MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
1.308 + }
1.309 + break;
1.310 + case 0x8:
1.311 + { /* FMOV @Rm, FRn */
1.312 + uint32_t FRn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.313 + check_fpuen();
1.314 + load_reg( R_EDX, Rm );
1.315 + check_ralign32( R_EDX );
1.316 + load_spreg( R_ECX, R_FPSCR );
1.317 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.318 + JNE_rel8(19);
1.319 + MEM_READ_LONG( R_EDX, R_EAX );
1.320 + load_fr_bank( R_ECX );
1.321 + store_fr( R_ECX, R_EAX, FRn );
1.322 + if( FRn&1 ) {
1.323 + JMP_rel8(46);
1.324 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.325 + load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
1.326 + load_xf_bank( R_ECX );
1.327 + } else {
1.328 + JMP_rel8(36);
1.329 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.330 + load_fr_bank( R_ECX );
1.331 }
1.332 store_fr( R_ECX, R_EAX, FRn&0x0E );
1.333 store_fr( R_ECX, R_EDX, FRn|0x01 );
1.334 @@ -2397,17 +2532,46 @@
1.335 case 0x9:
1.336 { /* FMOV @Rm+, FRn */
1.337 uint32_t FRn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.338 + check_fpuen();
1.339 + load_reg( R_EDX, Rm );
1.340 + check_ralign32( R_EDX );
1.341 + MOV_r32_r32( R_EDX, R_EAX );
1.342 + load_spreg( R_ECX, R_FPSCR );
1.343 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.344 + JNE_rel8(25);
1.345 + ADD_imm8s_r32( 4, R_EAX );
1.346 + store_reg( R_EAX, Rm );
1.347 + MEM_READ_LONG( R_EDX, R_EAX );
1.348 + load_fr_bank( R_ECX );
1.349 + store_fr( R_ECX, R_EAX, FRn );
1.350 + if( FRn&1 ) {
1.351 + JMP_rel8(52);
1.352 + ADD_imm8s_r32( 8, R_EAX );
1.353 + store_reg(R_EAX, Rm);
1.354 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.355 + load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
1.356 + load_xf_bank( R_ECX );
1.357 + } else {
1.358 + JMP_rel8(42);
1.359 + ADD_imm8s_r32( 8, R_EAX );
1.360 + store_reg(R_EAX, Rm);
1.361 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.362 + load_fr_bank( R_ECX );
1.363 + }
1.364 + store_fr( R_ECX, R_EAX, FRn&0x0E );
1.365 + store_fr( R_ECX, R_EDX, FRn|0x01 );
1.366 }
1.367 break;
1.368 case 0xA:
1.369 { /* FMOV FRm, @Rn */
1.370 uint32_t Rn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.371 + check_fpuen();
1.372 load_reg( R_EDX, Rn );
1.373 check_walign32( R_EDX );
1.374 load_spreg( R_ECX, R_FPSCR );
1.375 TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.376 JNE_rel8(20);
1.377 - load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.378 + load_fr_bank( R_ECX );
1.379 load_fr( R_ECX, R_EAX, FRm );
1.380 MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
1.381 if( FRm&1 ) {
1.382 @@ -2415,7 +2579,7 @@
1.383 load_xf_bank( R_ECX );
1.384 } else {
1.385 JMP_rel8( 39 );
1.386 - load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.387 + load_fr_bank( R_ECX );
1.388 }
1.389 load_fr( R_ECX, R_EAX, FRm&0x0E );
1.390 load_fr( R_ECX, R_ECX, FRm|0x01 );
1.391 @@ -2425,6 +2589,29 @@
1.392 case 0xB:
1.393 { /* FMOV FRm, @-Rn */
1.394 uint32_t Rn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.395 + check_fpuen();
1.396 + load_reg( R_EDX, Rn );
1.397 + check_walign32( R_EDX );
1.398 + load_spreg( R_ECX, R_FPSCR );
1.399 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.400 + JNE_rel8(20);
1.401 + load_fr_bank( R_ECX );
1.402 + load_fr( R_ECX, R_EAX, FRm );
1.403 + ADD_imm8s_r32(-4,R_EDX);
1.404 + store_reg( R_EDX, Rn );
1.405 + MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
1.406 + if( FRm&1 ) {
1.407 + JMP_rel8( 46 );
1.408 + load_xf_bank( R_ECX );
1.409 + } else {
1.410 + JMP_rel8( 39 );
1.411 + load_fr_bank( R_ECX );
1.412 + }
1.413 + load_fr( R_ECX, R_EAX, FRm&0x0E );
1.414 + load_fr( R_ECX, R_ECX, FRm|0x01 );
1.415 + ADD_imm8s_r32(-8,R_EDX);
1.416 + store_reg( R_EDX, Rn );
1.417 + MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
1.418 }
1.419 break;
1.420 case 0xC:
1.421 @@ -2437,8 +2624,9 @@
1.422 * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
1.423 * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
1.424 */
1.425 + check_fpuen();
1.426 load_spreg( R_ECX, R_FPSCR );
1.427 - load_spreg( R_EDX, REG_OFFSET(fr_bank) );
1.428 + load_fr_bank( R_EDX );
1.429 TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.430 JNE_rel8(8);
1.431 load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
1.432 @@ -2479,33 +2667,65 @@
1.433 case 0x0:
1.434 { /* FSTS FPUL, FRn */
1.435 uint32_t FRn = ((ir>>8)&0xF);
1.436 + check_fpuen();
1.437 + load_fr_bank( R_ECX );
1.438 + load_spreg( R_EAX, R_FPUL );
1.439 + store_fr( R_ECX, R_EAX, FRn );
1.440 }
1.441 break;
1.442 case 0x1:
1.443 { /* FLDS FRm, FPUL */
1.444 uint32_t FRm = ((ir>>8)&0xF);
1.445 + check_fpuen();
1.446 + load_fr_bank( R_ECX );
1.447 + load_fr( R_ECX, R_EAX, FRm );
1.448 + store_spreg( R_EAX, R_FPUL );
1.449 }
1.450 break;
1.451 case 0x2:
1.452 { /* FLOAT FPUL, FRn */
1.453 uint32_t FRn = ((ir>>8)&0xF);
1.454 + check_fpuen();
1.455 + load_spreg( R_ECX, R_FPSCR );
1.456 + load_spreg(R_EDX, REG_OFFSET(fr_bank));
1.457 + FILD_sh4r(R_FPUL);
1.458 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.459 + JNE_rel8(5);
1.460 + pop_fr( R_EDX, FRn );
1.461 + JMP_rel8(3);
1.462 + pop_dr( R_EDX, FRn );
1.463 }
1.464 break;
1.465 case 0x3:
1.466 { /* FTRC FRm, FPUL */
1.467 uint32_t FRm = ((ir>>8)&0xF);
1.468 + check_fpuen();
1.469 + // TODO
1.470 }
1.471 break;
1.472 case 0x4:
1.473 { /* FNEG FRn */
1.474 uint32_t FRn = ((ir>>8)&0xF);
1.475 + check_fpuen();
1.476 + load_spreg( R_ECX, R_FPSCR );
1.477 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.478 + load_fr_bank( R_EDX );
1.479 + JNE_rel8(10);
1.480 + push_fr(R_EDX, FRn);
1.481 + FCHS_st0();
1.482 + pop_fr(R_EDX, FRn);
1.483 + JMP_rel8(8);
1.484 + push_dr(R_EDX, FRn);
1.485 + FCHS_st0();
1.486 + pop_dr(R_EDX, FRn);
1.487 }
1.488 break;
1.489 case 0x5:
1.490 { /* FABS FRn */
1.491 uint32_t FRn = ((ir>>8)&0xF);
1.492 + check_fpuen();
1.493 load_spreg( R_ECX, R_FPSCR );
1.494 - load_spreg( R_EDX, REG_OFFSET(fr_bank) );
1.495 + load_fr_bank( R_EDX );
1.496 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.497 JNE_rel8(10);
1.498 push_fr(R_EDX, FRn); // 3
1.499 @@ -2520,36 +2740,90 @@
1.500 case 0x6:
1.501 { /* FSQRT FRn */
1.502 uint32_t FRn = ((ir>>8)&0xF);
1.503 + check_fpuen();
1.504 + load_spreg( R_ECX, R_FPSCR );
1.505 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.506 + load_fr_bank( R_EDX );
1.507 + JNE_rel8(10);
1.508 + push_fr(R_EDX, FRn);
1.509 + FSQRT_st0();
1.510 + pop_fr(R_EDX, FRn);
1.511 + JMP_rel8(8);
1.512 + push_dr(R_EDX, FRn);
1.513 + FSQRT_st0();
1.514 + pop_dr(R_EDX, FRn);
1.515 }
1.516 break;
1.517 case 0x7:
1.518 { /* FSRRA FRn */
1.519 uint32_t FRn = ((ir>>8)&0xF);
1.520 + check_fpuen();
1.521 + load_spreg( R_ECX, R_FPSCR );
1.522 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.523 + load_fr_bank( R_EDX );
1.524 + JNE_rel8(12); // PR=0 only
1.525 + FLD1_st0();
1.526 + push_fr(R_EDX, FRn);
1.527 + FSQRT_st0();
1.528 + FDIVP_st(1);
1.529 + pop_fr(R_EDX, FRn);
1.530 }
1.531 break;
1.532 case 0x8:
1.533 { /* FLDI0 FRn */
1.534 uint32_t FRn = ((ir>>8)&0xF);
1.535 + /* IFF PR=0 */
1.536 + check_fpuen();
1.537 + load_spreg( R_ECX, R_FPSCR );
1.538 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.539 + JNE_rel8(8);
1.540 + XOR_r32_r32( R_EAX, R_EAX );
1.541 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.542 + store_fr( R_ECX, R_EAX, FRn );
1.543 }
1.544 break;
1.545 case 0x9:
1.546 { /* FLDI1 FRn */
1.547 uint32_t FRn = ((ir>>8)&0xF);
1.548 + /* IFF PR=0 */
1.549 + check_fpuen();
1.550 + load_spreg( R_ECX, R_FPSCR );
1.551 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.552 + JNE_rel8(11);
1.553 + load_imm32(R_EAX, 0x3F800000);
1.554 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.555 + store_fr( R_ECX, R_EAX, FRn );
1.556 }
1.557 break;
1.558 case 0xA:
1.559 { /* FCNVSD FPUL, FRn */
1.560 uint32_t FRn = ((ir>>8)&0xF);
1.561 + check_fpuen();
1.562 + check_fpuen();
1.563 + load_spreg( R_ECX, R_FPSCR );
1.564 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.565 + JE_rel8(9); // only when PR=1
1.566 + load_fr_bank( R_ECX );
1.567 + push_fpul();
1.568 + pop_dr( R_ECX, FRn );
1.569 }
1.570 break;
1.571 case 0xB:
1.572 { /* FCNVDS FRm, FPUL */
1.573 uint32_t FRm = ((ir>>8)&0xF);
1.574 + check_fpuen();
1.575 + load_spreg( R_ECX, R_FPSCR );
1.576 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.577 + JE_rel8(9); // only when PR=1
1.578 + load_fr_bank( R_ECX );
1.579 + push_dr( R_ECX, FRm );
1.580 + pop_fpul();
1.581 }
1.582 break;
1.583 case 0xE:
1.584 { /* FIPR FVm, FVn */
1.585 uint32_t FVn = ((ir>>10)&0x3); uint32_t FVm = ((ir>>8)&0x3);
1.586 + check_fpuen();
1.587 }
1.588 break;
1.589 case 0xF:
1.590 @@ -2557,6 +2831,7 @@
1.591 case 0x0:
1.592 { /* FSCA FPUL, FRn */
1.593 uint32_t FRn = ((ir>>9)&0x7)<<1;
1.594 + check_fpuen();
1.595 }
1.596 break;
1.597 case 0x1:
1.598 @@ -2564,16 +2839,25 @@
1.599 case 0x0:
1.600 { /* FTRV XMTRX, FVn */
1.601 uint32_t FVn = ((ir>>10)&0x3);
1.602 + check_fpuen();
1.603 }
1.604 break;
1.605 case 0x1:
1.606 switch( (ir&0xC00) >> 10 ) {
1.607 case 0x0:
1.608 { /* FSCHG */
1.609 + check_fpuen();
1.610 + load_spreg( R_ECX, R_FPSCR );
1.611 + XOR_imm32_r32( FPSCR_SZ, R_ECX );
1.612 + store_spreg( R_ECX, R_FPSCR );
1.613 }
1.614 break;
1.615 case 0x2:
1.616 { /* FRCHG */
1.617 + check_fpuen();
1.618 + load_spreg( R_ECX, R_FPSCR );
1.619 + XOR_imm32_r32( FPSCR_FR, R_ECX );
1.620 + store_spreg( R_ECX, R_FPSCR );
1.621 }
1.622 break;
1.623 case 0x3:
1.624 @@ -2603,6 +2887,24 @@
1.625 case 0xE:
1.626 { /* FMAC FR0, FRm, FRn */
1.627 uint32_t FRn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.628 + check_fpuen();
1.629 + load_spreg( R_ECX, R_FPSCR );
1.630 + load_spreg( R_EDX, REG_OFFSET(fr_bank));
1.631 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.632 + JNE_rel8(18);
1.633 + push_fr( R_EDX, 0 );
1.634 + push_fr( R_EDX, FRm );
1.635 + FMULP_st(1);
1.636 + push_fr( R_EDX, FRn );
1.637 + FADDP_st(1);
1.638 + pop_fr( R_EDX, FRn );
1.639 + JMP_rel8(16);
1.640 + push_dr( R_EDX, 0 );
1.641 + push_dr( R_EDX, FRm );
1.642 + FMULP_st(1);
1.643 + push_dr( R_EDX, FRn );
1.644 + FADDP_st(1);
1.645 + pop_dr( R_EDX, FRn );
1.646 }
1.647 break;
1.648 default:
2.1 --- a/src/sh4/sh4x86.in Wed Sep 12 09:16:47 2007 +0000
2.2 +++ b/src/sh4/sh4x86.in Wed Sep 12 09:17:52 2007 +0000
2.3 @@ -1,5 +1,5 @@
2.4 /**
2.5 - * $Id: sh4x86.in,v 1.5 2007-09-11 21:23:48 nkeynes Exp $
2.6 + * $Id: sh4x86.in,v 1.6 2007-09-12 09:17:24 nkeynes Exp $
2.7 *
2.8 * SH4 => x86 translation. This version does no real optimization, it just
2.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
2.10 @@ -180,6 +180,22 @@
2.11 }
2.12
2.13 /**
2.14 + * Push FPUL (as a 32-bit float) onto the FPU stack
2.15 + */
2.16 +static inline void push_fpul( )
2.17 +{
2.18 + OP(0xD9); OP(0x45); OP(R_FPUL);
2.19 +}
2.20 +
2.21 +/**
2.22 + * Pop FPUL (as a 32-bit float) from the FPU stack
2.23 + */
2.24 +static inline void pop_fpul( )
2.25 +{
2.26 + OP(0xD9); OP(0x5D); OP(R_FPUL);
2.27 +}
2.28 +
2.29 +/**
2.30 * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
2.31 * with the location of the current fp bank.
2.32 */
2.33 @@ -225,7 +241,7 @@
2.34 {
2.35 PUSH_r32(arg1);
2.36 call_func0(ptr);
2.37 - ADD_imm8s_r32( -4, R_ESP );
2.38 + ADD_imm8s_r32( 4, R_ESP );
2.39 }
2.40
2.41 static inline void call_func2( void *ptr, int arg1, int arg2 )
2.42 @@ -233,7 +249,7 @@
2.43 PUSH_r32(arg2);
2.44 PUSH_r32(arg1);
2.45 call_func0(ptr);
2.46 - ADD_imm8s_r32( -8, R_ESP );
2.47 + ADD_imm8s_r32( 8, R_ESP );
2.48 }
2.49
2.50 /**
2.51 @@ -250,9 +266,9 @@
2.52 PUSH_r32(addr);
2.53 PUSH_r32(arg2a);
2.54 call_func0(sh4_write_long);
2.55 - ADD_imm8s_r32( -8, R_ESP );
2.56 + ADD_imm8s_r32( 8, R_ESP );
2.57 call_func0(sh4_write_long);
2.58 - ADD_imm8s_r32( -8, R_ESP );
2.59 + ADD_imm8s_r32( 8, R_ESP );
2.60 }
2.61
2.62 /**
2.63 @@ -269,7 +285,7 @@
2.64 ADD_imm8s_r32( 4, addr );
2.65 PUSH_r32(addr);
2.66 call_func0(sh4_read_long);
2.67 - ADD_imm8s_r32( -4, R_ESP );
2.68 + ADD_imm8s_r32( 4, R_ESP );
2.69 MOV_r32_r32( R_EAX, arg2b );
2.70 POP_r32(arg2a);
2.71 }
2.72 @@ -1235,6 +1251,8 @@
2.73 load_spreg( R_EAX, R_SSR );
2.74 call_func1( sh4_write_sr, R_EAX );
2.75 sh4_x86.in_delay_slot = TRUE;
2.76 + sh4_x86.priv_checked = FALSE;
2.77 + sh4_x86.fpuen_checked = FALSE;
2.78 INC_r32(R_ESI);
2.79 return 0;
2.80 }
2.81 @@ -1297,8 +1315,9 @@
2.82 * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
2.83 * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
2.84 */
2.85 + check_fpuen();
2.86 load_spreg( R_ECX, R_FPSCR );
2.87 - load_spreg( R_EDX, REG_OFFSET(fr_bank) );
2.88 + load_fr_bank( R_EDX );
2.89 TEST_imm32_r32( FPSCR_SZ, R_ECX );
2.90 JNE_rel8(8);
2.91 load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
2.92 @@ -1334,12 +1353,13 @@
2.93 }
2.94 :}
2.95 FMOV FRm, @Rn {:
2.96 + check_fpuen();
2.97 load_reg( R_EDX, Rn );
2.98 check_walign32( R_EDX );
2.99 load_spreg( R_ECX, R_FPSCR );
2.100 TEST_imm32_r32( FPSCR_SZ, R_ECX );
2.101 JNE_rel8(20);
2.102 - load_spreg( R_ECX, REG_OFFSET(fr_bank) );
2.103 + load_fr_bank( R_ECX );
2.104 load_fr( R_ECX, R_EAX, FRm );
2.105 MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
2.106 if( FRm&1 ) {
2.107 @@ -1347,20 +1367,21 @@
2.108 load_xf_bank( R_ECX );
2.109 } else {
2.110 JMP_rel8( 39 );
2.111 - load_spreg( R_ECX, REG_OFFSET(fr_bank) );
2.112 + load_fr_bank( R_ECX );
2.113 }
2.114 load_fr( R_ECX, R_EAX, FRm&0x0E );
2.115 load_fr( R_ECX, R_ECX, FRm|0x01 );
2.116 MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
2.117 :}
2.118 FMOV @Rm, FRn {:
2.119 + check_fpuen();
2.120 load_reg( R_EDX, Rm );
2.121 check_ralign32( R_EDX );
2.122 load_spreg( R_ECX, R_FPSCR );
2.123 TEST_imm32_r32( FPSCR_SZ, R_ECX );
2.124 JNE_rel8(19);
2.125 MEM_READ_LONG( R_EDX, R_EAX );
2.126 - load_spreg( R_ECX, REG_OFFSET(fr_bank) );
2.127 + load_fr_bank( R_ECX );
2.128 store_fr( R_ECX, R_EAX, FRn );
2.129 if( FRn&1 ) {
2.130 JMP_rel8(46);
2.131 @@ -1370,20 +1391,183 @@
2.132 } else {
2.133 JMP_rel8(36);
2.134 MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
2.135 - load_spreg( R_ECX, REG_OFFSET(fr_bank) );
2.136 + load_fr_bank( R_ECX );
2.137 }
2.138 store_fr( R_ECX, R_EAX, FRn&0x0E );
2.139 store_fr( R_ECX, R_EDX, FRn|0x01 );
2.140 :}
2.141 -FMOV FRm, @-Rn {: :}
2.142 -FMOV FRm, @(R0, Rn) {: :}
2.143 -FMOV @Rm+, FRn {: :}
2.144 -FMOV @(R0, Rm), FRn {: :}
2.145 +FMOV FRm, @-Rn {:
2.146 + check_fpuen();
2.147 + load_reg( R_EDX, Rn );
2.148 + check_walign32( R_EDX );
2.149 + load_spreg( R_ECX, R_FPSCR );
2.150 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
2.151 + JNE_rel8(20);
2.152 + load_fr_bank( R_ECX );
2.153 + load_fr( R_ECX, R_EAX, FRm );
2.154 + ADD_imm8s_r32(-4,R_EDX);
2.155 + store_reg( R_EDX, Rn );
2.156 + MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
2.157 + if( FRm&1 ) {
2.158 + JMP_rel8( 46 );
2.159 + load_xf_bank( R_ECX );
2.160 + } else {
2.161 + JMP_rel8( 39 );
2.162 + load_fr_bank( R_ECX );
2.163 + }
2.164 + load_fr( R_ECX, R_EAX, FRm&0x0E );
2.165 + load_fr( R_ECX, R_ECX, FRm|0x01 );
2.166 + ADD_imm8s_r32(-8,R_EDX);
2.167 + store_reg( R_EDX, Rn );
2.168 + MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
2.169 +:}
2.170 +FMOV @Rm+, FRn {:
2.171 + check_fpuen();
2.172 + load_reg( R_EDX, Rm );
2.173 + check_ralign32( R_EDX );
2.174 + MOV_r32_r32( R_EDX, R_EAX );
2.175 + load_spreg( R_ECX, R_FPSCR );
2.176 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
2.177 + JNE_rel8(25);
2.178 + ADD_imm8s_r32( 4, R_EAX );
2.179 + store_reg( R_EAX, Rm );
2.180 + MEM_READ_LONG( R_EDX, R_EAX );
2.181 + load_fr_bank( R_ECX );
2.182 + store_fr( R_ECX, R_EAX, FRn );
2.183 + if( FRn&1 ) {
2.184 + JMP_rel8(52);
2.185 + ADD_imm8s_r32( 8, R_EAX );
2.186 + store_reg(R_EAX, Rm);
2.187 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
2.188 + load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
2.189 + load_xf_bank( R_ECX );
2.190 + } else {
2.191 + JMP_rel8(42);
2.192 + ADD_imm8s_r32( 8, R_EAX );
2.193 + store_reg(R_EAX, Rm);
2.194 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
2.195 + load_fr_bank( R_ECX );
2.196 + }
2.197 + store_fr( R_ECX, R_EAX, FRn&0x0E );
2.198 + store_fr( R_ECX, R_EDX, FRn|0x01 );
2.199 +:}
2.200 +FMOV FRm, @(R0, Rn) {:
2.201 + check_fpuen();
2.202 + load_reg( R_EDX, Rn );
2.203 + ADD_sh4r_r32( REG_OFFSET(r[0]), R_EDX );
2.204 + check_walign32( R_EDX );
2.205 + load_spreg( R_ECX, R_FPSCR );
2.206 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
2.207 + JNE_rel8(20);
2.208 + load_fr_bank( R_ECX );
2.209 + load_fr( R_ECX, R_EAX, FRm );
2.210 + MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
2.211 + if( FRm&1 ) {
2.212 + JMP_rel8( 46 );
2.213 + load_xf_bank( R_ECX );
2.214 + } else {
2.215 + JMP_rel8( 39 );
2.216 + load_fr_bank( R_ECX );
2.217 + }
2.218 + load_fr( R_ECX, R_EAX, FRm&0x0E );
2.219 + load_fr( R_ECX, R_ECX, FRm|0x01 );
2.220 + MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
2.221 +:}
2.222 +FMOV @(R0, Rm), FRn {:
2.223 + check_fpuen();
2.224 + load_reg( R_EDX, Rm );
2.225 + ADD_sh4r_r32( REG_OFFSET(r[0]), R_EDX );
2.226 + check_ralign32( R_EDX );
2.227 + load_spreg( R_ECX, R_FPSCR );
2.228 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
2.229 + JNE_rel8(19);
2.230 + MEM_READ_LONG( R_EDX, R_EAX );
2.231 + load_fr_bank( R_ECX );
2.232 + store_fr( R_ECX, R_EAX, FRn );
2.233 + if( FRn&1 ) {
2.234 + JMP_rel8(46);
2.235 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
2.236 + load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
2.237 + load_xf_bank( R_ECX );
2.238 + } else {
2.239 + JMP_rel8(36);
2.240 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
2.241 + load_fr_bank( R_ECX );
2.242 + }
2.243 + store_fr( R_ECX, R_EAX, FRn&0x0E );
2.244 + store_fr( R_ECX, R_EDX, FRn|0x01 );
2.245 +:}
2.246 +FLDI0 FRn {: /* IFF PR=0 */
2.247 + check_fpuen();
2.248 + load_spreg( R_ECX, R_FPSCR );
2.249 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.250 + JNE_rel8(8);
2.251 + XOR_r32_r32( R_EAX, R_EAX );
2.252 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
2.253 + store_fr( R_ECX, R_EAX, FRn );
2.254 +:}
2.255 +FLDI1 FRn {: /* IFF PR=0 */
2.256 + check_fpuen();
2.257 + load_spreg( R_ECX, R_FPSCR );
2.258 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.259 + JNE_rel8(11);
2.260 + load_imm32(R_EAX, 0x3F800000);
2.261 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
2.262 + store_fr( R_ECX, R_EAX, FRn );
2.263 +:}
2.264 +
2.265 +FLOAT FPUL, FRn {:
2.266 + check_fpuen();
2.267 + load_spreg( R_ECX, R_FPSCR );
2.268 + load_spreg(R_EDX, REG_OFFSET(fr_bank));
2.269 + FILD_sh4r(R_FPUL);
2.270 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.271 + JNE_rel8(5);
2.272 + pop_fr( R_EDX, FRn );
2.273 + JMP_rel8(3);
2.274 + pop_dr( R_EDX, FRn );
2.275 +:}
2.276 +FTRC FRm, FPUL {:
2.277 + check_fpuen();
2.278 + // TODO
2.279 +:}
2.280 +FLDS FRm, FPUL {:
2.281 + check_fpuen();
2.282 + load_fr_bank( R_ECX );
2.283 + load_fr( R_ECX, R_EAX, FRm );
2.284 + store_spreg( R_EAX, R_FPUL );
2.285 +:}
2.286 +FSTS FPUL, FRn {:
2.287 + check_fpuen();
2.288 + load_fr_bank( R_ECX );
2.289 + load_spreg( R_EAX, R_FPUL );
2.290 + store_fr( R_ECX, R_EAX, FRn );
2.291 +:}
2.292 +FCNVDS FRm, FPUL {:
2.293 + check_fpuen();
2.294 + load_spreg( R_ECX, R_FPSCR );
2.295 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.296 + JE_rel8(9); // only when PR=1
2.297 + load_fr_bank( R_ECX );
2.298 + push_dr( R_ECX, FRm );
2.299 + pop_fpul();
2.300 +:}
2.301 +FCNVSD FPUL, FRn {:
2.302 + check_fpuen();
2.303 + check_fpuen();
2.304 + load_spreg( R_ECX, R_FPSCR );
2.305 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.306 + JE_rel8(9); // only when PR=1
2.307 + load_fr_bank( R_ECX );
2.308 + push_fpul();
2.309 + pop_dr( R_ECX, FRn );
2.310 +:}
2.311
2.312 /* Floating point instructions */
2.313 FABS FRn {:
2.314 + check_fpuen();
2.315 load_spreg( R_ECX, R_FPSCR );
2.316 - load_spreg( R_EDX, REG_OFFSET(fr_bank) );
2.317 + load_fr_bank( R_EDX );
2.318 TEST_imm32_r32( FPSCR_PR, R_ECX );
2.319 JNE_rel8(10);
2.320 push_fr(R_EDX, FRn); // 3
2.321 @@ -1394,41 +1578,40 @@
2.322 FABS_st0();
2.323 pop_dr(R_EDX, FRn);
2.324 :}
2.325 -FADD FRm, FRn {: :}
2.326 -FCMP/EQ FRm, FRn {: :}
2.327 -FCMP/GT FRm, FRn {: :}
2.328 -FCNVDS FRm, FPUL {: :}
2.329 -FCNVSD FPUL, FRn {: :}
2.330 -FDIV FRm, FRn {: :}
2.331 -FIPR FVm, FVn {: :}
2.332 -FLDS FRm, FPUL {: :}
2.333 -FLDI0 FRn {: /* IFF PR=0 */
2.334 +FADD FRm, FRn {:
2.335 + check_fpuen();
2.336 load_spreg( R_ECX, R_FPSCR );
2.337 TEST_imm32_r32( FPSCR_PR, R_ECX );
2.338 - JNE_rel8(8);
2.339 - xor_r32_r32( R_EAX, R_EAX );
2.340 - load_spreg( R_ECX, REG_OFFSET(fr_bank) );
2.341 - store_fr( R_ECX, R_EAX, FRn );
2.342 + load_fr_bank( R_EDX );
2.343 + JNE_rel8(13);
2.344 + push_fr(R_EDX, FRm);
2.345 + push_fr(R_EDX, FRn);
2.346 + FADDP_st(1);
2.347 + pop_fr(R_EDX, FRn);
2.348 + JMP_rel8(11);
2.349 + push_dr(R_EDX, FRm);
2.350 + push_dr(R_EDX, FRn);
2.351 + FADDP_st(1);
2.352 + pop_dr(R_EDX, FRn);
2.353 :}
2.354 -FLDI1 FRn {: /* IFF PR=0 */
2.355 +FDIV FRm, FRn {:
2.356 + check_fpuen();
2.357 load_spreg( R_ECX, R_FPSCR );
2.358 TEST_imm32_r32( FPSCR_PR, R_ECX );
2.359 - JNE_rel8(11);
2.360 - load_imm32(R_EAX, 0x3F800000);
2.361 - load_spreg( R_ECX, REG_OFFSET(fr_bank) );
2.362 - store_fr( R_ECX, R_EAX, FRn );
2.363 -:}
2.364 -FLOAT FPUL, FRn {:
2.365 - load_spreg( R_ECX, R_FPSCR );
2.366 - load_spreg(R_EDX, REG_OFFSET(fr_bank));
2.367 - FILD_sh4r(R_FPUL);
2.368 - TEST_imm32_r32( FPSCR_PR, R_ECX );
2.369 - JNE_rel8(5);
2.370 - pop_fr( R_EDX, FRn );
2.371 - JMP_rel8(3);
2.372 - pop_dr( R_EDX, FRn );
2.373 + load_fr_bank( R_EDX );
2.374 + JNE_rel8(13);
2.375 + push_fr(R_EDX, FRn);
2.376 + push_fr(R_EDX, FRm);
2.377 + FDIVP_st(1);
2.378 + pop_fr(R_EDX, FRn);
2.379 + JMP_rel8(11);
2.380 + push_dr(R_EDX, FRn);
2.381 + push_dr(R_EDX, FRm);
2.382 + FDIVP_st(1);
2.383 + pop_dr(R_EDX, FRn);
2.384 :}
2.385 FMAC FR0, FRm, FRn {:
2.386 + check_fpuen();
2.387 load_spreg( R_ECX, R_FPSCR );
2.388 load_spreg( R_EDX, REG_OFFSET(fr_bank));
2.389 TEST_imm32_r32( FPSCR_PR, R_ECX );
2.390 @@ -1448,22 +1631,140 @@
2.391 pop_dr( R_EDX, FRn );
2.392 :}
2.393
2.394 -FMUL FRm, FRn {: :}
2.395 -FNEG FRn {: :}
2.396 -FRCHG {: :}
2.397 -FSCA FPUL, FRn {: :}
2.398 -FSCHG {: :}
2.399 -FSQRT FRn {: :}
2.400 -FSRRA FRn {: :}
2.401 -FSTS FPUL, FRn {: :}
2.402 -FSUB FRm, FRn {: :}
2.403 -FTRC FRm, FPUL {: :}
2.404 -FTRV XMTRX, FVn {: :}
2.405 +FMUL FRm, FRn {:
2.406 + check_fpuen();
2.407 + load_spreg( R_ECX, R_FPSCR );
2.408 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.409 + load_fr_bank( R_EDX );
2.410 + JNE_rel8(13);
2.411 + push_fr(R_EDX, FRm);
2.412 + push_fr(R_EDX, FRn);
2.413 + FMULP_st(1);
2.414 + pop_fr(R_EDX, FRn);
2.415 + JMP_rel8(11);
2.416 + push_dr(R_EDX, FRm);
2.417 + push_dr(R_EDX, FRn);
2.418 + FMULP_st(1);
2.419 + pop_dr(R_EDX, FRn);
2.420 +:}
2.421 +FNEG FRn {:
2.422 + check_fpuen();
2.423 + load_spreg( R_ECX, R_FPSCR );
2.424 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.425 + load_fr_bank( R_EDX );
2.426 + JNE_rel8(10);
2.427 + push_fr(R_EDX, FRn);
2.428 + FCHS_st0();
2.429 + pop_fr(R_EDX, FRn);
2.430 + JMP_rel8(8);
2.431 + push_dr(R_EDX, FRn);
2.432 + FCHS_st0();
2.433 + pop_dr(R_EDX, FRn);
2.434 +:}
2.435 +FSRRA FRn {:
2.436 + check_fpuen();
2.437 + load_spreg( R_ECX, R_FPSCR );
2.438 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.439 + load_fr_bank( R_EDX );
2.440 + JNE_rel8(12); // PR=0 only
2.441 + FLD1_st0();
2.442 + push_fr(R_EDX, FRn);
2.443 + FSQRT_st0();
2.444 + FDIVP_st(1);
2.445 + pop_fr(R_EDX, FRn);
2.446 +:}
2.447 +FSQRT FRn {:
2.448 + check_fpuen();
2.449 + load_spreg( R_ECX, R_FPSCR );
2.450 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.451 + load_fr_bank( R_EDX );
2.452 + JNE_rel8(10);
2.453 + push_fr(R_EDX, FRn);
2.454 + FSQRT_st0();
2.455 + pop_fr(R_EDX, FRn);
2.456 + JMP_rel8(8);
2.457 + push_dr(R_EDX, FRn);
2.458 + FSQRT_st0();
2.459 + pop_dr(R_EDX, FRn);
2.460 +:}
2.461 +FSUB FRm, FRn {:
2.462 + check_fpuen();
2.463 + load_spreg( R_ECX, R_FPSCR );
2.464 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.465 + load_fr_bank( R_EDX );
2.466 + JNE_rel8(13);
2.467 + push_fr(R_EDX, FRn);
2.468 + push_fr(R_EDX, FRm);
2.469 + FMULP_st(1);
2.470 + pop_fr(R_EDX, FRn);
2.471 + JMP_rel8(11);
2.472 + push_dr(R_EDX, FRn);
2.473 + push_dr(R_EDX, FRm);
2.474 + FMULP_st(1);
2.475 + pop_dr(R_EDX, FRn);
2.476 +:}
2.477 +
2.478 +FCMP/EQ FRm, FRn {:
2.479 + check_fpuen();
2.480 + load_spreg( R_ECX, R_FPSCR );
2.481 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.482 + load_fr_bank( R_EDX );
2.483 + JNE_rel8(8);
2.484 + push_fr(R_EDX, FRm);
2.485 + push_fr(R_EDX, FRn);
2.486 + JMP_rel8(6);
2.487 + push_dr(R_EDX, FRm);
2.488 + push_dr(R_EDX, FRn);
2.489 + FCOMIP_st(1);
2.490 + SETE_t();
2.491 + FPOP_st();
2.492 +:}
2.493 +FCMP/GT FRm, FRn {:
2.494 + check_fpuen();
2.495 + load_spreg( R_ECX, R_FPSCR );
2.496 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.497 + load_fr_bank( R_EDX );
2.498 + JNE_rel8(8);
2.499 + push_fr(R_EDX, FRm);
2.500 + push_fr(R_EDX, FRn);
2.501 + JMP_rel8(6);
2.502 + push_dr(R_EDX, FRm);
2.503 + push_dr(R_EDX, FRn);
2.504 + FCOMIP_st(1);
2.505 + SETA_t();
2.506 + FPOP_st();
2.507 +:}
2.508 +
2.509 +FSCA FPUL, FRn {:
2.510 + check_fpuen();
2.511 +:}
2.512 +FIPR FVm, FVn {:
2.513 + check_fpuen();
2.514 +:}
2.515 +FTRV XMTRX, FVn {:
2.516 + check_fpuen();
2.517 +:}
2.518 +
2.519 +FRCHG {:
2.520 + check_fpuen();
2.521 + load_spreg( R_ECX, R_FPSCR );
2.522 + XOR_imm32_r32( FPSCR_FR, R_ECX );
2.523 + store_spreg( R_ECX, R_FPSCR );
2.524 +
2.525 +:}
2.526 +FSCHG {:
2.527 + check_fpuen();
2.528 + load_spreg( R_ECX, R_FPSCR );
2.529 + XOR_imm32_r32( FPSCR_SZ, R_ECX );
2.530 + store_spreg( R_ECX, R_FPSCR );
2.531 +:}
2.532
2.533 /* Processor control instructions */
2.534 LDC Rm, SR {:
2.535 load_reg( R_EAX, Rm );
2.536 call_func1( sh4_write_sr, R_EAX );
2.537 + sh4_x86.priv_checked = FALSE;
2.538 + sh4_x86.fpuen_checked = FALSE;
2.539 :}
2.540 LDC Rm, GBR {:
2.541 load_reg( R_EAX, Rm );
2.542 @@ -1508,6 +1809,8 @@
2.543 store_reg( R_EAX, Rm );
2.544 MEM_READ_LONG( R_ECX, R_EAX );
2.545 call_func1( sh4_write_sr, R_EAX );
2.546 + sh4_x86.priv_checked = FALSE;
2.547 + sh4_x86.fpuen_checked = FALSE;
2.548 :}
2.549 LDC.L @Rm+, VBR {:
2.550 load_reg( R_EAX, Rm );
2.551 @@ -1628,7 +1931,7 @@
2.552 CMP_imm32_r32( 0xE0000000, R_EAX );
2.553 JNE_rel8(8);
2.554 call_func0( sh4_flush_store_queue );
2.555 - ADD_imm8s_r32( -4, R_ESP );
2.556 + ADD_imm8s_r32( 4, R_ESP );
2.557 :}
2.558 SLEEP {: /* TODO */ :}
2.559 STC SR, Rn {:
3.1 --- a/src/sh4/x86op.h Wed Sep 12 09:16:47 2007 +0000
3.2 +++ b/src/sh4/x86op.h Wed Sep 12 09:17:52 2007 +0000
3.3 @@ -1,5 +1,5 @@
3.4 /**
3.5 - * $Id: x86op.h,v 1.5 2007-09-11 21:23:48 nkeynes Exp $
3.6 + * $Id: x86op.h,v 1.6 2007-09-12 09:17:24 nkeynes Exp $
3.7 *
3.8 * Definitions of x86 opcodes for use by the translator.
3.9 *
3.10 @@ -151,10 +151,14 @@
3.11 #define FABS_st0() OP(0xD9); OP(0xE1)
3.12 #define FADDP_st(st) OP(0xDE); OP(0xC0+st)
3.13 #define FCHS_st0() OP(0xD9); OP(0xE0)
3.14 +#define FCOMIP_st(st) OP(0xDF); OP(0xF0+st)
3.15 #define FDIVP_st(st) OP(0xDE); OP(0xF8+st)
3.16 -#define FILD_sh4r(disp) OP(0xDB); MODRM_sh4r_r32(disp, 0)
3.17 -#define FISTTP_shr4(disp) OP(0xDB); MODRM_sh4r_r32(disp, 1)
3.18 +#define FILD_sh4r(disp) OP(0xDB); MODRM_r32_sh4r(0, disp)
3.19 +#define FISTTP_shr4(disp) OP(0xDB); MODRM_r32_sh4r(1, disp)
3.20 +#define FLD0_st0() OP(0xD9); OP(0xEE);
3.21 +#define FLD1_st0() OP(0xD9); OP(0xE8);
3.22 #define FMULP_st(st) OP(0xDE); OP(0xC8+st)
3.23 +#define FPOP_st() OP(0xDD); OP(0xC0); OP(0xD9); OP(0xF7)
3.24 #define FSUB_st(st) OP(0xDE); OP(0xE8+st)
3.25 #define FSQRT_st0() OP(0xD9); OP(0xFA)
3.26
.