Search
lxdream.org :: lxdream/src/sh4/sh4x86.c :: diff
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.c
changeset 669:ab344e42bca9
prev626:a010e30a30e9
next671:a530ea88eebd
author nkeynes
date Mon May 12 10:00:13 2008 +0000 (12 years ago)
permissions -rw-r--r--
last change Cleanup most of the -Wall warnings (getting a bit sloppy...)
Convert FP code to use fixed banks rather than indirect pointer
(3-4% faster this way now)
file annotate diff log raw
1.1 --- a/src/sh4/sh4x86.c Fri Feb 08 00:06:56 2008 +0000
1.2 +++ b/src/sh4/sh4x86.c Mon May 12 10:00:13 2008 +0000
1.3 @@ -80,15 +80,14 @@
1.4 #define TSTATE_AE 3
1.5
1.6 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
1.7 -#define JT_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
1.8 +#define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
1.9 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
1.10 - OP(0x70+sh4_x86.tstate); OP(rel8); \
1.11 - MARK_JMP(rel8,label)
1.12 + OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
1.13 +
1.14 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
1.15 -#define JF_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
1.16 +#define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
1.17 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
1.18 - OP(0x70+ (sh4_x86.tstate^1)); OP(rel8); \
1.19 - MARK_JMP(rel8, label)
1.20 + OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
1.21
1.22 static struct sh4_x86_state sh4_x86;
1.23
1.24 @@ -97,7 +96,7 @@
1.25 static uint32_t save_fcw; /* save value for fpu control word */
1.26 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
1.27
1.28 -void sh4_x86_init()
1.29 +void sh4_translate_init(void)
1.30 {
1.31 sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
1.32 sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
1.33 @@ -169,7 +168,6 @@
1.34 OP64(value);
1.35 }
1.36
1.37 -
1.38 /**
1.39 * Emit an instruction to store an SH4 reg (RN)
1.40 */
1.41 @@ -180,97 +178,42 @@
1.42 OP(REG_OFFSET(r[sh4reg]));
1.43 }
1.44
1.45 -#define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
1.46 -
1.47 /**
1.48 * Load an FR register (single-precision floating point) into an integer x86
1.49 * register (eg for register-to-register moves)
1.50 */
1.51 -void static inline load_fr( int bankreg, int x86reg, int frm )
1.52 -{
1.53 - OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
1.54 -}
1.55 +#define load_fr(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
1.56 +#define load_xf(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
1.57
1.58 /**
1.59 - * Store an FR register (single-precision floating point) into an integer x86
1.60 + * Load the low half of a DR register (DR or XD) into an integer x86 register
1.61 + */
1.62 +#define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
1.63 +#define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
1.64 +
1.65 +/**
1.66 + * Store an FR register (single-precision floating point) from an integer x86
1.67 * register (eg for register-to-register moves)
1.68 */
1.69 -void static inline store_fr( int bankreg, int x86reg, int frn )
1.70 -{
1.71 - OP(0x89); OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
1.72 -}
1.73 +#define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
1.74 +#define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
1.75
1.76 +#define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
1.77 +#define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
1.78
1.79 -/**
1.80 - * Load a pointer to the back fp bank into the specified x86 register. The
1.81 - * bankreg must have been previously loaded with FPSCR.
1.82 - * NB: 12 bytes
1.83 - */
1.84 -static inline void load_xf_bank( int bankreg )
1.85 -{
1.86 - NOT_r32( bankreg );
1.87 - SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
1.88 - AND_imm8s_r32( 0x40, bankreg ); // Complete extraction
1.89 - OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
1.90 -}
1.91
1.92 -/**
1.93 - * Update the fr_bank pointer based on the current fpscr value.
1.94 - */
1.95 -static inline void update_fr_bank( int fpscrreg )
1.96 -{
1.97 - SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
1.98 - AND_imm8s_r32( 0x40, fpscrreg ); // Complete extraction
1.99 - OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
1.100 - store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
1.101 -}
1.102 -/**
1.103 - * Push FPUL (as a 32-bit float) onto the FPU stack
1.104 - */
1.105 -static inline void push_fpul( )
1.106 -{
1.107 - OP(0xD9); OP(0x45); OP(R_FPUL);
1.108 -}
1.109 +#define push_fpul() FLDF_sh4r(R_FPUL)
1.110 +#define pop_fpul() FSTPF_sh4r(R_FPUL)
1.111 +#define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
1.112 +#define pop_fr(frm) FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
1.113 +#define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
1.114 +#define pop_xf(frm) FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
1.115 +#define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
1.116 +#define pop_dr(frm) FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
1.117 +#define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
1.118 +#define pop_xdr(frm) FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
1.119
1.120 -/**
1.121 - * Pop FPUL (as a 32-bit float) from the FPU stack
1.122 - */
1.123 -static inline void pop_fpul( )
1.124 -{
1.125 - OP(0xD9); OP(0x5D); OP(R_FPUL);
1.126 -}
1.127
1.128 -/**
1.129 - * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
1.130 - * with the location of the current fp bank.
1.131 - */
1.132 -static inline void push_fr( int bankreg, int frm )
1.133 -{
1.134 - OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2); // FLD.S [bankreg + frm^1*4]
1.135 -}
1.136 -
1.137 -/**
1.138 - * Pop a 32-bit float from the FPU stack and store it back into the fp bank,
1.139 - * with bankreg previously loaded with the location of the current fp bank.
1.140 - */
1.141 -static inline void pop_fr( int bankreg, int frm )
1.142 -{
1.143 - OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
1.144 -}
1.145 -
1.146 -/**
1.147 - * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
1.148 - * with the location of the current fp bank.
1.149 - */
1.150 -static inline void push_dr( int bankreg, int frm )
1.151 -{
1.152 - OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
1.153 -}
1.154 -
1.155 -static inline void pop_dr( int bankreg, int frm )
1.156 -{
1.157 - OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
1.158 -}
1.159
1.160 /* Exception checks - Note that all exception checks will clobber EAX */
1.161
1.162 @@ -563,7 +506,7 @@
1.163 MOV_r32_r32( R_EAX, R_ECX );
1.164 AND_imm32_r32( 0xFC000000, R_EAX );
1.165 CMP_imm32_r32( 0xE0000000, R_EAX );
1.166 - JNE_rel8(8+CALL_FUNC1_SIZE, end);
1.167 + JNE_rel8(end);
1.168 call_func1( sh4_flush_store_queue, R_ECX );
1.169 TEST_r32_r32( R_EAX, R_EAX );
1.170 JE_exc(-1);
1.171 @@ -926,7 +869,7 @@
1.172
1.173 load_spreg( R_ECX, R_S );
1.174 TEST_r32_r32(R_ECX, R_ECX);
1.175 - JE_rel8( CALL_FUNC0_SIZE, nosat );
1.176 + JE_rel8( nosat );
1.177 call_func0( signsat48 );
1.178 JMP_TARGET( nosat );
1.179 sh4_x86.tstate = TSTATE_NONE;
1.180 @@ -1082,13 +1025,13 @@
1.181 load_reg( R_ECX, Rn );
1.182 XOR_r32_r32( R_ECX, R_EAX );
1.183 TEST_r8_r8( R_AL, R_AL );
1.184 - JE_rel8(13, target1);
1.185 - TEST_r8_r8( R_AH, R_AH ); // 2
1.186 - JE_rel8(9, target2);
1.187 - SHR_imm8_r32( 16, R_EAX ); // 3
1.188 - TEST_r8_r8( R_AL, R_AL ); // 2
1.189 - JE_rel8(2, target3);
1.190 - TEST_r8_r8( R_AH, R_AH ); // 2
1.191 + JE_rel8(target1);
1.192 + TEST_r8_r8( R_AH, R_AH );
1.193 + JE_rel8(target2);
1.194 + SHR_imm8_r32( 16, R_EAX );
1.195 + TEST_r8_r8( R_AL, R_AL );
1.196 + JE_rel8(target3);
1.197 + TEST_r8_r8( R_AH, R_AH );
1.198 JMP_TARGET(target1);
1.199 JMP_TARGET(target2);
1.200 JMP_TARGET(target3);
1.201 @@ -1176,9 +1119,9 @@
1.202 RCL1_r32( R_EAX );
1.203 SETC_r8( R_DL ); // Q'
1.204 CMP_sh4r_r32( R_Q, R_ECX );
1.205 - JE_rel8(5, mqequal);
1.206 + JE_rel8(mqequal);
1.207 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
1.208 - JMP_rel8(3, end);
1.209 + JMP_rel8(end);
1.210 JMP_TARGET(mqequal);
1.211 SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
1.212 JMP_TARGET(end);
1.213 @@ -1725,8 +1668,7 @@
1.214 MMU_TRANSLATE_READ( R_EAX );
1.215 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1.216 MEM_READ_LONG( R_EAX, R_EAX );
1.217 - store_spreg( R_EAX, R_FPSCR );
1.218 - update_fr_bank( R_EAX );
1.219 + call_func1( sh4_write_fpscr, R_EAX );
1.220 sh4_x86.tstate = TSTATE_NONE;
1.221 }
1.222 break;
1.223 @@ -1955,8 +1897,7 @@
1.224 uint32_t Rm = ((ir>>8)&0xF);
1.225 check_fpuen();
1.226 load_reg( R_EAX, Rm );
1.227 - store_spreg( R_EAX, R_FPSCR );
1.228 - update_fr_bank( R_EAX );
1.229 + call_func1( sh4_write_fpscr, R_EAX );
1.230 sh4_x86.tstate = TSTATE_NONE;
1.231 }
1.232 break;
1.233 @@ -2049,17 +1990,17 @@
1.234 load_reg( R_EAX, Rn );
1.235 load_reg( R_ECX, Rm );
1.236 CMP_imm32_r32( 0, R_ECX );
1.237 - JGE_rel8(16, doshl);
1.238 + JGE_rel8(doshl);
1.239
1.240 NEG_r32( R_ECX ); // 2
1.241 AND_imm8_r8( 0x1F, R_CL ); // 3
1.242 - JE_rel8( 4, emptysar); // 2
1.243 + JE_rel8(emptysar); // 2
1.244 SAR_r32_CL( R_EAX ); // 2
1.245 - JMP_rel8(10, end); // 2
1.246 + JMP_rel8(end); // 2
1.247
1.248 JMP_TARGET(emptysar);
1.249 SAR_imm8_r32(31, R_EAX ); // 3
1.250 - JMP_rel8(5, end2);
1.251 + JMP_rel8(end2);
1.252
1.253 JMP_TARGET(doshl);
1.254 AND_imm8_r8( 0x1F, R_CL ); // 3
1.255 @@ -2076,17 +2017,17 @@
1.256 load_reg( R_EAX, Rn );
1.257 load_reg( R_ECX, Rm );
1.258 CMP_imm32_r32( 0, R_ECX );
1.259 - JGE_rel8(15, doshl);
1.260 + JGE_rel8(doshl);
1.261
1.262 NEG_r32( R_ECX ); // 2
1.263 AND_imm8_r8( 0x1F, R_CL ); // 3
1.264 - JE_rel8( 4, emptyshr );
1.265 + JE_rel8(emptyshr );
1.266 SHR_r32_CL( R_EAX ); // 2
1.267 - JMP_rel8(9, end); // 2
1.268 + JMP_rel8(end); // 2
1.269
1.270 JMP_TARGET(emptyshr);
1.271 XOR_r32_r32( R_EAX, R_EAX );
1.272 - JMP_rel8(5, end2);
1.273 + JMP_rel8(end2);
1.274
1.275 JMP_TARGET(doshl);
1.276 AND_imm8_r8( 0x1F, R_CL ); // 3
1.277 @@ -2201,21 +2142,21 @@
1.278
1.279 load_spreg( R_ECX, R_S );
1.280 TEST_r32_r32( R_ECX, R_ECX );
1.281 - JE_rel8( 47, nosat );
1.282 + JE_rel8( nosat );
1.283
1.284 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
1.285 - JNO_rel8( 51, end ); // 2
1.286 + JNO_rel8( end ); // 2
1.287 load_imm32( R_EDX, 1 ); // 5
1.288 store_spreg( R_EDX, R_MACH ); // 6
1.289 - JS_rel8( 13, positive ); // 2
1.290 + JS_rel8( positive ); // 2
1.291 load_imm32( R_EAX, 0x80000000 );// 5
1.292 store_spreg( R_EAX, R_MACL ); // 6
1.293 - JMP_rel8( 25, end2 ); // 2
1.294 + JMP_rel8(end2); // 2
1.295
1.296 JMP_TARGET(positive);
1.297 load_imm32( R_EAX, 0x7FFFFFFF );// 5
1.298 store_spreg( R_EAX, R_MACL ); // 6
1.299 - JMP_rel8( 12, end3); // 2
1.300 + JMP_rel8(end3); // 2
1.301
1.302 JMP_TARGET(nosat);
1.303 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
1.304 @@ -2473,7 +2414,7 @@
1.305 SLOTILLEGAL();
1.306 } else {
1.307 sh4vma_t target = disp + pc + 4;
1.308 - JF_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
1.309 + JF_rel8( nottaken );
1.310 exit_block_rel(target, pc+2 );
1.311 JMP_TARGET(nottaken);
1.312 return 2;
1.313 @@ -2487,7 +2428,7 @@
1.314 SLOTILLEGAL();
1.315 } else {
1.316 sh4vma_t target = disp + pc + 4;
1.317 - JT_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
1.318 + JT_rel8( nottaken );
1.319 exit_block_rel(target, pc+2 );
1.320 JMP_TARGET(nottaken);
1.321 return 2;
1.322 @@ -2503,7 +2444,7 @@
1.323 sh4_x86.in_delay_slot = DELAY_PC;
1.324 if( UNTRANSLATABLE(pc+2) ) {
1.325 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1.326 - JF_rel8(6,nottaken);
1.327 + JF_rel8(nottaken);
1.328 ADD_imm32_r32( disp, R_EAX );
1.329 JMP_TARGET(nottaken);
1.330 ADD_sh4r_r32( R_PC, R_EAX );
1.331 @@ -2536,7 +2477,7 @@
1.332 sh4_x86.in_delay_slot = DELAY_PC;
1.333 if( UNTRANSLATABLE(pc+2) ) {
1.334 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1.335 - JT_rel8(6,nottaken);
1.336 + JT_rel8(nottaken);
1.337 ADD_imm32_r32( disp, R_EAX );
1.338 JMP_TARGET(nottaken);
1.339 ADD_sh4r_r32( R_PC, R_EAX );
1.340 @@ -2883,18 +2824,17 @@
1.341 check_fpuen();
1.342 load_spreg( R_ECX, R_FPSCR );
1.343 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.344 - load_fr_bank( R_EDX );
1.345 - JNE_rel8(13,doubleprec);
1.346 - push_fr(R_EDX, FRm);
1.347 - push_fr(R_EDX, FRn);
1.348 + JNE_rel8(doubleprec);
1.349 + push_fr(FRm);
1.350 + push_fr(FRn);
1.351 FADDP_st(1);
1.352 - pop_fr(R_EDX, FRn);
1.353 - JMP_rel8(11,end);
1.354 + pop_fr(FRn);
1.355 + JMP_rel8(end);
1.356 JMP_TARGET(doubleprec);
1.357 - push_dr(R_EDX, FRm);
1.358 - push_dr(R_EDX, FRn);
1.359 + push_dr(FRm);
1.360 + push_dr(FRn);
1.361 FADDP_st(1);
1.362 - pop_dr(R_EDX, FRn);
1.363 + pop_dr(FRn);
1.364 JMP_TARGET(end);
1.365 sh4_x86.tstate = TSTATE_NONE;
1.366 }
1.367 @@ -2905,18 +2845,17 @@
1.368 check_fpuen();
1.369 load_spreg( R_ECX, R_FPSCR );
1.370 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.371 - load_fr_bank( R_EDX );
1.372 - JNE_rel8(13, doubleprec);
1.373 - push_fr(R_EDX, FRn);
1.374 - push_fr(R_EDX, FRm);
1.375 + JNE_rel8(doubleprec);
1.376 + push_fr(FRn);
1.377 + push_fr(FRm);
1.378 FSUBP_st(1);
1.379 - pop_fr(R_EDX, FRn);
1.380 - JMP_rel8(11, end);
1.381 + pop_fr(FRn);
1.382 + JMP_rel8(end);
1.383 JMP_TARGET(doubleprec);
1.384 - push_dr(R_EDX, FRn);
1.385 - push_dr(R_EDX, FRm);
1.386 + push_dr(FRn);
1.387 + push_dr(FRm);
1.388 FSUBP_st(1);
1.389 - pop_dr(R_EDX, FRn);
1.390 + pop_dr(FRn);
1.391 JMP_TARGET(end);
1.392 sh4_x86.tstate = TSTATE_NONE;
1.393 }
1.394 @@ -2927,18 +2866,17 @@
1.395 check_fpuen();
1.396 load_spreg( R_ECX, R_FPSCR );
1.397 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.398 - load_fr_bank( R_EDX );
1.399 - JNE_rel8(13, doubleprec);
1.400 - push_fr(R_EDX, FRm);
1.401 - push_fr(R_EDX, FRn);
1.402 + JNE_rel8(doubleprec);
1.403 + push_fr(FRm);
1.404 + push_fr(FRn);
1.405 FMULP_st(1);
1.406 - pop_fr(R_EDX, FRn);
1.407 - JMP_rel8(11, end);
1.408 + pop_fr(FRn);
1.409 + JMP_rel8(end);
1.410 JMP_TARGET(doubleprec);
1.411 - push_dr(R_EDX, FRm);
1.412 - push_dr(R_EDX, FRn);
1.413 + push_dr(FRm);
1.414 + push_dr(FRn);
1.415 FMULP_st(1);
1.416 - pop_dr(R_EDX, FRn);
1.417 + pop_dr(FRn);
1.418 JMP_TARGET(end);
1.419 sh4_x86.tstate = TSTATE_NONE;
1.420 }
1.421 @@ -2949,18 +2887,17 @@
1.422 check_fpuen();
1.423 load_spreg( R_ECX, R_FPSCR );
1.424 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.425 - load_fr_bank( R_EDX );
1.426 - JNE_rel8(13, doubleprec);
1.427 - push_fr(R_EDX, FRn);
1.428 - push_fr(R_EDX, FRm);
1.429 + JNE_rel8(doubleprec);
1.430 + push_fr(FRn);
1.431 + push_fr(FRm);
1.432 FDIVP_st(1);
1.433 - pop_fr(R_EDX, FRn);
1.434 - JMP_rel8(11, end);
1.435 + pop_fr(FRn);
1.436 + JMP_rel8(end);
1.437 JMP_TARGET(doubleprec);
1.438 - push_dr(R_EDX, FRn);
1.439 - push_dr(R_EDX, FRm);
1.440 + push_dr(FRn);
1.441 + push_dr(FRm);
1.442 FDIVP_st(1);
1.443 - pop_dr(R_EDX, FRn);
1.444 + pop_dr(FRn);
1.445 JMP_TARGET(end);
1.446 sh4_x86.tstate = TSTATE_NONE;
1.447 }
1.448 @@ -2971,14 +2908,13 @@
1.449 check_fpuen();
1.450 load_spreg( R_ECX, R_FPSCR );
1.451 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.452 - load_fr_bank( R_EDX );
1.453 - JNE_rel8(8, doubleprec);
1.454 - push_fr(R_EDX, FRm);
1.455 - push_fr(R_EDX, FRn);
1.456 - JMP_rel8(6, end);
1.457 + JNE_rel8(doubleprec);
1.458 + push_fr(FRm);
1.459 + push_fr(FRn);
1.460 + JMP_rel8(end);
1.461 JMP_TARGET(doubleprec);
1.462 - push_dr(R_EDX, FRm);
1.463 - push_dr(R_EDX, FRn);
1.464 + push_dr(FRm);
1.465 + push_dr(FRn);
1.466 JMP_TARGET(end);
1.467 FCOMIP_st(1);
1.468 SETE_t();
1.469 @@ -2992,14 +2928,13 @@
1.470 check_fpuen();
1.471 load_spreg( R_ECX, R_FPSCR );
1.472 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.473 - load_fr_bank( R_EDX );
1.474 - JNE_rel8(8, doubleprec);
1.475 - push_fr(R_EDX, FRm);
1.476 - push_fr(R_EDX, FRn);
1.477 - JMP_rel8(6, end);
1.478 + JNE_rel8(doubleprec);
1.479 + push_fr(FRm);
1.480 + push_fr(FRn);
1.481 + JMP_rel8(end);
1.482 JMP_TARGET(doubleprec);
1.483 - push_dr(R_EDX, FRm);
1.484 - push_dr(R_EDX, FRn);
1.485 + push_dr(FRm);
1.486 + push_dr(FRn);
1.487 JMP_TARGET(end);
1.488 FCOMIP_st(1);
1.489 SETA_t();
1.490 @@ -3017,28 +2952,18 @@
1.491 MMU_TRANSLATE_READ( R_EAX );
1.492 load_spreg( R_EDX, R_FPSCR );
1.493 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.494 - JNE_rel8(8 + MEM_READ_SIZE, doublesize);
1.495 + JNE_rel8(doublesize);
1.496 +
1.497 MEM_READ_LONG( R_EAX, R_EAX );
1.498 - load_fr_bank( R_EDX );
1.499 - store_fr( R_EDX, R_EAX, FRn );
1.500 - if( FRn&1 ) {
1.501 - JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
1.502 - JMP_TARGET(doublesize);
1.503 - MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.504 - load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
1.505 - load_xf_bank( R_EDX );
1.506 - store_fr( R_EDX, R_ECX, FRn&0x0E );
1.507 - store_fr( R_EDX, R_EAX, FRn|0x01 );
1.508 - JMP_TARGET(end);
1.509 - } else {
1.510 - JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
1.511 - JMP_TARGET(doublesize);
1.512 - MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.513 - load_fr_bank( R_EDX );
1.514 - store_fr( R_EDX, R_ECX, FRn&0x0E );
1.515 - store_fr( R_EDX, R_EAX, FRn|0x01 );
1.516 - JMP_TARGET(end);
1.517 - }
1.518 + store_fr( R_EAX, FRn );
1.519 + JMP_rel8(end);
1.520 +
1.521 + JMP_TARGET(doublesize);
1.522 + MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.523 + store_dr0( R_ECX, FRn );
1.524 + store_dr1( R_EAX, FRn );
1.525 + JMP_TARGET(end);
1.526 +
1.527 sh4_x86.tstate = TSTATE_NONE;
1.528 }
1.529 break;
1.530 @@ -3052,27 +2977,18 @@
1.531 MMU_TRANSLATE_WRITE( R_EAX );
1.532 load_spreg( R_EDX, R_FPSCR );
1.533 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.534 - JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
1.535 - load_fr_bank( R_EDX );
1.536 - load_fr( R_EDX, R_ECX, FRm );
1.537 + JNE_rel8(doublesize);
1.538 +
1.539 + load_fr( R_ECX, FRm );
1.540 MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
1.541 - if( FRm&1 ) {
1.542 - JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
1.543 - JMP_TARGET(doublesize);
1.544 - load_xf_bank( R_EDX );
1.545 - load_fr( R_EDX, R_ECX, FRm&0x0E );
1.546 - load_fr( R_EDX, R_EDX, FRm|0x01 );
1.547 - MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.548 - JMP_TARGET(end);
1.549 - } else {
1.550 - JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
1.551 - JMP_TARGET(doublesize);
1.552 - load_fr_bank( R_EDX );
1.553 - load_fr( R_EDX, R_ECX, FRm&0x0E );
1.554 - load_fr( R_EDX, R_EDX, FRm|0x01 );
1.555 - MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.556 - JMP_TARGET(end);
1.557 - }
1.558 + JMP_rel8(end);
1.559 +
1.560 + JMP_TARGET(doublesize);
1.561 + load_dr0( R_ECX, FRm );
1.562 + load_dr1( R_EDX, FRm );
1.563 + MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.564 + JMP_TARGET(end);
1.565 +
1.566 sh4_x86.tstate = TSTATE_NONE;
1.567 }
1.568 break;
1.569 @@ -3085,28 +3001,17 @@
1.570 MMU_TRANSLATE_READ( R_EAX );
1.571 load_spreg( R_EDX, R_FPSCR );
1.572 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.573 - JNE_rel8(8 + MEM_READ_SIZE, doublesize);
1.574 + JNE_rel8(doublesize);
1.575 +
1.576 MEM_READ_LONG( R_EAX, R_EAX );
1.577 - load_fr_bank( R_EDX );
1.578 - store_fr( R_EDX, R_EAX, FRn );
1.579 - if( FRn&1 ) {
1.580 - JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
1.581 - JMP_TARGET(doublesize);
1.582 - MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.583 - load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
1.584 - load_xf_bank( R_EDX );
1.585 - store_fr( R_EDX, R_ECX, FRn&0x0E );
1.586 - store_fr( R_EDX, R_EAX, FRn|0x01 );
1.587 - JMP_TARGET(end);
1.588 - } else {
1.589 - JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
1.590 - JMP_TARGET(doublesize);
1.591 - MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.592 - load_fr_bank( R_EDX );
1.593 - store_fr( R_EDX, R_ECX, FRn&0x0E );
1.594 - store_fr( R_EDX, R_EAX, FRn|0x01 );
1.595 - JMP_TARGET(end);
1.596 - }
1.597 + store_fr( R_EAX, FRn );
1.598 + JMP_rel8(end);
1.599 +
1.600 + JMP_TARGET(doublesize);
1.601 + MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.602 + store_dr0( R_ECX, FRn );
1.603 + store_dr1( R_EAX, FRn );
1.604 + JMP_TARGET(end);
1.605 sh4_x86.tstate = TSTATE_NONE;
1.606 }
1.607 break;
1.608 @@ -3119,30 +3024,20 @@
1.609 MMU_TRANSLATE_READ( R_EAX );
1.610 load_spreg( R_EDX, R_FPSCR );
1.611 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.612 - JNE_rel8(12 + MEM_READ_SIZE, doublesize);
1.613 + JNE_rel8(doublesize);
1.614 +
1.615 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1.616 MEM_READ_LONG( R_EAX, R_EAX );
1.617 - load_fr_bank( R_EDX );
1.618 - store_fr( R_EDX, R_EAX, FRn );
1.619 - if( FRn&1 ) {
1.620 - JMP_rel8(25 + MEM_READ_DOUBLE_SIZE, end);
1.621 - JMP_TARGET(doublesize);
1.622 - ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
1.623 - MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.624 - load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
1.625 - load_xf_bank( R_EDX );
1.626 - store_fr( R_EDX, R_ECX, FRn&0x0E );
1.627 - store_fr( R_EDX, R_EAX, FRn|0x01 );
1.628 - JMP_TARGET(end);
1.629 - } else {
1.630 - JMP_rel8(13 + MEM_READ_DOUBLE_SIZE, end);
1.631 - ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
1.632 - MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.633 - load_fr_bank( R_EDX );
1.634 - store_fr( R_EDX, R_ECX, FRn&0x0E );
1.635 - store_fr( R_EDX, R_EAX, FRn|0x01 );
1.636 - JMP_TARGET(end);
1.637 - }
1.638 + store_fr( R_EAX, FRn );
1.639 + JMP_rel8(end);
1.640 +
1.641 + JMP_TARGET(doublesize);
1.642 + ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
1.643 + MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.644 + store_dr0( R_ECX, FRn );
1.645 + store_dr1( R_EAX, FRn );
1.646 + JMP_TARGET(end);
1.647 +
1.648 sh4_x86.tstate = TSTATE_NONE;
1.649 }
1.650 break;
1.651 @@ -3155,27 +3050,17 @@
1.652 MMU_TRANSLATE_WRITE( R_EAX );
1.653 load_spreg( R_EDX, R_FPSCR );
1.654 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.655 - JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
1.656 - load_fr_bank( R_EDX );
1.657 - load_fr( R_EDX, R_ECX, FRm );
1.658 + JNE_rel8(doublesize);
1.659 +
1.660 + load_fr( R_ECX, FRm );
1.661 MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
1.662 - if( FRm&1 ) {
1.663 - JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
1.664 - JMP_TARGET(doublesize);
1.665 - load_xf_bank( R_EDX );
1.666 - load_fr( R_EDX, R_ECX, FRm&0x0E );
1.667 - load_fr( R_EDX, R_EDX, FRm|0x01 );
1.668 - MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.669 - JMP_TARGET(end);
1.670 - } else {
1.671 - JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
1.672 - JMP_TARGET(doublesize);
1.673 - load_fr_bank( R_EDX );
1.674 - load_fr( R_EDX, R_ECX, FRm&0x0E );
1.675 - load_fr( R_EDX, R_EDX, FRm|0x01 );
1.676 - MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.677 - JMP_TARGET(end);
1.678 - }
1.679 + JMP_rel8(end);
1.680 +
1.681 + JMP_TARGET(doublesize);
1.682 + load_dr0( R_ECX, FRm );
1.683 + load_dr1( R_EDX, FRm );
1.684 + MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.685 + JMP_TARGET(end);
1.686 sh4_x86.tstate = TSTATE_NONE;
1.687 }
1.688 break;
1.689 @@ -3187,36 +3072,24 @@
1.690 check_walign32( R_EAX );
1.691 load_spreg( R_EDX, R_FPSCR );
1.692 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.693 - JNE_rel8(15 + MEM_WRITE_SIZE + MMU_TRANSLATE_SIZE, doublesize);
1.694 + JNE_rel8(doublesize);
1.695 +
1.696 ADD_imm8s_r32( -4, R_EAX );
1.697 MMU_TRANSLATE_WRITE( R_EAX );
1.698 - load_fr_bank( R_EDX );
1.699 - load_fr( R_EDX, R_ECX, FRm );
1.700 + load_fr( R_ECX, FRm );
1.701 ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
1.702 - MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
1.703 - if( FRm&1 ) {
1.704 - JMP_rel8( 25 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
1.705 - JMP_TARGET(doublesize);
1.706 - ADD_imm8s_r32(-8,R_EAX);
1.707 - MMU_TRANSLATE_WRITE( R_EAX );
1.708 - load_xf_bank( R_EDX );
1.709 - load_fr( R_EDX, R_ECX, FRm&0x0E );
1.710 - load_fr( R_EDX, R_EDX, FRm|0x01 );
1.711 - ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
1.712 - MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.713 - JMP_TARGET(end);
1.714 - } else {
1.715 - JMP_rel8( 16 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
1.716 - JMP_TARGET(doublesize);
1.717 - ADD_imm8s_r32(-8,R_EAX);
1.718 - MMU_TRANSLATE_WRITE( R_EAX );
1.719 - load_fr_bank( R_EDX );
1.720 - load_fr( R_EDX, R_ECX, FRm&0x0E );
1.721 - load_fr( R_EDX, R_EDX, FRm|0x01 );
1.722 - ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
1.723 - MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.724 - JMP_TARGET(end);
1.725 - }
1.726 + MEM_WRITE_LONG( R_EAX, R_ECX );
1.727 + JMP_rel8(end);
1.728 +
1.729 + JMP_TARGET(doublesize);
1.730 + ADD_imm8s_r32(-8,R_EAX);
1.731 + MMU_TRANSLATE_WRITE( R_EAX );
1.732 + load_dr0( R_ECX, FRm );
1.733 + load_dr1( R_EDX, FRm );
1.734 + ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
1.735 + MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.736 + JMP_TARGET(end);
1.737 +
1.738 sh4_x86.tstate = TSTATE_NONE;
1.739 }
1.740 break;
1.741 @@ -3232,44 +3105,17 @@
1.742 */
1.743 check_fpuen();
1.744 load_spreg( R_ECX, R_FPSCR );
1.745 - load_fr_bank( R_EDX );
1.746 TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.747 - JNE_rel8(8, doublesize);
1.748 - load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
1.749 - store_fr( R_EDX, R_EAX, FRn );
1.750 - if( FRm&1 ) {
1.751 - JMP_rel8(24, end);
1.752 - JMP_TARGET(doublesize);
1.753 - load_xf_bank( R_ECX );
1.754 - load_fr( R_ECX, R_EAX, FRm-1 );
1.755 - if( FRn&1 ) {
1.756 - load_fr( R_ECX, R_EDX, FRm );
1.757 - store_fr( R_ECX, R_EAX, FRn-1 );
1.758 - store_fr( R_ECX, R_EDX, FRn );
1.759 - } else /* FRn&1 == 0 */ {
1.760 - load_fr( R_ECX, R_ECX, FRm );
1.761 - store_fr( R_EDX, R_EAX, FRn );
1.762 - store_fr( R_EDX, R_ECX, FRn+1 );
1.763 - }
1.764 - JMP_TARGET(end);
1.765 - } else /* FRm&1 == 0 */ {
1.766 - if( FRn&1 ) {
1.767 - JMP_rel8(24, end);
1.768 - load_xf_bank( R_ECX );
1.769 - load_fr( R_EDX, R_EAX, FRm );
1.770 - load_fr( R_EDX, R_EDX, FRm+1 );
1.771 - store_fr( R_ECX, R_EAX, FRn-1 );
1.772 - store_fr( R_ECX, R_EDX, FRn );
1.773 - JMP_TARGET(end);
1.774 - } else /* FRn&1 == 0 */ {
1.775 - JMP_rel8(12, end);
1.776 - load_fr( R_EDX, R_EAX, FRm );
1.777 - load_fr( R_EDX, R_ECX, FRm+1 );
1.778 - store_fr( R_EDX, R_EAX, FRn );
1.779 - store_fr( R_EDX, R_ECX, FRn+1 );
1.780 - JMP_TARGET(end);
1.781 - }
1.782 - }
1.783 + JNE_rel8(doublesize);
1.784 + load_fr( R_EAX, FRm ); // SZ=0 branch
1.785 + store_fr( R_EAX, FRn );
1.786 + JMP_rel8(end);
1.787 + JMP_TARGET(doublesize);
1.788 + load_dr0( R_EAX, FRm );
1.789 + load_dr1( R_ECX, FRm );
1.790 + store_dr0( R_EAX, FRn );
1.791 + store_dr1( R_ECX, FRn );
1.792 + JMP_TARGET(end);
1.793 sh4_x86.tstate = TSTATE_NONE;
1.794 }
1.795 break;
1.796 @@ -3279,9 +3125,8 @@
1.797 { /* FSTS FPUL, FRn */
1.798 uint32_t FRn = ((ir>>8)&0xF);
1.799 check_fpuen();
1.800 - load_fr_bank( R_ECX );
1.801 load_spreg( R_EAX, R_FPUL );
1.802 - store_fr( R_ECX, R_EAX, FRn );
1.803 + store_fr( R_EAX, FRn );
1.804 sh4_x86.tstate = TSTATE_NONE;
1.805 }
1.806 break;
1.807 @@ -3289,8 +3134,7 @@
1.808 { /* FLDS FRm, FPUL */
1.809 uint32_t FRm = ((ir>>8)&0xF);
1.810 check_fpuen();
1.811 - load_fr_bank( R_ECX );
1.812 - load_fr( R_ECX, R_EAX, FRm );
1.813 + load_fr( R_EAX, FRm );
1.814 store_spreg( R_EAX, R_FPUL );
1.815 sh4_x86.tstate = TSTATE_NONE;
1.816 }
1.817 @@ -3300,14 +3144,13 @@
1.818 uint32_t FRn = ((ir>>8)&0xF);
1.819 check_fpuen();
1.820 load_spreg( R_ECX, R_FPSCR );
1.821 - load_spreg(R_EDX, REG_OFFSET(fr_bank));
1.822 FILD_sh4r(R_FPUL);
1.823 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.824 - JNE_rel8(5, doubleprec);
1.825 - pop_fr( R_EDX, FRn );
1.826 - JMP_rel8(3, end);
1.827 + JNE_rel8(doubleprec);
1.828 + pop_fr( FRn );
1.829 + JMP_rel8(end);
1.830 JMP_TARGET(doubleprec);
1.831 - pop_dr( R_EDX, FRn );
1.832 + pop_dr( FRn );
1.833 JMP_TARGET(end);
1.834 sh4_x86.tstate = TSTATE_NONE;
1.835 }
1.836 @@ -3317,29 +3160,28 @@
1.837 uint32_t FRm = ((ir>>8)&0xF);
1.838 check_fpuen();
1.839 load_spreg( R_ECX, R_FPSCR );
1.840 - load_fr_bank( R_EDX );
1.841 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.842 - JNE_rel8(5, doubleprec);
1.843 - push_fr( R_EDX, FRm );
1.844 - JMP_rel8(3, doop);
1.845 + JNE_rel8(doubleprec);
1.846 + push_fr( FRm );
1.847 + JMP_rel8(doop);
1.848 JMP_TARGET(doubleprec);
1.849 - push_dr( R_EDX, FRm );
1.850 + push_dr( FRm );
1.851 JMP_TARGET( doop );
1.852 load_imm32( R_ECX, (uint32_t)&max_int );
1.853 FILD_r32ind( R_ECX );
1.854 FCOMIP_st(1);
1.855 - JNA_rel8( 32, sat );
1.856 + JNA_rel8( sat );
1.857 load_imm32( R_ECX, (uint32_t)&min_int ); // 5
1.858 FILD_r32ind( R_ECX ); // 2
1.859 FCOMIP_st(1); // 2
1.860 - JAE_rel8( 21, sat2 ); // 2
1.861 + JAE_rel8( sat2 ); // 2
1.862 load_imm32( R_EAX, (uint32_t)&save_fcw );
1.863 FNSTCW_r32ind( R_EAX );
1.864 load_imm32( R_EDX, (uint32_t)&trunc_fcw );
1.865 FLDCW_r32ind( R_EDX );
1.866 FISTP_sh4r(R_FPUL); // 3
1.867 FLDCW_r32ind( R_EAX );
1.868 - JMP_rel8( 9, end ); // 2
1.869 + JMP_rel8(end); // 2
1.870
1.871 JMP_TARGET(sat);
1.872 JMP_TARGET(sat2);
1.873 @@ -3356,16 +3198,15 @@
1.874 check_fpuen();
1.875 load_spreg( R_ECX, R_FPSCR );
1.876 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.877 - load_fr_bank( R_EDX );
1.878 - JNE_rel8(10, doubleprec);
1.879 - push_fr(R_EDX, FRn);
1.880 + JNE_rel8(doubleprec);
1.881 + push_fr(FRn);
1.882 FCHS_st0();
1.883 - pop_fr(R_EDX, FRn);
1.884 - JMP_rel8(8, end);
1.885 + pop_fr(FRn);
1.886 + JMP_rel8(end);
1.887 JMP_TARGET(doubleprec);
1.888 - push_dr(R_EDX, FRn);
1.889 + push_dr(FRn);
1.890 FCHS_st0();
1.891 - pop_dr(R_EDX, FRn);
1.892 + pop_dr(FRn);
1.893 JMP_TARGET(end);
1.894 sh4_x86.tstate = TSTATE_NONE;
1.895 }
1.896 @@ -3375,17 +3216,16 @@
1.897 uint32_t FRn = ((ir>>8)&0xF);
1.898 check_fpuen();
1.899 load_spreg( R_ECX, R_FPSCR );
1.900 - load_fr_bank( R_EDX );
1.901 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.902 - JNE_rel8(10, doubleprec);
1.903 - push_fr(R_EDX, FRn); // 3
1.904 + JNE_rel8(doubleprec);
1.905 + push_fr(FRn); // 6
1.906 FABS_st0(); // 2
1.907 - pop_fr( R_EDX, FRn); //3
1.908 - JMP_rel8(8,end); // 2
1.909 + pop_fr(FRn); //6
1.910 + JMP_rel8(end); // 2
1.911 JMP_TARGET(doubleprec);
1.912 - push_dr(R_EDX, FRn);
1.913 + push_dr(FRn);
1.914 FABS_st0();
1.915 - pop_dr(R_EDX, FRn);
1.916 + pop_dr(FRn);
1.917 JMP_TARGET(end);
1.918 sh4_x86.tstate = TSTATE_NONE;
1.919 }
1.920 @@ -3396,16 +3236,15 @@
1.921 check_fpuen();
1.922 load_spreg( R_ECX, R_FPSCR );
1.923 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.924 - load_fr_bank( R_EDX );
1.925 - JNE_rel8(10, doubleprec);
1.926 - push_fr(R_EDX, FRn);
1.927 + JNE_rel8(doubleprec);
1.928 + push_fr(FRn);
1.929 FSQRT_st0();
1.930 - pop_fr(R_EDX, FRn);
1.931 - JMP_rel8(8, end);
1.932 + pop_fr(FRn);
1.933 + JMP_rel8(end);
1.934 JMP_TARGET(doubleprec);
1.935 - push_dr(R_EDX, FRn);
1.936 + push_dr(FRn);
1.937 FSQRT_st0();
1.938 - pop_dr(R_EDX, FRn);
1.939 + pop_dr(FRn);
1.940 JMP_TARGET(end);
1.941 sh4_x86.tstate = TSTATE_NONE;
1.942 }
1.943 @@ -3416,13 +3255,12 @@
1.944 check_fpuen();
1.945 load_spreg( R_ECX, R_FPSCR );
1.946 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.947 - load_fr_bank( R_EDX );
1.948 - JNE_rel8(12, end); // PR=0 only
1.949 + JNE_rel8(end); // PR=0 only
1.950 FLD1_st0();
1.951 - push_fr(R_EDX, FRn);
1.952 + push_fr(FRn);
1.953 FSQRT_st0();
1.954 FDIVP_st(1);
1.955 - pop_fr(R_EDX, FRn);
1.956 + pop_fr(FRn);
1.957 JMP_TARGET(end);
1.958 sh4_x86.tstate = TSTATE_NONE;
1.959 }
1.960 @@ -3434,10 +3272,9 @@
1.961 check_fpuen();
1.962 load_spreg( R_ECX, R_FPSCR );
1.963 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.964 - JNE_rel8(8, end);
1.965 + JNE_rel8(end);
1.966 XOR_r32_r32( R_EAX, R_EAX );
1.967 - load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.968 - store_fr( R_ECX, R_EAX, FRn );
1.969 + store_fr( R_EAX, FRn );
1.970 JMP_TARGET(end);
1.971 sh4_x86.tstate = TSTATE_NONE;
1.972 }
1.973 @@ -3449,10 +3286,9 @@
1.974 check_fpuen();
1.975 load_spreg( R_ECX, R_FPSCR );
1.976 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.977 - JNE_rel8(11, end);
1.978 + JNE_rel8(end);
1.979 load_imm32(R_EAX, 0x3F800000);
1.980 - load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.981 - store_fr( R_ECX, R_EAX, FRn );
1.982 + store_fr( R_EAX, FRn );
1.983 JMP_TARGET(end);
1.984 sh4_x86.tstate = TSTATE_NONE;
1.985 }
1.986 @@ -3463,10 +3299,9 @@
1.987 check_fpuen();
1.988 load_spreg( R_ECX, R_FPSCR );
1.989 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.990 - JE_rel8(9, end); // only when PR=1
1.991 - load_fr_bank( R_ECX );
1.992 + JE_rel8(end); // only when PR=1
1.993 push_fpul();
1.994 - pop_dr( R_ECX, FRn );
1.995 + pop_dr( FRn );
1.996 JMP_TARGET(end);
1.997 sh4_x86.tstate = TSTATE_NONE;
1.998 }
1.999 @@ -3477,9 +3312,8 @@
1.1000 check_fpuen();
1.1001 load_spreg( R_ECX, R_FPSCR );
1.1002 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.1003 - JE_rel8(9, end); // only when PR=1
1.1004 - load_fr_bank( R_ECX );
1.1005 - push_dr( R_ECX, FRm );
1.1006 + JE_rel8(end); // only when PR=1
1.1007 + push_dr( FRm );
1.1008 pop_fpul();
1.1009 JMP_TARGET(end);
1.1010 sh4_x86.tstate = TSTATE_NONE;
1.1011 @@ -3491,25 +3325,24 @@
1.1012 check_fpuen();
1.1013 load_spreg( R_ECX, R_FPSCR );
1.1014 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.1015 - JNE_rel8(44, doubleprec);
1.1016 + JNE_rel8( doubleprec);
1.1017
1.1018 - load_fr_bank( R_ECX );
1.1019 - push_fr( R_ECX, FVm<<2 );
1.1020 - push_fr( R_ECX, FVn<<2 );
1.1021 + push_fr( FVm<<2 );
1.1022 + push_fr( FVn<<2 );
1.1023 FMULP_st(1);
1.1024 - push_fr( R_ECX, (FVm<<2)+1);
1.1025 - push_fr( R_ECX, (FVn<<2)+1);
1.1026 + push_fr( (FVm<<2)+1);
1.1027 + push_fr( (FVn<<2)+1);
1.1028 FMULP_st(1);
1.1029 FADDP_st(1);
1.1030 - push_fr( R_ECX, (FVm<<2)+2);
1.1031 - push_fr( R_ECX, (FVn<<2)+2);
1.1032 + push_fr( (FVm<<2)+2);
1.1033 + push_fr( (FVn<<2)+2);
1.1034 FMULP_st(1);
1.1035 FADDP_st(1);
1.1036 - push_fr( R_ECX, (FVm<<2)+3);
1.1037 - push_fr( R_ECX, (FVn<<2)+3);
1.1038 + push_fr( (FVm<<2)+3);
1.1039 + push_fr( (FVn<<2)+3);
1.1040 FMULP_st(1);
1.1041 FADDP_st(1);
1.1042 - pop_fr( R_ECX, (FVn<<2)+3);
1.1043 + pop_fr( (FVn<<2)+3);
1.1044 JMP_TARGET(doubleprec);
1.1045 sh4_x86.tstate = TSTATE_NONE;
1.1046 }
1.1047 @@ -3522,9 +3355,8 @@
1.1048 check_fpuen();
1.1049 load_spreg( R_ECX, R_FPSCR );
1.1050 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.1051 - JNE_rel8( CALL_FUNC2_SIZE + 9, doubleprec );
1.1052 - load_fr_bank( R_ECX );
1.1053 - ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
1.1054 + JNE_rel8(doubleprec );
1.1055 + LEA_sh4r_r32( REG_OFFSET(fr[0][FRn&0x0E]), R_ECX );
1.1056 load_spreg( R_EDX, R_FPUL );
1.1057 call_func2( sh4_fsca, R_EDX, R_ECX );
1.1058 JMP_TARGET(doubleprec);
1.1059 @@ -3539,11 +3371,9 @@
1.1060 check_fpuen();
1.1061 load_spreg( R_ECX, R_FPSCR );
1.1062 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.1063 - JNE_rel8( 18 + CALL_FUNC2_SIZE, doubleprec );
1.1064 - load_fr_bank( R_EDX ); // 3
1.1065 - ADD_imm8s_r32( FVn<<4, R_EDX ); // 3
1.1066 - load_xf_bank( R_ECX ); // 12
1.1067 - call_func2( sh4_ftrv, R_EDX, R_ECX ); // 12
1.1068 + JNE_rel8( doubleprec );
1.1069 + LEA_sh4r_r32( REG_OFFSET(fr[0][FVn<<2]), R_EDX );
1.1070 + call_func1( sh4_ftrv, R_EDX ); // 12
1.1071 JMP_TARGET(doubleprec);
1.1072 sh4_x86.tstate = TSTATE_NONE;
1.1073 }
1.1074 @@ -3565,7 +3395,7 @@
1.1075 load_spreg( R_ECX, R_FPSCR );
1.1076 XOR_imm32_r32( FPSCR_FR, R_ECX );
1.1077 store_spreg( R_ECX, R_FPSCR );
1.1078 - update_fr_bank( R_ECX );
1.1079 + call_func0( sh4_switch_fr_banks );
1.1080 sh4_x86.tstate = TSTATE_NONE;
1.1081 }
1.1082 break;
1.1083 @@ -3598,23 +3428,22 @@
1.1084 uint32_t FRn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.1085 check_fpuen();
1.1086 load_spreg( R_ECX, R_FPSCR );
1.1087 - load_spreg( R_EDX, REG_OFFSET(fr_bank));
1.1088 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.1089 - JNE_rel8(18, doubleprec);
1.1090 - push_fr( R_EDX, 0 );
1.1091 - push_fr( R_EDX, FRm );
1.1092 + JNE_rel8(doubleprec);
1.1093 + push_fr( 0 );
1.1094 + push_fr( FRm );
1.1095 FMULP_st(1);
1.1096 - push_fr( R_EDX, FRn );
1.1097 + push_fr( FRn );
1.1098 FADDP_st(1);
1.1099 - pop_fr( R_EDX, FRn );
1.1100 - JMP_rel8(16, end);
1.1101 + pop_fr( FRn );
1.1102 + JMP_rel8(end);
1.1103 JMP_TARGET(doubleprec);
1.1104 - push_dr( R_EDX, 0 );
1.1105 - push_dr( R_EDX, FRm );
1.1106 + push_dr( 0 );
1.1107 + push_dr( FRm );
1.1108 FMULP_st(1);
1.1109 - push_dr( R_EDX, FRn );
1.1110 + push_dr( FRn );
1.1111 FADDP_st(1);
1.1112 - pop_dr( R_EDX, FRn );
1.1113 + pop_dr( FRn );
1.1114 JMP_TARGET(end);
1.1115 sh4_x86.tstate = TSTATE_NONE;
1.1116 }
.