Search
lxdream.org :: lxdream/src/sh4/sh4x86.in :: diff
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 669:ab344e42bca9
prev 626:a010e30a30e9
next 671:a530ea88eebd
author nkeynes
date Mon May 12 10:00:13 2008 +0000 (12 years ago)
permissions -rw-r--r--
last change Cleanup most of the -Wall warnings (getting a bit sloppy...)
Convert FP code to use fixed banks rather than indirect pointer
(3-4% faster this way now)
file annotate diff log raw
1.1 --- a/src/sh4/sh4x86.in Fri Feb 08 00:06:56 2008 +0000
1.2 +++ b/src/sh4/sh4x86.in Mon May 12 10:00:13 2008 +0000
1.3 @@ -80,15 +80,14 @@
1.4 #define TSTATE_AE 3
1.5
1.6 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
1.7 -#define JT_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
1.8 +#define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
1.9 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
1.10 - OP(0x70+sh4_x86.tstate); OP(rel8); \
1.11 - MARK_JMP(rel8,label)
1.12 + OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
1.13 +
1.14 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
1.15 -#define JF_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
1.16 +#define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
1.17 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
1.18 - OP(0x70+ (sh4_x86.tstate^1)); OP(rel8); \
1.19 - MARK_JMP(rel8, label)
1.20 + OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
1.21
1.22 static struct sh4_x86_state sh4_x86;
1.23
1.24 @@ -97,7 +96,7 @@
1.25 static uint32_t save_fcw; /* save value for fpu control word */
1.26 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
1.27
1.28 -void sh4_x86_init()
1.29 +void sh4_translate_init(void)
1.30 {
1.31 sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
1.32 sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
1.33 @@ -169,7 +168,6 @@
1.34 OP64(value);
1.35 }
1.36
1.37 -
1.38 /**
1.39 * Emit an instruction to store an SH4 reg (RN)
1.40 */
1.41 @@ -180,97 +178,42 @@
1.42 OP(REG_OFFSET(r[sh4reg]));
1.43 }
1.44
1.45 -#define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
1.46 -
1.47 /**
1.48 * Load an FR register (single-precision floating point) into an integer x86
1.49 * register (eg for register-to-register moves)
1.50 */
1.51 -void static inline load_fr( int bankreg, int x86reg, int frm )
1.52 -{
1.53 - OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
1.54 -}
1.55 +#define load_fr(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
1.56 +#define load_xf(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
1.57
1.58 /**
1.59 - * Store an FR register (single-precision floating point) into an integer x86
1.60 + * Load one half of a DR register (DR or XD) into an integer x86 register (load_dr0: element frm|1, load_dr1: element frm&0x0E)
1.61 + */
1.62 +#define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
1.63 +#define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
1.64 +
1.65 +/**
1.66 + * Store an FR register (single-precision floating point) from an integer x86
1.67 * register (eg for register-to-register moves)
1.68 */
1.69 -void static inline store_fr( int bankreg, int x86reg, int frn )
1.70 -{
1.71 - OP(0x89); OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
1.72 -}
1.73 +#define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
1.74 +#define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
1.75
1.76 +#define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
1.77 +#define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
1.78
1.79 -/**
1.80 - * Load a pointer to the back fp back into the specified x86 register. The
1.81 - * bankreg must have been previously loaded with FPSCR.
1.82 - * NB: 12 bytes
1.83 - */
1.84 -static inline void load_xf_bank( int bankreg )
1.85 -{
1.86 - NOT_r32( bankreg );
1.87 - SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
1.88 - AND_imm8s_r32( 0x40, bankreg ); // Complete extraction
1.89 - OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
1.90 -}
1.91
1.92 -/**
1.93 - * Update the fr_bank pointer based on the current fpscr value.
1.94 - */
1.95 -static inline void update_fr_bank( int fpscrreg )
1.96 -{
1.97 - SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
1.98 - AND_imm8s_r32( 0x40, fpscrreg ); // Complete extraction
1.99 - OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
1.100 - store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
1.101 -}
1.102 -/**
1.103 - * Push FPUL (as a 32-bit float) onto the FPU stack
1.104 - */
1.105 -static inline void push_fpul( )
1.106 -{
1.107 - OP(0xD9); OP(0x45); OP(R_FPUL);
1.108 -}
1.109 +#define push_fpul() FLDF_sh4r(R_FPUL)
1.110 +#define pop_fpul() FSTPF_sh4r(R_FPUL)
1.111 +#define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
1.112 +#define pop_fr(frm) FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
1.113 +#define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
1.114 +#define pop_xf(frm) FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
1.115 +#define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
1.116 +#define pop_dr(frm) FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
1.117 +#define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
1.118 +#define pop_xdr(frm) FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
1.119
1.120 -/**
1.121 - * Pop FPUL (as a 32-bit float) from the FPU stack
1.122 - */
1.123 -static inline void pop_fpul( )
1.124 -{
1.125 - OP(0xD9); OP(0x5D); OP(R_FPUL);
1.126 -}
1.127
1.128 -/**
1.129 - * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
1.130 - * with the location of the current fp bank.
1.131 - */
1.132 -static inline void push_fr( int bankreg, int frm )
1.133 -{
1.134 - OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2); // FLD.S [bankreg + frm^1*4]
1.135 -}
1.136 -
1.137 -/**
1.138 - * Pop a 32-bit float from the FPU stack and store it back into the fp bank,
1.139 - * with bankreg previously loaded with the location of the current fp bank.
1.140 - */
1.141 -static inline void pop_fr( int bankreg, int frm )
1.142 -{
1.143 - OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
1.144 -}
1.145 -
1.146 -/**
1.147 - * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
1.148 - * with the location of the current fp bank.
1.149 - */
1.150 -static inline void push_dr( int bankreg, int frm )
1.151 -{
1.152 - OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
1.153 -}
1.154 -
1.155 -static inline void pop_dr( int bankreg, int frm )
1.156 -{
1.157 - OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
1.158 -}
1.159
1.160 /* Exception checks - Note that all exception checks will clobber EAX */
1.161
1.162 @@ -555,13 +498,13 @@
1.163 load_reg( R_ECX, Rn );
1.164 XOR_r32_r32( R_ECX, R_EAX );
1.165 TEST_r8_r8( R_AL, R_AL );
1.166 - JE_rel8(13, target1);
1.167 - TEST_r8_r8( R_AH, R_AH ); // 2
1.168 - JE_rel8(9, target2);
1.169 - SHR_imm8_r32( 16, R_EAX ); // 3
1.170 - TEST_r8_r8( R_AL, R_AL ); // 2
1.171 - JE_rel8(2, target3);
1.172 - TEST_r8_r8( R_AH, R_AH ); // 2
1.173 + JE_rel8(target1);
1.174 + TEST_r8_r8( R_AH, R_AH );
1.175 + JE_rel8(target2);
1.176 + SHR_imm8_r32( 16, R_EAX );
1.177 + TEST_r8_r8( R_AL, R_AL );
1.178 + JE_rel8(target3);
1.179 + TEST_r8_r8( R_AH, R_AH );
1.180 JMP_TARGET(target1);
1.181 JMP_TARGET(target2);
1.182 JMP_TARGET(target3);
1.183 @@ -595,9 +538,9 @@
1.184 RCL1_r32( R_EAX );
1.185 SETC_r8( R_DL ); // Q'
1.186 CMP_sh4r_r32( R_Q, R_ECX );
1.187 - JE_rel8(5, mqequal);
1.188 + JE_rel8(mqequal);
1.189 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
1.190 - JMP_rel8(3, end);
1.191 + JMP_rel8(end);
1.192 JMP_TARGET(mqequal);
1.193 SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
1.194 JMP_TARGET(end);
1.195 @@ -690,7 +633,7 @@
1.196
1.197 load_spreg( R_ECX, R_S );
1.198 TEST_r32_r32(R_ECX, R_ECX);
1.199 - JE_rel8( CALL_FUNC0_SIZE, nosat );
1.200 + JE_rel8( nosat );
1.201 call_func0( signsat48 );
1.202 JMP_TARGET( nosat );
1.203 sh4_x86.tstate = TSTATE_NONE;
1.204 @@ -728,21 +671,21 @@
1.205
1.206 load_spreg( R_ECX, R_S );
1.207 TEST_r32_r32( R_ECX, R_ECX );
1.208 - JE_rel8( 47, nosat );
1.209 + JE_rel8( nosat );
1.210
1.211 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
1.212 - JNO_rel8( 51, end ); // 2
1.213 + JNO_rel8( end ); // 2
1.214 load_imm32( R_EDX, 1 ); // 5
1.215 store_spreg( R_EDX, R_MACH ); // 6
1.216 - JS_rel8( 13, positive ); // 2
1.217 + JS_rel8( positive ); // 2
1.218 load_imm32( R_EAX, 0x80000000 );// 5
1.219 store_spreg( R_EAX, R_MACL ); // 6
1.220 - JMP_rel8( 25, end2 ); // 2
1.221 + JMP_rel8(end2); // 2
1.222
1.223 JMP_TARGET(positive);
1.224 load_imm32( R_EAX, 0x7FFFFFFF );// 5
1.225 store_spreg( R_EAX, R_MACL ); // 6
1.226 - JMP_rel8( 12, end3); // 2
1.227 + JMP_rel8(end3); // 2
1.228
1.229 JMP_TARGET(nosat);
1.230 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
1.231 @@ -862,17 +805,17 @@
1.232 load_reg( R_EAX, Rn );
1.233 load_reg( R_ECX, Rm );
1.234 CMP_imm32_r32( 0, R_ECX );
1.235 - JGE_rel8(16, doshl);
1.236 + JGE_rel8(doshl);
1.237
1.238 NEG_r32( R_ECX ); // 2
1.239 AND_imm8_r8( 0x1F, R_CL ); // 3
1.240 - JE_rel8( 4, emptysar); // 2
1.241 + JE_rel8(emptysar); // 2
1.242 SAR_r32_CL( R_EAX ); // 2
1.243 - JMP_rel8(10, end); // 2
1.244 + JMP_rel8(end); // 2
1.245
1.246 JMP_TARGET(emptysar);
1.247 SAR_imm8_r32(31, R_EAX ); // 3
1.248 - JMP_rel8(5, end2);
1.249 + JMP_rel8(end2);
1.250
1.251 JMP_TARGET(doshl);
1.252 AND_imm8_r8( 0x1F, R_CL ); // 3
1.253 @@ -886,17 +829,17 @@
1.254 load_reg( R_EAX, Rn );
1.255 load_reg( R_ECX, Rm );
1.256 CMP_imm32_r32( 0, R_ECX );
1.257 - JGE_rel8(15, doshl);
1.258 + JGE_rel8(doshl);
1.259
1.260 NEG_r32( R_ECX ); // 2
1.261 AND_imm8_r8( 0x1F, R_CL ); // 3
1.262 - JE_rel8( 4, emptyshr );
1.263 + JE_rel8(emptyshr );
1.264 SHR_r32_CL( R_EAX ); // 2
1.265 - JMP_rel8(9, end); // 2
1.266 + JMP_rel8(end); // 2
1.267
1.268 JMP_TARGET(emptyshr);
1.269 XOR_r32_r32( R_EAX, R_EAX );
1.270 - JMP_rel8(5, end2);
1.271 + JMP_rel8(end2);
1.272
1.273 JMP_TARGET(doshl);
1.274 AND_imm8_r8( 0x1F, R_CL ); // 3
1.275 @@ -1427,7 +1370,7 @@
1.276 SLOTILLEGAL();
1.277 } else {
1.278 sh4vma_t target = disp + pc + 4;
1.279 - JT_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
1.280 + JT_rel8( nottaken );
1.281 exit_block_rel(target, pc+2 );
1.282 JMP_TARGET(nottaken);
1.283 return 2;
1.284 @@ -1440,7 +1383,7 @@
1.285 sh4_x86.in_delay_slot = DELAY_PC;
1.286 if( UNTRANSLATABLE(pc+2) ) {
1.287 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1.288 - JT_rel8(6,nottaken);
1.289 + JT_rel8(nottaken);
1.290 ADD_imm32_r32( disp, R_EAX );
1.291 JMP_TARGET(nottaken);
1.292 ADD_sh4r_r32( R_PC, R_EAX );
1.293 @@ -1555,7 +1498,7 @@
1.294 SLOTILLEGAL();
1.295 } else {
1.296 sh4vma_t target = disp + pc + 4;
1.297 - JF_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
1.298 + JF_rel8( nottaken );
1.299 exit_block_rel(target, pc+2 );
1.300 JMP_TARGET(nottaken);
1.301 return 2;
1.302 @@ -1568,7 +1511,7 @@
1.303 sh4_x86.in_delay_slot = DELAY_PC;
1.304 if( UNTRANSLATABLE(pc+2) ) {
1.305 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1.306 - JF_rel8(6,nottaken);
1.307 + JF_rel8(nottaken);
1.308 ADD_imm32_r32( disp, R_EAX );
1.309 JMP_TARGET(nottaken);
1.310 ADD_sh4r_r32( R_PC, R_EAX );
1.311 @@ -1734,44 +1677,17 @@
1.312 */
1.313 check_fpuen();
1.314 load_spreg( R_ECX, R_FPSCR );
1.315 - load_fr_bank( R_EDX );
1.316 TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.317 - JNE_rel8(8, doublesize);
1.318 - load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
1.319 - store_fr( R_EDX, R_EAX, FRn );
1.320 - if( FRm&1 ) {
1.321 - JMP_rel8(24, end);
1.322 - JMP_TARGET(doublesize);
1.323 - load_xf_bank( R_ECX );
1.324 - load_fr( R_ECX, R_EAX, FRm-1 );
1.325 - if( FRn&1 ) {
1.326 - load_fr( R_ECX, R_EDX, FRm );
1.327 - store_fr( R_ECX, R_EAX, FRn-1 );
1.328 - store_fr( R_ECX, R_EDX, FRn );
1.329 - } else /* FRn&1 == 0 */ {
1.330 - load_fr( R_ECX, R_ECX, FRm );
1.331 - store_fr( R_EDX, R_EAX, FRn );
1.332 - store_fr( R_EDX, R_ECX, FRn+1 );
1.333 - }
1.334 - JMP_TARGET(end);
1.335 - } else /* FRm&1 == 0 */ {
1.336 - if( FRn&1 ) {
1.337 - JMP_rel8(24, end);
1.338 - load_xf_bank( R_ECX );
1.339 - load_fr( R_EDX, R_EAX, FRm );
1.340 - load_fr( R_EDX, R_EDX, FRm+1 );
1.341 - store_fr( R_ECX, R_EAX, FRn-1 );
1.342 - store_fr( R_ECX, R_EDX, FRn );
1.343 - JMP_TARGET(end);
1.344 - } else /* FRn&1 == 0 */ {
1.345 - JMP_rel8(12, end);
1.346 - load_fr( R_EDX, R_EAX, FRm );
1.347 - load_fr( R_EDX, R_ECX, FRm+1 );
1.348 - store_fr( R_EDX, R_EAX, FRn );
1.349 - store_fr( R_EDX, R_ECX, FRn+1 );
1.350 - JMP_TARGET(end);
1.351 - }
1.352 - }
1.353 + JNE_rel8(doublesize);
1.354 + load_fr( R_EAX, FRm ); // SZ=0 branch
1.355 + store_fr( R_EAX, FRn );
1.356 + JMP_rel8(end);
1.357 + JMP_TARGET(doublesize);
1.358 + load_dr0( R_EAX, FRm );
1.359 + load_dr1( R_ECX, FRm );
1.360 + store_dr0( R_EAX, FRn );
1.361 + store_dr1( R_ECX, FRn );
1.362 + JMP_TARGET(end);
1.363 sh4_x86.tstate = TSTATE_NONE;
1.364 :}
1.365 FMOV FRm, @Rn {:
1.366 @@ -1781,27 +1697,17 @@
1.367 MMU_TRANSLATE_WRITE( R_EAX );
1.368 load_spreg( R_EDX, R_FPSCR );
1.369 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.370 - JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
1.371 - load_fr_bank( R_EDX );
1.372 - load_fr( R_EDX, R_ECX, FRm );
1.373 + JNE_rel8(doublesize);
1.374 +
1.375 + load_fr( R_ECX, FRm );
1.376 MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
1.377 - if( FRm&1 ) {
1.378 - JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
1.379 - JMP_TARGET(doublesize);
1.380 - load_xf_bank( R_EDX );
1.381 - load_fr( R_EDX, R_ECX, FRm&0x0E );
1.382 - load_fr( R_EDX, R_EDX, FRm|0x01 );
1.383 - MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.384 - JMP_TARGET(end);
1.385 - } else {
1.386 - JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
1.387 - JMP_TARGET(doublesize);
1.388 - load_fr_bank( R_EDX );
1.389 - load_fr( R_EDX, R_ECX, FRm&0x0E );
1.390 - load_fr( R_EDX, R_EDX, FRm|0x01 );
1.391 - MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.392 - JMP_TARGET(end);
1.393 - }
1.394 + JMP_rel8(end);
1.395 +
1.396 + JMP_TARGET(doublesize);
1.397 + load_dr0( R_ECX, FRm );
1.398 + load_dr1( R_EDX, FRm );
1.399 + MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.400 + JMP_TARGET(end);
1.401 sh4_x86.tstate = TSTATE_NONE;
1.402 :}
1.403 FMOV @Rm, FRn {:
1.404 @@ -1811,28 +1717,17 @@
1.405 MMU_TRANSLATE_READ( R_EAX );
1.406 load_spreg( R_EDX, R_FPSCR );
1.407 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.408 - JNE_rel8(8 + MEM_READ_SIZE, doublesize);
1.409 + JNE_rel8(doublesize);
1.410 +
1.411 MEM_READ_LONG( R_EAX, R_EAX );
1.412 - load_fr_bank( R_EDX );
1.413 - store_fr( R_EDX, R_EAX, FRn );
1.414 - if( FRn&1 ) {
1.415 - JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
1.416 - JMP_TARGET(doublesize);
1.417 - MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.418 - load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
1.419 - load_xf_bank( R_EDX );
1.420 - store_fr( R_EDX, R_ECX, FRn&0x0E );
1.421 - store_fr( R_EDX, R_EAX, FRn|0x01 );
1.422 - JMP_TARGET(end);
1.423 - } else {
1.424 - JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
1.425 - JMP_TARGET(doublesize);
1.426 - MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.427 - load_fr_bank( R_EDX );
1.428 - store_fr( R_EDX, R_ECX, FRn&0x0E );
1.429 - store_fr( R_EDX, R_EAX, FRn|0x01 );
1.430 - JMP_TARGET(end);
1.431 - }
1.432 + store_fr( R_EAX, FRn );
1.433 + JMP_rel8(end);
1.434 +
1.435 + JMP_TARGET(doublesize);
1.436 + MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.437 + store_dr0( R_ECX, FRn );
1.438 + store_dr1( R_EAX, FRn );
1.439 + JMP_TARGET(end);
1.440 sh4_x86.tstate = TSTATE_NONE;
1.441 :}
1.442 FMOV FRm, @-Rn {:
1.443 @@ -1841,36 +1736,24 @@
1.444 check_walign32( R_EAX );
1.445 load_spreg( R_EDX, R_FPSCR );
1.446 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.447 - JNE_rel8(15 + MEM_WRITE_SIZE + MMU_TRANSLATE_SIZE, doublesize);
1.448 + JNE_rel8(doublesize);
1.449 +
1.450 ADD_imm8s_r32( -4, R_EAX );
1.451 MMU_TRANSLATE_WRITE( R_EAX );
1.452 - load_fr_bank( R_EDX );
1.453 - load_fr( R_EDX, R_ECX, FRm );
1.454 + load_fr( R_ECX, FRm );
1.455 ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
1.456 - MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
1.457 - if( FRm&1 ) {
1.458 - JMP_rel8( 25 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
1.459 - JMP_TARGET(doublesize);
1.460 - ADD_imm8s_r32(-8,R_EAX);
1.461 - MMU_TRANSLATE_WRITE( R_EAX );
1.462 - load_xf_bank( R_EDX );
1.463 - load_fr( R_EDX, R_ECX, FRm&0x0E );
1.464 - load_fr( R_EDX, R_EDX, FRm|0x01 );
1.465 - ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
1.466 - MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.467 - JMP_TARGET(end);
1.468 - } else {
1.469 - JMP_rel8( 16 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
1.470 - JMP_TARGET(doublesize);
1.471 - ADD_imm8s_r32(-8,R_EAX);
1.472 - MMU_TRANSLATE_WRITE( R_EAX );
1.473 - load_fr_bank( R_EDX );
1.474 - load_fr( R_EDX, R_ECX, FRm&0x0E );
1.475 - load_fr( R_EDX, R_EDX, FRm|0x01 );
1.476 - ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
1.477 - MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.478 - JMP_TARGET(end);
1.479 - }
1.480 + MEM_WRITE_LONG( R_EAX, R_ECX );
1.481 + JMP_rel8(end);
1.482 +
1.483 + JMP_TARGET(doublesize);
1.484 + ADD_imm8s_r32(-8,R_EAX);
1.485 + MMU_TRANSLATE_WRITE( R_EAX );
1.486 + load_dr0( R_ECX, FRm );
1.487 + load_dr1( R_EDX, FRm );
1.488 + ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
1.489 + MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.490 + JMP_TARGET(end);
1.491 +
1.492 sh4_x86.tstate = TSTATE_NONE;
1.493 :}
1.494 FMOV @Rm+, FRn {:
1.495 @@ -1880,30 +1763,20 @@
1.496 MMU_TRANSLATE_READ( R_EAX );
1.497 load_spreg( R_EDX, R_FPSCR );
1.498 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.499 - JNE_rel8(12 + MEM_READ_SIZE, doublesize);
1.500 + JNE_rel8(doublesize);
1.501 +
1.502 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1.503 MEM_READ_LONG( R_EAX, R_EAX );
1.504 - load_fr_bank( R_EDX );
1.505 - store_fr( R_EDX, R_EAX, FRn );
1.506 - if( FRn&1 ) {
1.507 - JMP_rel8(25 + MEM_READ_DOUBLE_SIZE, end);
1.508 - JMP_TARGET(doublesize);
1.509 - ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
1.510 - MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.511 - load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
1.512 - load_xf_bank( R_EDX );
1.513 - store_fr( R_EDX, R_ECX, FRn&0x0E );
1.514 - store_fr( R_EDX, R_EAX, FRn|0x01 );
1.515 - JMP_TARGET(end);
1.516 - } else {
1.517 - JMP_rel8(13 + MEM_READ_DOUBLE_SIZE, end);
1.518 - ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
1.519 - MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.520 - load_fr_bank( R_EDX );
1.521 - store_fr( R_EDX, R_ECX, FRn&0x0E );
1.522 - store_fr( R_EDX, R_EAX, FRn|0x01 );
1.523 - JMP_TARGET(end);
1.524 - }
1.525 + store_fr( R_EAX, FRn );
1.526 + JMP_rel8(end);
1.527 +
1.528 + JMP_TARGET(doublesize);
1.529 + ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
1.530 + MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.531 + store_dr0( R_ECX, FRn );
1.532 + store_dr1( R_EAX, FRn );
1.533 + JMP_TARGET(end);
1.534 +
1.535 sh4_x86.tstate = TSTATE_NONE;
1.536 :}
1.537 FMOV FRm, @(R0, Rn) {:
1.538 @@ -1914,27 +1787,18 @@
1.539 MMU_TRANSLATE_WRITE( R_EAX );
1.540 load_spreg( R_EDX, R_FPSCR );
1.541 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.542 - JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
1.543 - load_fr_bank( R_EDX );
1.544 - load_fr( R_EDX, R_ECX, FRm );
1.545 + JNE_rel8(doublesize);
1.546 +
1.547 + load_fr( R_ECX, FRm );
1.548 MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
1.549 - if( FRm&1 ) {
1.550 - JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
1.551 - JMP_TARGET(doublesize);
1.552 - load_xf_bank( R_EDX );
1.553 - load_fr( R_EDX, R_ECX, FRm&0x0E );
1.554 - load_fr( R_EDX, R_EDX, FRm|0x01 );
1.555 - MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.556 - JMP_TARGET(end);
1.557 - } else {
1.558 - JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
1.559 - JMP_TARGET(doublesize);
1.560 - load_fr_bank( R_EDX );
1.561 - load_fr( R_EDX, R_ECX, FRm&0x0E );
1.562 - load_fr( R_EDX, R_EDX, FRm|0x01 );
1.563 - MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.564 - JMP_TARGET(end);
1.565 - }
1.566 + JMP_rel8(end);
1.567 +
1.568 + JMP_TARGET(doublesize);
1.569 + load_dr0( R_ECX, FRm );
1.570 + load_dr1( R_EDX, FRm );
1.571 + MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.572 + JMP_TARGET(end);
1.573 +
1.574 sh4_x86.tstate = TSTATE_NONE;
1.575 :}
1.576 FMOV @(R0, Rm), FRn {:
1.577 @@ -1945,38 +1809,27 @@
1.578 MMU_TRANSLATE_READ( R_EAX );
1.579 load_spreg( R_EDX, R_FPSCR );
1.580 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.581 - JNE_rel8(8 + MEM_READ_SIZE, doublesize);
1.582 + JNE_rel8(doublesize);
1.583 +
1.584 MEM_READ_LONG( R_EAX, R_EAX );
1.585 - load_fr_bank( R_EDX );
1.586 - store_fr( R_EDX, R_EAX, FRn );
1.587 - if( FRn&1 ) {
1.588 - JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
1.589 - JMP_TARGET(doublesize);
1.590 - MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.591 - load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
1.592 - load_xf_bank( R_EDX );
1.593 - store_fr( R_EDX, R_ECX, FRn&0x0E );
1.594 - store_fr( R_EDX, R_EAX, FRn|0x01 );
1.595 - JMP_TARGET(end);
1.596 - } else {
1.597 - JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
1.598 - JMP_TARGET(doublesize);
1.599 - MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.600 - load_fr_bank( R_EDX );
1.601 - store_fr( R_EDX, R_ECX, FRn&0x0E );
1.602 - store_fr( R_EDX, R_EAX, FRn|0x01 );
1.603 - JMP_TARGET(end);
1.604 - }
1.605 + store_fr( R_EAX, FRn );
1.606 + JMP_rel8(end);
1.607 +
1.608 + JMP_TARGET(doublesize);
1.609 + MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.610 + store_dr0( R_ECX, FRn );
1.611 + store_dr1( R_EAX, FRn );
1.612 + JMP_TARGET(end);
1.613 +
1.614 sh4_x86.tstate = TSTATE_NONE;
1.615 :}
1.616 FLDI0 FRn {: /* IFF PR=0 */
1.617 check_fpuen();
1.618 load_spreg( R_ECX, R_FPSCR );
1.619 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.620 - JNE_rel8(8, end);
1.621 + JNE_rel8(end);
1.622 XOR_r32_r32( R_EAX, R_EAX );
1.623 - load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.624 - store_fr( R_ECX, R_EAX, FRn );
1.625 + store_fr( R_EAX, FRn );
1.626 JMP_TARGET(end);
1.627 sh4_x86.tstate = TSTATE_NONE;
1.628 :}
1.629 @@ -1984,10 +1837,9 @@
1.630 check_fpuen();
1.631 load_spreg( R_ECX, R_FPSCR );
1.632 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.633 - JNE_rel8(11, end);
1.634 + JNE_rel8(end);
1.635 load_imm32(R_EAX, 0x3F800000);
1.636 - load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.637 - store_fr( R_ECX, R_EAX, FRn );
1.638 + store_fr( R_EAX, FRn );
1.639 JMP_TARGET(end);
1.640 sh4_x86.tstate = TSTATE_NONE;
1.641 :}
1.642 @@ -1995,43 +1847,41 @@
1.643 FLOAT FPUL, FRn {:
1.644 check_fpuen();
1.645 load_spreg( R_ECX, R_FPSCR );
1.646 - load_spreg(R_EDX, REG_OFFSET(fr_bank));
1.647 FILD_sh4r(R_FPUL);
1.648 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.649 - JNE_rel8(5, doubleprec);
1.650 - pop_fr( R_EDX, FRn );
1.651 - JMP_rel8(3, end);
1.652 + JNE_rel8(doubleprec);
1.653 + pop_fr( FRn );
1.654 + JMP_rel8(end);
1.655 JMP_TARGET(doubleprec);
1.656 - pop_dr( R_EDX, FRn );
1.657 + pop_dr( FRn );
1.658 JMP_TARGET(end);
1.659 sh4_x86.tstate = TSTATE_NONE;
1.660 :}
1.661 FTRC FRm, FPUL {:
1.662 check_fpuen();
1.663 load_spreg( R_ECX, R_FPSCR );
1.664 - load_fr_bank( R_EDX );
1.665 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.666 - JNE_rel8(5, doubleprec);
1.667 - push_fr( R_EDX, FRm );
1.668 - JMP_rel8(3, doop);
1.669 + JNE_rel8(doubleprec);
1.670 + push_fr( FRm );
1.671 + JMP_rel8(doop);
1.672 JMP_TARGET(doubleprec);
1.673 - push_dr( R_EDX, FRm );
1.674 + push_dr( FRm );
1.675 JMP_TARGET( doop );
1.676 load_imm32( R_ECX, (uint32_t)&max_int );
1.677 FILD_r32ind( R_ECX );
1.678 FCOMIP_st(1);
1.679 - JNA_rel8( 32, sat );
1.680 + JNA_rel8( sat );
1.681 load_imm32( R_ECX, (uint32_t)&min_int ); // 5
1.682 FILD_r32ind( R_ECX ); // 2
1.683 FCOMIP_st(1); // 2
1.684 - JAE_rel8( 21, sat2 ); // 2
1.685 + JAE_rel8( sat2 ); // 2
1.686 load_imm32( R_EAX, (uint32_t)&save_fcw );
1.687 FNSTCW_r32ind( R_EAX );
1.688 load_imm32( R_EDX, (uint32_t)&trunc_fcw );
1.689 FLDCW_r32ind( R_EDX );
1.690 FISTP_sh4r(R_FPUL); // 3
1.691 FLDCW_r32ind( R_EAX );
1.692 - JMP_rel8( 9, end ); // 2
1.693 + JMP_rel8(end); // 2
1.694
1.695 JMP_TARGET(sat);
1.696 JMP_TARGET(sat2);
1.697 @@ -2043,25 +1893,22 @@
1.698 :}
1.699 FLDS FRm, FPUL {:
1.700 check_fpuen();
1.701 - load_fr_bank( R_ECX );
1.702 - load_fr( R_ECX, R_EAX, FRm );
1.703 + load_fr( R_EAX, FRm );
1.704 store_spreg( R_EAX, R_FPUL );
1.705 sh4_x86.tstate = TSTATE_NONE;
1.706 :}
1.707 FSTS FPUL, FRn {:
1.708 check_fpuen();
1.709 - load_fr_bank( R_ECX );
1.710 load_spreg( R_EAX, R_FPUL );
1.711 - store_fr( R_ECX, R_EAX, FRn );
1.712 + store_fr( R_EAX, FRn );
1.713 sh4_x86.tstate = TSTATE_NONE;
1.714 :}
1.715 FCNVDS FRm, FPUL {:
1.716 check_fpuen();
1.717 load_spreg( R_ECX, R_FPSCR );
1.718 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.719 - JE_rel8(9, end); // only when PR=1
1.720 - load_fr_bank( R_ECX );
1.721 - push_dr( R_ECX, FRm );
1.722 + JE_rel8(end); // only when PR=1
1.723 + push_dr( FRm );
1.724 pop_fpul();
1.725 JMP_TARGET(end);
1.726 sh4_x86.tstate = TSTATE_NONE;
1.727 @@ -2070,10 +1917,9 @@
1.728 check_fpuen();
1.729 load_spreg( R_ECX, R_FPSCR );
1.730 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.731 - JE_rel8(9, end); // only when PR=1
1.732 - load_fr_bank( R_ECX );
1.733 + JE_rel8(end); // only when PR=1
1.734 push_fpul();
1.735 - pop_dr( R_ECX, FRn );
1.736 + pop_dr( FRn );
1.737 JMP_TARGET(end);
1.738 sh4_x86.tstate = TSTATE_NONE;
1.739 :}
1.740 @@ -2082,17 +1928,16 @@
1.741 FABS FRn {:
1.742 check_fpuen();
1.743 load_spreg( R_ECX, R_FPSCR );
1.744 - load_fr_bank( R_EDX );
1.745 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.746 - JNE_rel8(10, doubleprec);
1.747 - push_fr(R_EDX, FRn); // 3
1.748 + JNE_rel8(doubleprec);
1.749 + push_fr(FRn); // 6
1.750 FABS_st0(); // 2
1.751 - pop_fr( R_EDX, FRn); //3
1.752 - JMP_rel8(8,end); // 2
1.753 + pop_fr(FRn); //6
1.754 + JMP_rel8(end); // 2
1.755 JMP_TARGET(doubleprec);
1.756 - push_dr(R_EDX, FRn);
1.757 + push_dr(FRn);
1.758 FABS_st0();
1.759 - pop_dr(R_EDX, FRn);
1.760 + pop_dr(FRn);
1.761 JMP_TARGET(end);
1.762 sh4_x86.tstate = TSTATE_NONE;
1.763 :}
1.764 @@ -2100,18 +1945,17 @@
1.765 check_fpuen();
1.766 load_spreg( R_ECX, R_FPSCR );
1.767 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.768 - load_fr_bank( R_EDX );
1.769 - JNE_rel8(13,doubleprec);
1.770 - push_fr(R_EDX, FRm);
1.771 - push_fr(R_EDX, FRn);
1.772 + JNE_rel8(doubleprec);
1.773 + push_fr(FRm);
1.774 + push_fr(FRn);
1.775 FADDP_st(1);
1.776 - pop_fr(R_EDX, FRn);
1.777 - JMP_rel8(11,end);
1.778 + pop_fr(FRn);
1.779 + JMP_rel8(end);
1.780 JMP_TARGET(doubleprec);
1.781 - push_dr(R_EDX, FRm);
1.782 - push_dr(R_EDX, FRn);
1.783 + push_dr(FRm);
1.784 + push_dr(FRn);
1.785 FADDP_st(1);
1.786 - pop_dr(R_EDX, FRn);
1.787 + pop_dr(FRn);
1.788 JMP_TARGET(end);
1.789 sh4_x86.tstate = TSTATE_NONE;
1.790 :}
1.791 @@ -2119,41 +1963,39 @@
1.792 check_fpuen();
1.793 load_spreg( R_ECX, R_FPSCR );
1.794 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.795 - load_fr_bank( R_EDX );
1.796 - JNE_rel8(13, doubleprec);
1.797 - push_fr(R_EDX, FRn);
1.798 - push_fr(R_EDX, FRm);
1.799 + JNE_rel8(doubleprec);
1.800 + push_fr(FRn);
1.801 + push_fr(FRm);
1.802 FDIVP_st(1);
1.803 - pop_fr(R_EDX, FRn);
1.804 - JMP_rel8(11, end);
1.805 + pop_fr(FRn);
1.806 + JMP_rel8(end);
1.807 JMP_TARGET(doubleprec);
1.808 - push_dr(R_EDX, FRn);
1.809 - push_dr(R_EDX, FRm);
1.810 + push_dr(FRn);
1.811 + push_dr(FRm);
1.812 FDIVP_st(1);
1.813 - pop_dr(R_EDX, FRn);
1.814 + pop_dr(FRn);
1.815 JMP_TARGET(end);
1.816 sh4_x86.tstate = TSTATE_NONE;
1.817 :}
1.818 FMAC FR0, FRm, FRn {:
1.819 check_fpuen();
1.820 load_spreg( R_ECX, R_FPSCR );
1.821 - load_spreg( R_EDX, REG_OFFSET(fr_bank));
1.822 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.823 - JNE_rel8(18, doubleprec);
1.824 - push_fr( R_EDX, 0 );
1.825 - push_fr( R_EDX, FRm );
1.826 + JNE_rel8(doubleprec);
1.827 + push_fr( 0 );
1.828 + push_fr( FRm );
1.829 FMULP_st(1);
1.830 - push_fr( R_EDX, FRn );
1.831 + push_fr( FRn );
1.832 FADDP_st(1);
1.833 - pop_fr( R_EDX, FRn );
1.834 - JMP_rel8(16, end);
1.835 + pop_fr( FRn );
1.836 + JMP_rel8(end);
1.837 JMP_TARGET(doubleprec);
1.838 - push_dr( R_EDX, 0 );
1.839 - push_dr( R_EDX, FRm );
1.840 + push_dr( 0 );
1.841 + push_dr( FRm );
1.842 FMULP_st(1);
1.843 - push_dr( R_EDX, FRn );
1.844 + push_dr( FRn );
1.845 FADDP_st(1);
1.846 - pop_dr( R_EDX, FRn );
1.847 + pop_dr( FRn );
1.848 JMP_TARGET(end);
1.849 sh4_x86.tstate = TSTATE_NONE;
1.850 :}
1.851 @@ -2162,18 +2004,17 @@
1.852 check_fpuen();
1.853 load_spreg( R_ECX, R_FPSCR );
1.854 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.855 - load_fr_bank( R_EDX );
1.856 - JNE_rel8(13, doubleprec);
1.857 - push_fr(R_EDX, FRm);
1.858 - push_fr(R_EDX, FRn);
1.859 + JNE_rel8(doubleprec);
1.860 + push_fr(FRm);
1.861 + push_fr(FRn);
1.862 FMULP_st(1);
1.863 - pop_fr(R_EDX, FRn);
1.864 - JMP_rel8(11, end);
1.865 + pop_fr(FRn);
1.866 + JMP_rel8(end);
1.867 JMP_TARGET(doubleprec);
1.868 - push_dr(R_EDX, FRm);
1.869 - push_dr(R_EDX, FRn);
1.870 + push_dr(FRm);
1.871 + push_dr(FRn);
1.872 FMULP_st(1);
1.873 - pop_dr(R_EDX, FRn);
1.874 + pop_dr(FRn);
1.875 JMP_TARGET(end);
1.876 sh4_x86.tstate = TSTATE_NONE;
1.877 :}
1.878 @@ -2181,16 +2022,15 @@
1.879 check_fpuen();
1.880 load_spreg( R_ECX, R_FPSCR );
1.881 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.882 - load_fr_bank( R_EDX );
1.883 - JNE_rel8(10, doubleprec);
1.884 - push_fr(R_EDX, FRn);
1.885 + JNE_rel8(doubleprec);
1.886 + push_fr(FRn);
1.887 FCHS_st0();
1.888 - pop_fr(R_EDX, FRn);
1.889 - JMP_rel8(8, end);
1.890 + pop_fr(FRn);
1.891 + JMP_rel8(end);
1.892 JMP_TARGET(doubleprec);
1.893 - push_dr(R_EDX, FRn);
1.894 + push_dr(FRn);
1.895 FCHS_st0();
1.896 - pop_dr(R_EDX, FRn);
1.897 + pop_dr(FRn);
1.898 JMP_TARGET(end);
1.899 sh4_x86.tstate = TSTATE_NONE;
1.900 :}
1.901 @@ -2198,13 +2038,12 @@
1.902 check_fpuen();
1.903 load_spreg( R_ECX, R_FPSCR );
1.904 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.905 - load_fr_bank( R_EDX );
1.906 - JNE_rel8(12, end); // PR=0 only
1.907 + JNE_rel8(end); // PR=0 only
1.908 FLD1_st0();
1.909 - push_fr(R_EDX, FRn);
1.910 + push_fr(FRn);
1.911 FSQRT_st0();
1.912 FDIVP_st(1);
1.913 - pop_fr(R_EDX, FRn);
1.914 + pop_fr(FRn);
1.915 JMP_TARGET(end);
1.916 sh4_x86.tstate = TSTATE_NONE;
1.917 :}
1.918 @@ -2212,16 +2051,15 @@
1.919 check_fpuen();
1.920 load_spreg( R_ECX, R_FPSCR );
1.921 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.922 - load_fr_bank( R_EDX );
1.923 - JNE_rel8(10, doubleprec);
1.924 - push_fr(R_EDX, FRn);
1.925 + JNE_rel8(doubleprec);
1.926 + push_fr(FRn);
1.927 FSQRT_st0();
1.928 - pop_fr(R_EDX, FRn);
1.929 - JMP_rel8(8, end);
1.930 + pop_fr(FRn);
1.931 + JMP_rel8(end);
1.932 JMP_TARGET(doubleprec);
1.933 - push_dr(R_EDX, FRn);
1.934 + push_dr(FRn);
1.935 FSQRT_st0();
1.936 - pop_dr(R_EDX, FRn);
1.937 + pop_dr(FRn);
1.938 JMP_TARGET(end);
1.939 sh4_x86.tstate = TSTATE_NONE;
1.940 :}
1.941 @@ -2229,18 +2067,17 @@
1.942 check_fpuen();
1.943 load_spreg( R_ECX, R_FPSCR );
1.944 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.945 - load_fr_bank( R_EDX );
1.946 - JNE_rel8(13, doubleprec);
1.947 - push_fr(R_EDX, FRn);
1.948 - push_fr(R_EDX, FRm);
1.949 + JNE_rel8(doubleprec);
1.950 + push_fr(FRn);
1.951 + push_fr(FRm);
1.952 FSUBP_st(1);
1.953 - pop_fr(R_EDX, FRn);
1.954 - JMP_rel8(11, end);
1.955 + pop_fr(FRn);
1.956 + JMP_rel8(end);
1.957 JMP_TARGET(doubleprec);
1.958 - push_dr(R_EDX, FRn);
1.959 - push_dr(R_EDX, FRm);
1.960 + push_dr(FRn);
1.961 + push_dr(FRm);
1.962 FSUBP_st(1);
1.963 - pop_dr(R_EDX, FRn);
1.964 + pop_dr(FRn);
1.965 JMP_TARGET(end);
1.966 sh4_x86.tstate = TSTATE_NONE;
1.967 :}
1.968 @@ -2249,14 +2086,13 @@
1.969 check_fpuen();
1.970 load_spreg( R_ECX, R_FPSCR );
1.971 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.972 - load_fr_bank( R_EDX );
1.973 - JNE_rel8(8, doubleprec);
1.974 - push_fr(R_EDX, FRm);
1.975 - push_fr(R_EDX, FRn);
1.976 - JMP_rel8(6, end);
1.977 + JNE_rel8(doubleprec);
1.978 + push_fr(FRm);
1.979 + push_fr(FRn);
1.980 + JMP_rel8(end);
1.981 JMP_TARGET(doubleprec);
1.982 - push_dr(R_EDX, FRm);
1.983 - push_dr(R_EDX, FRn);
1.984 + push_dr(FRm);
1.985 + push_dr(FRn);
1.986 JMP_TARGET(end);
1.987 FCOMIP_st(1);
1.988 SETE_t();
1.989 @@ -2267,14 +2103,13 @@
1.990 check_fpuen();
1.991 load_spreg( R_ECX, R_FPSCR );
1.992 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.993 - load_fr_bank( R_EDX );
1.994 - JNE_rel8(8, doubleprec);
1.995 - push_fr(R_EDX, FRm);
1.996 - push_fr(R_EDX, FRn);
1.997 - JMP_rel8(6, end);
1.998 + JNE_rel8(doubleprec);
1.999 + push_fr(FRm);
1.1000 + push_fr(FRn);
1.1001 + JMP_rel8(end);
1.1002 JMP_TARGET(doubleprec);
1.1003 - push_dr(R_EDX, FRm);
1.1004 - push_dr(R_EDX, FRn);
1.1005 + push_dr(FRm);
1.1006 + push_dr(FRn);
1.1007 JMP_TARGET(end);
1.1008 FCOMIP_st(1);
1.1009 SETA_t();
1.1010 @@ -2286,9 +2121,8 @@
1.1011 check_fpuen();
1.1012 load_spreg( R_ECX, R_FPSCR );
1.1013 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.1014 - JNE_rel8( CALL_FUNC2_SIZE + 9, doubleprec );
1.1015 - load_fr_bank( R_ECX );
1.1016 - ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
1.1017 + JNE_rel8(doubleprec );
1.1018 + LEA_sh4r_r32( REG_OFFSET(fr[0][FRn&0x0E]), R_ECX );
1.1019 load_spreg( R_EDX, R_FPUL );
1.1020 call_func2( sh4_fsca, R_EDX, R_ECX );
1.1021 JMP_TARGET(doubleprec);
1.1022 @@ -2298,25 +2132,24 @@
1.1023 check_fpuen();
1.1024 load_spreg( R_ECX, R_FPSCR );
1.1025 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.1026 - JNE_rel8(44, doubleprec);
1.1027 + JNE_rel8( doubleprec);
1.1028
1.1029 - load_fr_bank( R_ECX );
1.1030 - push_fr( R_ECX, FVm<<2 );
1.1031 - push_fr( R_ECX, FVn<<2 );
1.1032 + push_fr( FVm<<2 );
1.1033 + push_fr( FVn<<2 );
1.1034 FMULP_st(1);
1.1035 - push_fr( R_ECX, (FVm<<2)+1);
1.1036 - push_fr( R_ECX, (FVn<<2)+1);
1.1037 + push_fr( (FVm<<2)+1);
1.1038 + push_fr( (FVn<<2)+1);
1.1039 FMULP_st(1);
1.1040 FADDP_st(1);
1.1041 - push_fr( R_ECX, (FVm<<2)+2);
1.1042 - push_fr( R_ECX, (FVn<<2)+2);
1.1043 + push_fr( (FVm<<2)+2);
1.1044 + push_fr( (FVn<<2)+2);
1.1045 FMULP_st(1);
1.1046 FADDP_st(1);
1.1047 - push_fr( R_ECX, (FVm<<2)+3);
1.1048 - push_fr( R_ECX, (FVn<<2)+3);
1.1049 + push_fr( (FVm<<2)+3);
1.1050 + push_fr( (FVn<<2)+3);
1.1051 FMULP_st(1);
1.1052 FADDP_st(1);
1.1053 - pop_fr( R_ECX, (FVn<<2)+3);
1.1054 + pop_fr( (FVn<<2)+3);
1.1055 JMP_TARGET(doubleprec);
1.1056 sh4_x86.tstate = TSTATE_NONE;
1.1057 :}
1.1058 @@ -2324,11 +2157,9 @@
1.1059 check_fpuen();
1.1060 load_spreg( R_ECX, R_FPSCR );
1.1061 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.1062 - JNE_rel8( 18 + CALL_FUNC2_SIZE, doubleprec );
1.1063 - load_fr_bank( R_EDX ); // 3
1.1064 - ADD_imm8s_r32( FVn<<4, R_EDX ); // 3
1.1065 - load_xf_bank( R_ECX ); // 12
1.1066 - call_func2( sh4_ftrv, R_EDX, R_ECX ); // 12
1.1067 + JNE_rel8( doubleprec );
1.1068 + LEA_sh4r_r32( REG_OFFSET(fr[0][FVn<<2]), R_EDX );
1.1069 + call_func1( sh4_ftrv, R_EDX ); // 12
1.1070 JMP_TARGET(doubleprec);
1.1071 sh4_x86.tstate = TSTATE_NONE;
1.1072 :}
1.1073 @@ -2338,7 +2169,7 @@
1.1074 load_spreg( R_ECX, R_FPSCR );
1.1075 XOR_imm32_r32( FPSCR_FR, R_ECX );
1.1076 store_spreg( R_ECX, R_FPSCR );
1.1077 - update_fr_bank( R_ECX );
1.1078 + call_func0( sh4_switch_fr_banks );
1.1079 sh4_x86.tstate = TSTATE_NONE;
1.1080 :}
1.1081 FSCHG {:
1.1082 @@ -2490,8 +2321,7 @@
1.1083 LDS Rm, FPSCR {:
1.1084 check_fpuen();
1.1085 load_reg( R_EAX, Rm );
1.1086 - store_spreg( R_EAX, R_FPSCR );
1.1087 - update_fr_bank( R_EAX );
1.1088 + call_func1( sh4_write_fpscr, R_EAX );
1.1089 sh4_x86.tstate = TSTATE_NONE;
1.1090 :}
1.1091 LDS.L @Rm+, FPSCR {:
1.1092 @@ -2501,8 +2331,7 @@
1.1093 MMU_TRANSLATE_READ( R_EAX );
1.1094 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1.1095 MEM_READ_LONG( R_EAX, R_EAX );
1.1096 - store_spreg( R_EAX, R_FPSCR );
1.1097 - update_fr_bank( R_EAX );
1.1098 + call_func1( sh4_write_fpscr, R_EAX );
1.1099 sh4_x86.tstate = TSTATE_NONE;
1.1100 :}
1.1101 LDS Rm, FPUL {:
1.1102 @@ -2570,7 +2399,7 @@
1.1103 MOV_r32_r32( R_EAX, R_ECX );
1.1104 AND_imm32_r32( 0xFC000000, R_EAX );
1.1105 CMP_imm32_r32( 0xE0000000, R_EAX );
1.1106 - JNE_rel8(8+CALL_FUNC1_SIZE, end);
1.1107 + JNE_rel8(end);
1.1108 call_func1( sh4_flush_store_queue, R_ECX );
1.1109 TEST_r32_r32( R_EAX, R_EAX );
1.1110 JE_exc(-1);
.