revision 375:4627600f7f8e
summary |
tree |
shortlog |
changelog |
graph |
changeset |
raw | bz2 | zip | gz changeset | 375:4627600f7f8e |
parent | 374:8f80a795513e |
child | 376:8c7587af5a5d |
author | nkeynes |
date | Tue Sep 11 21:23:48 2007 +0000 (16 years ago) |
Start in on the FP instructions (simplest possible impl)
![]() | src/sh4/sh4x86.c | view | annotate | diff | log | |
![]() | src/sh4/sh4x86.in | view | annotate | diff | log | |
![]() | src/sh4/x86op.h | view | annotate | diff | log |
1.1 --- a/src/sh4/sh4x86.c Tue Sep 11 02:14:46 2007 +00001.2 +++ b/src/sh4/sh4x86.c Tue Sep 11 21:23:48 2007 +00001.3 @@ -1,5 +1,5 @@1.4 /**1.5 - * $Id: sh4x86.c,v 1.4 2007-09-11 02:14:46 nkeynes Exp $1.6 + * $Id: sh4x86.c,v 1.5 2007-09-11 21:23:48 nkeynes Exp $1.7 *1.8 * SH4 => x86 translation. This version does no real optimization, it just1.9 * outputs straight-line x86 code - it mainly exists to provide a baseline1.10 @@ -148,24 +148,59 @@1.12 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))1.14 +/**1.15 + * Load an FR register (single-precision floating point) into an integer x861.16 + * register (eg for register-to-register moves)1.17 + */1.18 +void static inline load_fr( int bankreg, int x86reg, int frm )1.19 +{1.20 + OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);1.21 +}1.22 +1.23 +/**1.24 + * Store an FR register (single-precision floating point) from an integer x861.25 + * register (eg for register-to-register moves)1.26 + */1.27 +void static inline store_fr( int bankreg, int x86reg, int frn )1.28 +{1.29 + OP(0x89); OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);1.30 +}1.31 +1.32 +1.33 +/**1.34 + * Load a pointer to the back fp bank into the specified x86 register. 
The1.35 + * bankreg must have been previously loaded with FPSCR.1.36 + * NB: 10 bytes1.37 + */1.38 static inline void load_xf_bank( int bankreg )1.39 {1.40 - load_spreg( bankreg, R_FPSCR );1.41 SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size1.42 AND_imm8s_r32( 0x40, bankreg ); // Complete extraction1.43 OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg1.44 }1.46 +/**1.47 + * Push a 32-bit float onto the FPU stack, with bankreg previously loaded1.48 + * with the location of the current fp bank.1.49 + */1.50 static inline void push_fr( int bankreg, int frm )1.51 {1.52 OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2); // FLD.S [bankreg + frm^1*4]1.53 }1.55 +/**1.56 + * Pop a 32-bit float from the FPU stack and store it back into the fp bank,1.57 + * with bankreg previously loaded with the location of the current fp bank.1.58 + */1.59 static inline void pop_fr( int bankreg, int frm )1.60 {1.61 OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]1.62 }1.64 +/**1.65 + * Push a 64-bit double onto the FPU stack, with bankreg previously loaded1.66 + * with the location of the current fp bank.1.67 + */1.68 static inline void push_dr( int bankreg, int frm )1.69 {1.70 if( frm&1 ) {1.71 @@ -210,7 +245,45 @@1.72 PUSH_r32(arg2);1.73 PUSH_r32(arg1);1.74 call_func0(ptr);1.75 + ADD_imm8s_r32( -8, R_ESP );1.76 +}1.77 +1.78 +/**1.79 + * Write a double (64-bit) value into memory, with the first word in arg2a, and1.80 + * the second in arg2b1.81 + * NB: 30 bytes1.82 + */1.83 +static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )1.84 +{1.85 + ADD_imm8s_r32( 4, addr );1.86 + PUSH_r32(addr);1.87 + PUSH_r32(arg2b);1.88 + ADD_imm8s_r32( -4, addr );1.89 + PUSH_r32(addr);1.90 + PUSH_r32(arg2a);1.91 + call_func0(sh4_write_long);1.92 + ADD_imm8s_r32( -8, R_ESP );1.93 + call_func0(sh4_write_long);1.94 + ADD_imm8s_r32( -8, R_ESP );1.95 +}1.96 +1.97 +/**1.98 + * Read a double 
(64-bit) value from memory, writing the first word into arg2a1.99 + * and the second into arg2b. The addr must not be in EAX1.100 + * NB: 27 bytes1.101 + */1.102 +static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )1.103 +{1.104 + PUSH_r32(addr);1.105 + call_func0(sh4_read_long);1.106 + POP_r32(addr);1.107 + PUSH_r32(R_EAX);1.108 + ADD_imm8s_r32( 4, addr );1.109 + PUSH_r32(addr);1.110 + call_func0(sh4_read_long);1.111 ADD_imm8s_r32( -4, R_ESP );1.112 + MOV_r32_r32( R_EAX, arg2b );1.113 + POP_r32(arg2a);1.114 }1.116 /* Exception checks - Note that all exception checks will clobber EAX */1.117 @@ -2299,6 +2372,26 @@1.118 case 0x8:1.119 { /* FMOV @Rm, FRn */1.120 uint32_t FRn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);1.121 + load_reg( R_EDX, Rm );1.122 + check_ralign32( R_EDX );1.123 + load_spreg( R_ECX, R_FPSCR );1.124 + TEST_imm32_r32( FPSCR_SZ, R_ECX );1.125 + JNE_rel8(19);1.126 + MEM_READ_LONG( R_EDX, R_EAX );1.127 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );1.128 + store_fr( R_ECX, R_EAX, FRn );1.129 + if( FRn&1 ) {1.130 + JMP_rel8(46);1.131 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );1.132 + load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it1.133 + load_xf_bank( R_ECX );1.134 + } else {1.135 + JMP_rel8(36);1.136 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );1.137 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );1.138 + }1.139 + store_fr( R_ECX, R_EAX, FRn&0x0E );1.140 + store_fr( R_ECX, R_EDX, FRn|0x01 );1.141 }1.142 break;1.143 case 0x9:1.144 @@ -2309,6 +2402,24 @@1.145 case 0xA:1.146 { /* FMOV FRm, @Rn */1.147 uint32_t Rn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);1.148 + load_reg( R_EDX, Rn );1.149 + check_walign32( R_EDX );1.150 + load_spreg( R_ECX, R_FPSCR );1.151 + TEST_imm32_r32( FPSCR_SZ, R_ECX );1.152 + JNE_rel8(20);1.153 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );1.154 + load_fr( R_ECX, R_EAX, FRm );1.155 + MEM_WRITE_LONG( R_EDX, R_EAX ); // 121.156 + if( FRm&1 ) {1.157 + JMP_rel8( 46 );1.158 + load_xf_bank( R_ECX );1.159 + } else 
{1.160 + JMP_rel8( 39 );1.161 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );1.162 + }1.163 + load_fr( R_ECX, R_EAX, FRm&0x0E );1.164 + load_fr( R_ECX, R_ECX, FRm|0x01 );1.165 + MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );1.166 }1.167 break;1.168 case 0xB:1.169 @@ -2319,6 +2430,48 @@1.170 case 0xC:1.171 { /* FMOV FRm, FRn */1.172 uint32_t FRn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);1.173 + /* As horrible as this looks, it's actually covering 5 separate cases:1.174 + * 1. 32-bit fr-to-fr (PR=0)1.175 + * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )1.176 + * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )1.177 + * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )1.178 + * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )1.179 + */1.180 + load_spreg( R_ECX, R_FPSCR );1.181 + load_spreg( R_EDX, REG_OFFSET(fr_bank) );1.182 + TEST_imm32_r32( FPSCR_SZ, R_ECX );1.183 + JNE_rel8(8);1.184 + load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch1.185 + store_fr( R_EDX, R_EAX, FRn );1.186 + if( FRm&1 ) {1.187 + JMP_rel8(22);1.188 + load_xf_bank( R_ECX );1.189 + load_fr( R_ECX, R_EAX, FRm-1 );1.190 + if( FRn&1 ) {1.191 + load_fr( R_ECX, R_EDX, FRm );1.192 + store_fr( R_ECX, R_EAX, FRn-1 );1.193 + store_fr( R_ECX, R_EDX, FRn );1.194 + } else /* FRn&1 == 0 */ {1.195 + load_fr( R_ECX, R_ECX, FRm );1.196 + store_fr( R_EDX, R_EAX, FRn-1 );1.197 + store_fr( R_EDX, R_ECX, FRn );1.198 + }1.199 + } else /* FRm&1 == 0 */ {1.200 + if( FRn&1 ) {1.201 + JMP_rel8(22);1.202 + load_xf_bank( R_ECX );1.203 + load_fr( R_EDX, R_EAX, FRm );1.204 + load_fr( R_EDX, R_EDX, FRm+1 );1.205 + store_fr( R_ECX, R_EAX, FRn-1 );1.206 + store_fr( R_ECX, R_EDX, FRn );1.207 + } else /* FRn&1 == 0 */ {1.208 + JMP_rel8(12);1.209 + load_fr( R_EDX, R_EAX, FRm );1.210 + load_fr( R_EDX, R_ECX, FRm+1 );1.211 + store_fr( R_EDX, R_EAX, FRn );1.212 + store_fr( R_EDX, R_ECX, FRn+1 );1.213 + }1.214 + }1.215 }1.216 break;1.217 case 0xD:
2.1 --- a/src/sh4/sh4x86.in Tue Sep 11 02:14:46 2007 +00002.2 +++ b/src/sh4/sh4x86.in Tue Sep 11 21:23:48 2007 +00002.3 @@ -1,5 +1,5 @@2.4 /**2.5 - * $Id: sh4x86.in,v 1.4 2007-09-11 02:14:46 nkeynes Exp $2.6 + * $Id: sh4x86.in,v 1.5 2007-09-11 21:23:48 nkeynes Exp $2.7 *2.8 * SH4 => x86 translation. This version does no real optimization, it just2.9 * outputs straight-line x86 code - it mainly exists to provide a baseline2.10 @@ -148,44 +148,67 @@2.12 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))2.14 +/**2.15 + * Load an FR register (single-precision floating point) into an integer x862.16 + * register (eg for register-to-register moves)2.17 + */2.18 +void static inline load_fr( int bankreg, int x86reg, int frm )2.19 +{2.20 + OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);2.21 +}2.22 +2.23 +/**2.24 + * Store an FR register (single-precision floating point) from an integer x862.25 + * register (eg for register-to-register moves)2.26 + */2.27 +void static inline store_fr( int bankreg, int x86reg, int frn )2.28 +{2.29 + OP(0x89); OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);2.30 +}2.31 +2.32 +2.33 +/**2.34 + * Load a pointer to the back fp bank into the specified x86 register. 
The2.35 + * bankreg must have been previously loaded with FPSCR.2.36 + * NB: 10 bytes2.37 + */2.38 static inline void load_xf_bank( int bankreg )2.39 {2.40 - load_spreg( bankreg, R_FPSCR );2.41 SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size2.42 AND_imm8s_r32( 0x40, bankreg ); // Complete extraction2.43 OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg2.44 }2.46 +/**2.47 + * Push a 32-bit float onto the FPU stack, with bankreg previously loaded2.48 + * with the location of the current fp bank.2.49 + */2.50 static inline void push_fr( int bankreg, int frm )2.51 {2.52 OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2); // FLD.S [bankreg + frm^1*4]2.53 }2.55 +/**2.56 + * Pop a 32-bit float from the FPU stack and store it back into the fp bank,2.57 + * with bankreg previously loaded with the location of the current fp bank.2.58 + */2.59 static inline void pop_fr( int bankreg, int frm )2.60 {2.61 OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]2.62 }2.64 +/**2.65 + * Push a 64-bit double onto the FPU stack, with bankreg previously loaded2.66 + * with the location of the current fp bank.2.67 + */2.68 static inline void push_dr( int bankreg, int frm )2.69 {2.70 - if( frm&1 ) {2.71 - // this is technically undefined, but it seems to work consistently - high 32 bits2.72 - // loaded from FRm (32-bits), low 32bits are 0.2.73 - OP(0xFF); OP(0x70 + bankreg); OP((frm^1)<<2); // PUSH [bankreg + frm^1]2.74 - PUSH_imm32(0);2.75 -2.76 -2.77 - } else {2.78 - OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]2.79 - }2.80 + OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]2.81 }2.83 static inline void pop_dr( int bankreg, int frm )2.84 {2.85 - if( frm&1 ) {2.86 - } else {2.87 - OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]2.88 - }2.89 + OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]2.90 }2.92 /**2.93 @@ 
-210,7 +233,45 @@2.94 PUSH_r32(arg2);2.95 PUSH_r32(arg1);2.96 call_func0(ptr);2.97 + ADD_imm8s_r32( -8, R_ESP );2.98 +}2.99 +2.100 +/**2.101 + * Write a double (64-bit) value into memory, with the first word in arg2a, and2.102 + * the second in arg2b2.103 + * NB: 30 bytes2.104 + */2.105 +static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )2.106 +{2.107 + ADD_imm8s_r32( 4, addr );2.108 + PUSH_r32(addr);2.109 + PUSH_r32(arg2b);2.110 + ADD_imm8s_r32( -4, addr );2.111 + PUSH_r32(addr);2.112 + PUSH_r32(arg2a);2.113 + call_func0(sh4_write_long);2.114 + ADD_imm8s_r32( -8, R_ESP );2.115 + call_func0(sh4_write_long);2.116 + ADD_imm8s_r32( -8, R_ESP );2.117 +}2.118 +2.119 +/**2.120 + * Read a double (64-bit) value from memory, writing the first word into arg2a2.121 + * and the second into arg2b. The addr must not be in EAX2.122 + * NB: 27 bytes2.123 + */2.124 +static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )2.125 +{2.126 + PUSH_r32(addr);2.127 + call_func0(sh4_read_long);2.128 + POP_r32(addr);2.129 + PUSH_r32(R_EAX);2.130 + ADD_imm8s_r32( 4, addr );2.131 + PUSH_r32(addr);2.132 + call_func0(sh4_read_long);2.133 ADD_imm8s_r32( -4, R_ESP );2.134 + MOV_r32_r32( R_EAX, arg2b );2.135 + POP_r32(arg2a);2.136 }2.138 /* Exception checks - Note that all exception checks will clobber EAX */2.139 @@ -1227,6 +1288,98 @@2.140 SETC_t();2.141 :}2.143 +/* Floating point moves */2.144 +FMOV FRm, FRn {:2.145 + /* As horrible as this looks, it's actually covering 5 separate cases:2.146 + * 1. 32-bit fr-to-fr (PR=0)2.147 + * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )2.148 + * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )2.149 + * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )2.150 + * 5. 
64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )2.151 + */2.152 + load_spreg( R_ECX, R_FPSCR );2.153 + load_spreg( R_EDX, REG_OFFSET(fr_bank) );2.154 + TEST_imm32_r32( FPSCR_SZ, R_ECX );2.155 + JNE_rel8(8);2.156 + load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch2.157 + store_fr( R_EDX, R_EAX, FRn );2.158 + if( FRm&1 ) {2.159 + JMP_rel8(22);2.160 + load_xf_bank( R_ECX );2.161 + load_fr( R_ECX, R_EAX, FRm-1 );2.162 + if( FRn&1 ) {2.163 + load_fr( R_ECX, R_EDX, FRm );2.164 + store_fr( R_ECX, R_EAX, FRn-1 );2.165 + store_fr( R_ECX, R_EDX, FRn );2.166 + } else /* FRn&1 == 0 */ {2.167 + load_fr( R_ECX, R_ECX, FRm );2.168 + store_fr( R_EDX, R_EAX, FRn-1 );2.169 + store_fr( R_EDX, R_ECX, FRn );2.170 + }2.171 + } else /* FRm&1 == 0 */ {2.172 + if( FRn&1 ) {2.173 + JMP_rel8(22);2.174 + load_xf_bank( R_ECX );2.175 + load_fr( R_EDX, R_EAX, FRm );2.176 + load_fr( R_EDX, R_EDX, FRm+1 );2.177 + store_fr( R_ECX, R_EAX, FRn-1 );2.178 + store_fr( R_ECX, R_EDX, FRn );2.179 + } else /* FRn&1 == 0 */ {2.180 + JMP_rel8(12);2.181 + load_fr( R_EDX, R_EAX, FRm );2.182 + load_fr( R_EDX, R_ECX, FRm+1 );2.183 + store_fr( R_EDX, R_EAX, FRn );2.184 + store_fr( R_EDX, R_ECX, FRn+1 );2.185 + }2.186 + }2.187 +:}2.188 +FMOV FRm, @Rn {:2.189 + load_reg( R_EDX, Rn );2.190 + check_walign32( R_EDX );2.191 + load_spreg( R_ECX, R_FPSCR );2.192 + TEST_imm32_r32( FPSCR_SZ, R_ECX );2.193 + JNE_rel8(20);2.194 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );2.195 + load_fr( R_ECX, R_EAX, FRm );2.196 + MEM_WRITE_LONG( R_EDX, R_EAX ); // 122.197 + if( FRm&1 ) {2.198 + JMP_rel8( 46 );2.199 + load_xf_bank( R_ECX );2.200 + } else {2.201 + JMP_rel8( 39 );2.202 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );2.203 + }2.204 + load_fr( R_ECX, R_EAX, FRm&0x0E );2.205 + load_fr( R_ECX, R_ECX, FRm|0x01 );2.206 + MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );2.207 +:}2.208 +FMOV @Rm, FRn {:2.209 + load_reg( R_EDX, Rm );2.210 + check_ralign32( R_EDX );2.211 + load_spreg( R_ECX, R_FPSCR );2.212 + TEST_imm32_r32( FPSCR_SZ, R_ECX );2.213 + 
JNE_rel8(19);2.214 + MEM_READ_LONG( R_EDX, R_EAX );2.215 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );2.216 + store_fr( R_ECX, R_EAX, FRn );2.217 + if( FRn&1 ) {2.218 + JMP_rel8(46);2.219 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );2.220 + load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it2.221 + load_xf_bank( R_ECX );2.222 + } else {2.223 + JMP_rel8(36);2.224 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );2.225 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );2.226 + }2.227 + store_fr( R_ECX, R_EAX, FRn&0x0E );2.228 + store_fr( R_ECX, R_EDX, FRn|0x01 );2.229 +:}2.230 +FMOV FRm, @-Rn {: :}2.231 +FMOV FRm, @(R0, Rn) {: :}2.232 +FMOV @Rm+, FRn {: :}2.233 +FMOV @(R0, Rm), FRn {: :}2.234 +2.235 /* Floating point instructions */2.236 FABS FRn {:2.237 load_spreg( R_ECX, R_FPSCR );2.238 @@ -1249,17 +1402,52 @@2.239 FDIV FRm, FRn {: :}2.240 FIPR FVm, FVn {: :}2.241 FLDS FRm, FPUL {: :}2.242 -FLDI0 FRn {: :}2.243 -FLDI1 FRn {: :}2.244 -FLOAT FPUL, FRn {: :}2.245 -FMAC FR0, FRm, FRn {: :}2.246 -FMOV FRm, FRn {: :}2.247 -FMOV FRm, @Rn {: :}2.248 -FMOV FRm, @-Rn {: :}2.249 -FMOV FRm, @(R0, Rn) {: :}2.250 -FMOV @Rm, FRn {: :}2.251 -FMOV @Rm+, FRn {: :}2.252 -FMOV @(R0, Rm), FRn {: :}2.253 +FLDI0 FRn {: /* IFF PR=0 */2.254 + load_spreg( R_ECX, R_FPSCR );2.255 + TEST_imm32_r32( FPSCR_PR, R_ECX );2.256 + JNE_rel8(8);2.257 + xor_r32_r32( R_EAX, R_EAX );2.258 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );2.259 + store_fr( R_ECX, R_EAX, FRn );2.260 +:}2.261 +FLDI1 FRn {: /* IFF PR=0 */2.262 + load_spreg( R_ECX, R_FPSCR );2.263 + TEST_imm32_r32( FPSCR_PR, R_ECX );2.264 + JNE_rel8(11);2.265 + load_imm32(R_EAX, 0x3F800000);2.266 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );2.267 + store_fr( R_ECX, R_EAX, FRn );2.268 +:}2.269 +FLOAT FPUL, FRn {:2.270 + load_spreg( R_ECX, R_FPSCR );2.271 + load_spreg(R_EDX, REG_OFFSET(fr_bank));2.272 + FILD_sh4r(R_FPUL);2.273 + TEST_imm32_r32( FPSCR_PR, R_ECX );2.274 + JNE_rel8(5);2.275 + pop_fr( R_EDX, FRn );2.276 + JMP_rel8(3);2.277 + pop_dr( R_EDX, FRn );2.278 
+:}2.279 +FMAC FR0, FRm, FRn {:2.280 + load_spreg( R_ECX, R_FPSCR );2.281 + load_spreg( R_EDX, REG_OFFSET(fr_bank));2.282 + TEST_imm32_r32( FPSCR_PR, R_ECX );2.283 + JNE_rel8(18);2.284 + push_fr( R_EDX, 0 );2.285 + push_fr( R_EDX, FRm );2.286 + FMULP_st(1);2.287 + push_fr( R_EDX, FRn );2.288 + FADDP_st(1);2.289 + pop_fr( R_EDX, FRn );2.290 + JMP_rel8(16);2.291 + push_dr( R_EDX, 0 );2.292 + push_dr( R_EDX, FRm );2.293 + FMULP_st(1);2.294 + push_dr( R_EDX, FRn );2.295 + FADDP_st(1);2.296 + pop_dr( R_EDX, FRn );2.297 +:}2.298 +2.299 FMUL FRm, FRn {: :}2.300 FNEG FRn {: :}2.301 FRCHG {: :}
3.1 --- a/src/sh4/x86op.h Tue Sep 11 02:14:46 2007 +00003.2 +++ b/src/sh4/x86op.h Tue Sep 11 21:23:48 2007 +00003.3 @@ -1,5 +1,5 @@3.4 /**3.5 - * $Id: x86op.h,v 1.4 2007-09-11 02:14:46 nkeynes Exp $3.6 + * $Id: x86op.h,v 1.5 2007-09-11 21:23:48 nkeynes Exp $3.7 *3.8 * Definitions of x86 opcodes for use by the translator.3.9 *3.10 @@ -152,6 +152,8 @@3.11 #define FADDP_st(st) OP(0xDE); OP(0xC0+st)3.12 #define FCHS_st0() OP(0xD9); OP(0xE0)3.13 #define FDIVP_st(st) OP(0xDE); OP(0xF8+st)3.14 +#define FILD_sh4r(disp) OP(0xDB); MODRM_sh4r_r32(disp, 0)3.15 +#define FISTTP_shr4(disp) OP(0xDB); MODRM_sh4r_r32(disp, 1)3.16 #define FMULP_st(st) OP(0xDE); OP(0xC8+st)3.17 #define FSUB_st(st) OP(0xDE); OP(0xE8+st)3.18 #define FSQRT_st0() OP(0xD9); OP(0xFA)
.