Search
lxdream.org :: lxdream :: r375:4627600f7f8e
lxdream 0.9.1
released Jun 29
Download Now
changeset 375:4627600f7f8e
parent 374:8f80a795513e
child 376:8c7587af5a5d
author nkeynes
date Tue Sep 11 21:23:48 2007 +0000 (16 years ago)
Start in on the FP instructions (simplest possible impl)
src/sh4/sh4x86.c
src/sh4/sh4x86.in
src/sh4/x86op.h
1.1 --- a/src/sh4/sh4x86.c Tue Sep 11 02:14:46 2007 +0000
1.2 +++ b/src/sh4/sh4x86.c Tue Sep 11 21:23:48 2007 +0000
1.3 @@ -1,5 +1,5 @@
1.4 /**
1.5 - * $Id: sh4x86.c,v 1.4 2007-09-11 02:14:46 nkeynes Exp $
1.6 + * $Id: sh4x86.c,v 1.5 2007-09-11 21:23:48 nkeynes Exp $
1.7 *
1.8 * SH4 => x86 translation. This version does no real optimization, it just
1.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
1.10 @@ -148,24 +148,59 @@
1.11
1.12 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
1.13
1.14 +/**
1.15 + * Load an FR register (single-precision floating point) into an integer x86
1.16 + * register (eg for register-to-register moves)
1.17 + */
1.18 +void static inline load_fr( int bankreg, int x86reg, int frm )
1.19 +{
1.20 + OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
1.21 +}
1.22 +
1.23 +/**
1.24 + * Store an integer x86 register into an FR register (single-precision
1.25 + * floating point) (eg for register-to-register moves)
1.26 + */
1.27 +void static inline store_fr( int bankreg, int x86reg, int frn )
1.28 +{
1.29 + OP(0x89); OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
1.30 +}
1.31 +
1.32 +
1.33 +/**
1.34 + * Load a pointer to the back fp bank into the specified x86 register. The
1.35 + * bankreg must have been previously loaded with FPSCR.
1.36 + * NB: 10 bytes
1.37 + */
1.38 static inline void load_xf_bank( int bankreg )
1.39 {
1.40 - load_spreg( bankreg, R_FPSCR );
1.41 SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
1.42 AND_imm8s_r32( 0x40, bankreg ); // Complete extraction
1.43 OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
1.44 }
1.45
1.46 +/**
1.47 + * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
1.48 + * with the location of the current fp bank.
1.49 + */
1.50 static inline void push_fr( int bankreg, int frm )
1.51 {
1.52 OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2); // FLD.S [bankreg + frm^1*4]
1.53 }
1.54
1.55 +/**
1.56 + * Pop a 32-bit float from the FPU stack and store it back into the fp bank,
1.57 + * with bankreg previously loaded with the location of the current fp bank.
1.58 + */
1.59 static inline void pop_fr( int bankreg, int frm )
1.60 {
1.61 OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
1.62 }
1.63
1.64 +/**
1.65 + * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
1.66 + * with the location of the current fp bank.
1.67 + */
1.68 static inline void push_dr( int bankreg, int frm )
1.69 {
1.70 if( frm&1 ) {
1.71 @@ -210,7 +245,45 @@
1.72 PUSH_r32(arg2);
1.73 PUSH_r32(arg1);
1.74 call_func0(ptr);
1.75 + ADD_imm8s_r32( -8, R_ESP );
1.76 +}
1.77 +
1.78 +/**
1.79 + * Write a double (64-bit) value into memory, with the first word in arg2a, and
1.80 + * the second in arg2b
1.81 + * NB: 30 bytes
1.82 + */
1.83 +static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
1.84 +{
1.85 + ADD_imm8s_r32( 4, addr );
1.86 + PUSH_r32(addr);
1.87 + PUSH_r32(arg2b);
1.88 + ADD_imm8s_r32( -4, addr );
1.89 + PUSH_r32(addr);
1.90 + PUSH_r32(arg2a);
1.91 + call_func0(sh4_write_long);
1.92 + ADD_imm8s_r32( -8, R_ESP );
1.93 + call_func0(sh4_write_long);
1.94 + ADD_imm8s_r32( -8, R_ESP );
1.95 +}
1.96 +
1.97 +/**
1.98 + * Read a double (64-bit) value from memory, writing the first word into arg2a
1.99 + * and the second into arg2b. The addr must not be in EAX
1.100 + * NB: 27 bytes
1.101 + */
1.102 +static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
1.103 +{
1.104 + PUSH_r32(addr);
1.105 + call_func0(sh4_read_long);
1.106 + POP_r32(addr);
1.107 + PUSH_r32(R_EAX);
1.108 + ADD_imm8s_r32( 4, addr );
1.109 + PUSH_r32(addr);
1.110 + call_func0(sh4_read_long);
1.111 ADD_imm8s_r32( -4, R_ESP );
1.112 + MOV_r32_r32( R_EAX, arg2b );
1.113 + POP_r32(arg2a);
1.114 }
1.115
1.116 /* Exception checks - Note that all exception checks will clobber EAX */
1.117 @@ -2299,6 +2372,26 @@
1.118 case 0x8:
1.119 { /* FMOV @Rm, FRn */
1.120 uint32_t FRn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.121 + load_reg( R_EDX, Rm );
1.122 + check_ralign32( R_EDX );
1.123 + load_spreg( R_ECX, R_FPSCR );
1.124 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.125 + JNE_rel8(19);
1.126 + MEM_READ_LONG( R_EDX, R_EAX );
1.127 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.128 + store_fr( R_ECX, R_EAX, FRn );
1.129 + if( FRn&1 ) {
1.130 + JMP_rel8(46);
1.131 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.132 + load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
1.133 + load_xf_bank( R_ECX );
1.134 + } else {
1.135 + JMP_rel8(36);
1.136 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.137 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.138 + }
1.139 + store_fr( R_ECX, R_EAX, FRn&0x0E );
1.140 + store_fr( R_ECX, R_EDX, FRn|0x01 );
1.141 }
1.142 break;
1.143 case 0x9:
1.144 @@ -2309,6 +2402,24 @@
1.145 case 0xA:
1.146 { /* FMOV FRm, @Rn */
1.147 uint32_t Rn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.148 + load_reg( R_EDX, Rn );
1.149 + check_walign32( R_EDX );
1.150 + load_spreg( R_ECX, R_FPSCR );
1.151 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.152 + JNE_rel8(20);
1.153 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.154 + load_fr( R_ECX, R_EAX, FRm );
1.155 + MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
1.156 + if( FRm&1 ) {
1.157 + JMP_rel8( 46 );
1.158 + load_xf_bank( R_ECX );
1.159 + } else {
1.160 + JMP_rel8( 39 );
1.161 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.162 + }
1.163 + load_fr( R_ECX, R_EAX, FRm&0x0E );
1.164 + load_fr( R_ECX, R_ECX, FRm|0x01 );
1.165 + MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
1.166 }
1.167 break;
1.168 case 0xB:
1.169 @@ -2319,6 +2430,48 @@
1.170 case 0xC:
1.171 { /* FMOV FRm, FRn */
1.172 uint32_t FRn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.173 + /* As horrible as this looks, it's actually covering 5 separate cases:
1.174 + * 1. 32-bit fr-to-fr (SZ=0)
1.175 + * 2. 64-bit dr-to-dr (SZ=1, FRm&1 == 0, FRn&1 == 0 )
1.176 + * 3. 64-bit dr-to-xd (SZ=1, FRm&1 == 0, FRn&1 == 1 )
1.177 + * 4. 64-bit xd-to-dr (SZ=1, FRm&1 == 1, FRn&1 == 0 )
1.178 + * 5. 64-bit xd-to-xd (SZ=1, FRm&1 == 1, FRn&1 == 1 )
1.179 + */
1.180 + load_spreg( R_ECX, R_FPSCR );
1.181 + load_spreg( R_EDX, REG_OFFSET(fr_bank) );
1.182 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.183 + JNE_rel8(8);
1.184 + load_fr( R_EDX, R_EAX, FRm ); // SZ=0 branch
1.185 + store_fr( R_EDX, R_EAX, FRn );
1.186 + if( FRm&1 ) {
1.187 + JMP_rel8(22);
1.188 + load_xf_bank( R_ECX );
1.189 + load_fr( R_ECX, R_EAX, FRm-1 );
1.190 + if( FRn&1 ) {
1.191 + load_fr( R_ECX, R_EDX, FRm );
1.192 + store_fr( R_ECX, R_EAX, FRn-1 );
1.193 + store_fr( R_ECX, R_EDX, FRn );
1.194 + } else /* FRn&1 == 0 */ {
1.195 + load_fr( R_ECX, R_ECX, FRm );
1.196 + store_fr( R_EDX, R_EAX, FRn-1 );
1.197 + store_fr( R_EDX, R_ECX, FRn );
1.198 + }
1.199 + } else /* FRm&1 == 0 */ {
1.200 + if( FRn&1 ) {
1.201 + JMP_rel8(22);
1.202 + load_xf_bank( R_ECX );
1.203 + load_fr( R_EDX, R_EAX, FRm );
1.204 + load_fr( R_EDX, R_EDX, FRm+1 );
1.205 + store_fr( R_ECX, R_EAX, FRn-1 );
1.206 + store_fr( R_ECX, R_EDX, FRn );
1.207 + } else /* FRn&1 == 0 */ {
1.208 + JMP_rel8(12);
1.209 + load_fr( R_EDX, R_EAX, FRm );
1.210 + load_fr( R_EDX, R_ECX, FRm+1 );
1.211 + store_fr( R_EDX, R_EAX, FRn );
1.212 + store_fr( R_EDX, R_ECX, FRn+1 );
1.213 + }
1.214 + }
1.215 }
1.216 break;
1.217 case 0xD:
2.1 --- a/src/sh4/sh4x86.in Tue Sep 11 02:14:46 2007 +0000
2.2 +++ b/src/sh4/sh4x86.in Tue Sep 11 21:23:48 2007 +0000
2.3 @@ -1,5 +1,5 @@
2.4 /**
2.5 - * $Id: sh4x86.in,v 1.4 2007-09-11 02:14:46 nkeynes Exp $
2.6 + * $Id: sh4x86.in,v 1.5 2007-09-11 21:23:48 nkeynes Exp $
2.7 *
2.8 * SH4 => x86 translation. This version does no real optimization, it just
2.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
2.10 @@ -148,44 +148,67 @@
2.11
2.12 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
2.13
2.14 +/**
2.15 + * Load an FR register (single-precision floating point) into an integer x86
2.16 + * register (eg for register-to-register moves)
2.17 + */
2.18 +void static inline load_fr( int bankreg, int x86reg, int frm )
2.19 +{
2.20 + OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
2.21 +}
2.22 +
2.23 +/**
2.24 + * Store an integer x86 register into an FR register (single-precision
2.25 + * floating point) (eg for register-to-register moves)
2.26 + */
2.27 +void static inline store_fr( int bankreg, int x86reg, int frn )
2.28 +{
2.29 + OP(0x89); OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
2.30 +}
2.31 +
2.32 +
2.33 +/**
2.34 + * Load a pointer to the back fp bank into the specified x86 register. The
2.35 + * bankreg must have been previously loaded with FPSCR.
2.36 + * NB: 10 bytes
2.37 + */
2.38 static inline void load_xf_bank( int bankreg )
2.39 {
2.40 - load_spreg( bankreg, R_FPSCR );
2.41 SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
2.42 AND_imm8s_r32( 0x40, bankreg ); // Complete extraction
2.43 OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
2.44 }
2.45
2.46 +/**
2.47 + * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
2.48 + * with the location of the current fp bank.
2.49 + */
2.50 static inline void push_fr( int bankreg, int frm )
2.51 {
2.52 OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2); // FLD.S [bankreg + frm^1*4]
2.53 }
2.54
2.55 +/**
2.56 + * Pop a 32-bit float from the FPU stack and store it back into the fp bank,
2.57 + * with bankreg previously loaded with the location of the current fp bank.
2.58 + */
2.59 static inline void pop_fr( int bankreg, int frm )
2.60 {
2.61 OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
2.62 }
2.63
2.64 +/**
2.65 + * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
2.66 + * with the location of the current fp bank.
2.67 + */
2.68 static inline void push_dr( int bankreg, int frm )
2.69 {
2.70 - if( frm&1 ) {
2.71 - // this is technically undefined, but it seems to work consistently - high 32 bits
2.72 - // loaded from FRm (32-bits), low 32bits are 0.
2.73 - OP(0xFF); OP(0x70 + bankreg); OP((frm^1)<<2); // PUSH [bankreg + frm^1]
2.74 - PUSH_imm32(0);
2.75 -
2.76 -
2.77 - } else {
2.78 - OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
2.79 - }
2.80 + OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
2.81 }
2.82
2.83 static inline void pop_dr( int bankreg, int frm )
2.84 {
2.85 - if( frm&1 ) {
2.86 - } else {
2.87 - OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
2.88 - }
2.89 + OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
2.90 }
2.91
2.92 /**
2.93 @@ -210,7 +233,45 @@
2.94 PUSH_r32(arg2);
2.95 PUSH_r32(arg1);
2.96 call_func0(ptr);
2.97 + ADD_imm8s_r32( -8, R_ESP );
2.98 +}
2.99 +
2.100 +/**
2.101 + * Write a double (64-bit) value into memory, with the first word in arg2a, and
2.102 + * the second in arg2b
2.103 + * NB: 30 bytes
2.104 + */
2.105 +static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
2.106 +{
2.107 + ADD_imm8s_r32( 4, addr );
2.108 + PUSH_r32(addr);
2.109 + PUSH_r32(arg2b);
2.110 + ADD_imm8s_r32( -4, addr );
2.111 + PUSH_r32(addr);
2.112 + PUSH_r32(arg2a);
2.113 + call_func0(sh4_write_long);
2.114 + ADD_imm8s_r32( -8, R_ESP );
2.115 + call_func0(sh4_write_long);
2.116 + ADD_imm8s_r32( -8, R_ESP );
2.117 +}
2.118 +
2.119 +/**
2.120 + * Read a double (64-bit) value from memory, writing the first word into arg2a
2.121 + * and the second into arg2b. The addr must not be in EAX
2.122 + * NB: 27 bytes
2.123 + */
2.124 +static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
2.125 +{
2.126 + PUSH_r32(addr);
2.127 + call_func0(sh4_read_long);
2.128 + POP_r32(addr);
2.129 + PUSH_r32(R_EAX);
2.130 + ADD_imm8s_r32( 4, addr );
2.131 + PUSH_r32(addr);
2.132 + call_func0(sh4_read_long);
2.133 ADD_imm8s_r32( -4, R_ESP );
2.134 + MOV_r32_r32( R_EAX, arg2b );
2.135 + POP_r32(arg2a);
2.136 }
2.137
2.138 /* Exception checks - Note that all exception checks will clobber EAX */
2.139 @@ -1227,6 +1288,98 @@
2.140 SETC_t();
2.141 :}
2.142
2.143 +/* Floating point moves */
2.144 +FMOV FRm, FRn {:
2.145 + /* As horrible as this looks, it's actually covering 5 separate cases:
2.146 + * 1. 32-bit fr-to-fr (SZ=0)
2.147 + * 2. 64-bit dr-to-dr (SZ=1, FRm&1 == 0, FRn&1 == 0 )
2.148 + * 3. 64-bit dr-to-xd (SZ=1, FRm&1 == 0, FRn&1 == 1 )
2.149 + * 4. 64-bit xd-to-dr (SZ=1, FRm&1 == 1, FRn&1 == 0 )
2.150 + * 5. 64-bit xd-to-xd (SZ=1, FRm&1 == 1, FRn&1 == 1 )
2.151 + */
2.152 + load_spreg( R_ECX, R_FPSCR );
2.153 + load_spreg( R_EDX, REG_OFFSET(fr_bank) );
2.154 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
2.155 + JNE_rel8(8);
2.156 + load_fr( R_EDX, R_EAX, FRm ); // SZ=0 branch
2.157 + store_fr( R_EDX, R_EAX, FRn );
2.158 + if( FRm&1 ) {
2.159 + JMP_rel8(22);
2.160 + load_xf_bank( R_ECX );
2.161 + load_fr( R_ECX, R_EAX, FRm-1 );
2.162 + if( FRn&1 ) {
2.163 + load_fr( R_ECX, R_EDX, FRm );
2.164 + store_fr( R_ECX, R_EAX, FRn-1 );
2.165 + store_fr( R_ECX, R_EDX, FRn );
2.166 + } else /* FRn&1 == 0 */ {
2.167 + load_fr( R_ECX, R_ECX, FRm );
2.168 + store_fr( R_EDX, R_EAX, FRn-1 );
2.169 + store_fr( R_EDX, R_ECX, FRn );
2.170 + }
2.171 + } else /* FRm&1 == 0 */ {
2.172 + if( FRn&1 ) {
2.173 + JMP_rel8(22);
2.174 + load_xf_bank( R_ECX );
2.175 + load_fr( R_EDX, R_EAX, FRm );
2.176 + load_fr( R_EDX, R_EDX, FRm+1 );
2.177 + store_fr( R_ECX, R_EAX, FRn-1 );
2.178 + store_fr( R_ECX, R_EDX, FRn );
2.179 + } else /* FRn&1 == 0 */ {
2.180 + JMP_rel8(12);
2.181 + load_fr( R_EDX, R_EAX, FRm );
2.182 + load_fr( R_EDX, R_ECX, FRm+1 );
2.183 + store_fr( R_EDX, R_EAX, FRn );
2.184 + store_fr( R_EDX, R_ECX, FRn+1 );
2.185 + }
2.186 + }
2.187 +:}
2.188 +FMOV FRm, @Rn {:
2.189 + load_reg( R_EDX, Rn );
2.190 + check_walign32( R_EDX );
2.191 + load_spreg( R_ECX, R_FPSCR );
2.192 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
2.193 + JNE_rel8(20);
2.194 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
2.195 + load_fr( R_ECX, R_EAX, FRm );
2.196 + MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
2.197 + if( FRm&1 ) {
2.198 + JMP_rel8( 46 );
2.199 + load_xf_bank( R_ECX );
2.200 + } else {
2.201 + JMP_rel8( 39 );
2.202 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
2.203 + }
2.204 + load_fr( R_ECX, R_EAX, FRm&0x0E );
2.205 + load_fr( R_ECX, R_ECX, FRm|0x01 );
2.206 + MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
2.207 +:}
2.208 +FMOV @Rm, FRn {:
2.209 + load_reg( R_EDX, Rm );
2.210 + check_ralign32( R_EDX );
2.211 + load_spreg( R_ECX, R_FPSCR );
2.212 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
2.213 + JNE_rel8(19);
2.214 + MEM_READ_LONG( R_EDX, R_EAX );
2.215 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
2.216 + store_fr( R_ECX, R_EAX, FRn );
2.217 + if( FRn&1 ) {
2.218 + JMP_rel8(46);
2.219 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
2.220 + load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
2.221 + load_xf_bank( R_ECX );
2.222 + } else {
2.223 + JMP_rel8(36);
2.224 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
2.225 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
2.226 + }
2.227 + store_fr( R_ECX, R_EAX, FRn&0x0E );
2.228 + store_fr( R_ECX, R_EDX, FRn|0x01 );
2.229 +:}
2.230 +FMOV FRm, @-Rn {: :}
2.231 +FMOV FRm, @(R0, Rn) {: :}
2.232 +FMOV @Rm+, FRn {: :}
2.233 +FMOV @(R0, Rm), FRn {: :}
2.234 +
2.235 /* Floating point instructions */
2.236 FABS FRn {:
2.237 load_spreg( R_ECX, R_FPSCR );
2.238 @@ -1249,17 +1402,52 @@
2.239 FDIV FRm, FRn {: :}
2.240 FIPR FVm, FVn {: :}
2.241 FLDS FRm, FPUL {: :}
2.242 -FLDI0 FRn {: :}
2.243 -FLDI1 FRn {: :}
2.244 -FLOAT FPUL, FRn {: :}
2.245 -FMAC FR0, FRm, FRn {: :}
2.246 -FMOV FRm, FRn {: :}
2.247 -FMOV FRm, @Rn {: :}
2.248 -FMOV FRm, @-Rn {: :}
2.249 -FMOV FRm, @(R0, Rn) {: :}
2.250 -FMOV @Rm, FRn {: :}
2.251 -FMOV @Rm+, FRn {: :}
2.252 -FMOV @(R0, Rm), FRn {: :}
2.253 +FLDI0 FRn {: /* IFF PR=0 */
2.254 + load_spreg( R_ECX, R_FPSCR );
2.255 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.256 + JNE_rel8(8);
2.257 + xor_r32_r32( R_EAX, R_EAX );
2.258 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
2.259 + store_fr( R_ECX, R_EAX, FRn );
2.260 +:}
2.261 +FLDI1 FRn {: /* IFF PR=0 */
2.262 + load_spreg( R_ECX, R_FPSCR );
2.263 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.264 + JNE_rel8(11);
2.265 + load_imm32(R_EAX, 0x3F800000);
2.266 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
2.267 + store_fr( R_ECX, R_EAX, FRn );
2.268 +:}
2.269 +FLOAT FPUL, FRn {:
2.270 + load_spreg( R_ECX, R_FPSCR );
2.271 + load_spreg(R_EDX, REG_OFFSET(fr_bank));
2.272 + FILD_sh4r(R_FPUL);
2.273 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.274 + JNE_rel8(5);
2.275 + pop_fr( R_EDX, FRn );
2.276 + JMP_rel8(3);
2.277 + pop_dr( R_EDX, FRn );
2.278 +:}
2.279 +FMAC FR0, FRm, FRn {:
2.280 + load_spreg( R_ECX, R_FPSCR );
2.281 + load_spreg( R_EDX, REG_OFFSET(fr_bank));
2.282 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.283 + JNE_rel8(18);
2.284 + push_fr( R_EDX, 0 );
2.285 + push_fr( R_EDX, FRm );
2.286 + FMULP_st(1);
2.287 + push_fr( R_EDX, FRn );
2.288 + FADDP_st(1);
2.289 + pop_fr( R_EDX, FRn );
2.290 + JMP_rel8(16);
2.291 + push_dr( R_EDX, 0 );
2.292 + push_dr( R_EDX, FRm );
2.293 + FMULP_st(1);
2.294 + push_dr( R_EDX, FRn );
2.295 + FADDP_st(1);
2.296 + pop_dr( R_EDX, FRn );
2.297 +:}
2.298 +
2.299 FMUL FRm, FRn {: :}
2.300 FNEG FRn {: :}
2.301 FRCHG {: :}
3.1 --- a/src/sh4/x86op.h Tue Sep 11 02:14:46 2007 +0000
3.2 +++ b/src/sh4/x86op.h Tue Sep 11 21:23:48 2007 +0000
3.3 @@ -1,5 +1,5 @@
3.4 /**
3.5 - * $Id: x86op.h,v 1.4 2007-09-11 02:14:46 nkeynes Exp $
3.6 + * $Id: x86op.h,v 1.5 2007-09-11 21:23:48 nkeynes Exp $
3.7 *
3.8 * Definitions of x86 opcodes for use by the translator.
3.9 *
3.10 @@ -152,6 +152,8 @@
3.11 #define FADDP_st(st) OP(0xDE); OP(0xC0+st)
3.12 #define FCHS_st0() OP(0xD9); OP(0xE0)
3.13 #define FDIVP_st(st) OP(0xDE); OP(0xF8+st)
3.14 +#define FILD_sh4r(disp) OP(0xDB); MODRM_sh4r_r32(disp, 0)
3.15 +#define FISTTP_shr4(disp) OP(0xDB); MODRM_sh4r_r32(disp, 1)
3.16 #define FMULP_st(st) OP(0xDE); OP(0xC8+st)
3.17 #define FSUB_st(st) OP(0xDE); OP(0xE8+st)
3.18 #define FSQRT_st0() OP(0xD9); OP(0xFA)
.