1.1 --- a/src/sh4/sh4x86.c Tue Sep 11 02:14:46 2007 +0000
1.2 +++ b/src/sh4/sh4x86.c Tue Sep 11 21:23:48 2007 +0000
1.5 - * $Id: sh4x86.c,v 1.4 2007-09-11 02:14:46 nkeynes Exp $
1.6 + * $Id: sh4x86.c,v 1.5 2007-09-11 21:23:48 nkeynes Exp $
1.8 * SH4 => x86 translation. This version does no real optimization, it just
1.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
1.10 @@ -148,24 +148,59 @@
1.12 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
1.15 + * Load an FR register (single-precision floating point) into an integer x86
1.16 + * register (eg for register-to-register moves)
1.18 +void static inline load_fr( int bankreg, int x86reg, int frm )
1.20 + OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
1.24 + * Store the contents of an integer x86 register into an FR register
1.25 + * (single-precision floating point) (eg for register-to-register moves)
1.27 +void static inline store_fr( int bankreg, int x86reg, int frn )
1.29 + OP(0x89); OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
1.34 + * Load a pointer to the back fp bank into the specified x86 register. The
1.35 + * bankreg must have been previously loaded with FPSCR.
1.38 static inline void load_xf_bank( int bankreg )
1.40 - load_spreg( bankreg, R_FPSCR );
1.41 SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
1.42 AND_imm8s_r32( 0x40, bankreg ); // Complete extraction
1.43 OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
1.47 + * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
1.48 + * with the location of the current fp bank.
1.50 static inline void push_fr( int bankreg, int frm )
1.52 OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2); // FLD.S [bankreg + frm^1*4]
1.56 + * Pop a 32-bit float from the FPU stack and store it back into the fp bank,
1.57 + * with bankreg previously loaded with the location of the current fp bank.
1.59 static inline void pop_fr( int bankreg, int frm )
1.61 OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FSTP.S [bankreg + frm^1*4]
1.65 + * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
1.66 + * with the location of the current fp bank.
1.68 static inline void push_dr( int bankreg, int frm )
1.71 @@ -210,7 +245,45 @@
1.75 + ADD_imm8s_r32( -8, R_ESP );
1.79 + * Write a double (64-bit) value into memory, with the first word in arg2a, and
1.80 + * the second in arg2b
1.83 +static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
1.85 + ADD_imm8s_r32( 4, addr );
1.88 + ADD_imm8s_r32( -4, addr );
1.91 + call_func0(sh4_write_long);
1.92 + ADD_imm8s_r32( -8, R_ESP );
1.93 + call_func0(sh4_write_long);
1.94 + ADD_imm8s_r32( -8, R_ESP );
1.98 + * Read a double (64-bit) value from memory, writing the first word into arg2a
1.99 + * and the second into arg2b. The addr must not be in EAX
1.102 +static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
1.105 + call_func0(sh4_read_long);
1.108 + ADD_imm8s_r32( 4, addr );
1.110 + call_func0(sh4_read_long);
1.111 ADD_imm8s_r32( -4, R_ESP );
1.112 + MOV_r32_r32( R_EAX, arg2b );
1.116 /* Exception checks - Note that all exception checks will clobber EAX */
1.117 @@ -2299,6 +2372,26 @@
1.119 { /* FMOV @Rm, FRn */
1.120 uint32_t FRn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.121 + load_reg( R_EDX, Rm );
1.122 + check_ralign32( R_EDX );
1.123 + load_spreg( R_ECX, R_FPSCR );
1.124 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.126 + MEM_READ_LONG( R_EDX, R_EAX );
1.127 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.128 + store_fr( R_ECX, R_EAX, FRn );
1.131 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.132 + load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
1.133 + load_xf_bank( R_ECX );
1.136 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.137 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.139 + store_fr( R_ECX, R_EAX, FRn&0x0E );
1.140 + store_fr( R_ECX, R_EDX, FRn|0x01 );
1.144 @@ -2309,6 +2402,24 @@
1.146 { /* FMOV FRm, @Rn */
1.147 uint32_t Rn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.148 + load_reg( R_EDX, Rn );
1.149 + check_walign32( R_EDX );
1.150 + load_spreg( R_ECX, R_FPSCR );
1.151 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.153 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.154 + load_fr( R_ECX, R_EAX, FRm );
1.155 + MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
1.158 + load_xf_bank( R_ECX );
1.161 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.163 + load_fr( R_ECX, R_EAX, FRm&0x0E );
1.164 + load_fr( R_ECX, R_ECX, FRm|0x01 );
1.165 + MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
1.169 @@ -2319,6 +2430,48 @@
1.171 { /* FMOV FRm, FRn */
1.172 uint32_t FRn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.173 + /* As horrible as this looks, it's actually covering 5 separate cases:
1.174 + * 1. 32-bit fr-to-fr (SZ=0)
1.175 + * 2. 64-bit dr-to-dr (SZ=1, FRm&1 == 0, FRn&1 == 0 )
1.176 + * 3. 64-bit dr-to-xd (SZ=1, FRm&1 == 0, FRn&1 == 1 )
1.177 + * 4. 64-bit xd-to-dr (SZ=1, FRm&1 == 1, FRn&1 == 0 )
1.178 + * 5. 64-bit xd-to-xd (SZ=1, FRm&1 == 1, FRn&1 == 1 )
1.180 + load_spreg( R_ECX, R_FPSCR );
1.181 + load_spreg( R_EDX, REG_OFFSET(fr_bank) );
1.182 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.184 + load_fr( R_EDX, R_EAX, FRm ); // SZ=0 branch
1.185 + store_fr( R_EDX, R_EAX, FRn );
1.188 + load_xf_bank( R_ECX );
1.189 + load_fr( R_ECX, R_EAX, FRm-1 );
1.191 + load_fr( R_ECX, R_EDX, FRm );
1.192 + store_fr( R_ECX, R_EAX, FRn-1 );
1.193 + store_fr( R_ECX, R_EDX, FRn );
1.194 + } else /* FRn&1 == 0 */ {
1.195 + load_fr( R_ECX, R_ECX, FRm );
1.196 + store_fr( R_EDX, R_EAX, FRn-1 );
1.197 + store_fr( R_EDX, R_ECX, FRn );
1.199 + } else /* FRm&1 == 0 */ {
1.202 + load_xf_bank( R_ECX );
1.203 + load_fr( R_EDX, R_EAX, FRm );
1.204 + load_fr( R_EDX, R_EDX, FRm+1 );
1.205 + store_fr( R_ECX, R_EAX, FRn-1 );
1.206 + store_fr( R_ECX, R_EDX, FRn );
1.207 + } else /* FRn&1 == 0 */ {
1.209 + load_fr( R_EDX, R_EAX, FRm );
1.210 + load_fr( R_EDX, R_ECX, FRm+1 );
1.211 + store_fr( R_EDX, R_EAX, FRn );
1.212 + store_fr( R_EDX, R_ECX, FRn+1 );