Search
lxdream.org :: lxdream/src/sh4/sh4x86.in :: diff
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 375:4627600f7f8e
prev374:8f80a795513e
next377:fa18743f6905
author nkeynes
date Tue Sep 11 21:23:48 2007 +0000 (12 years ago)
permissions -rw-r--r--
last change Start in on the FP instructions (simplest possible impl)
file annotate diff log raw
1.1 --- a/src/sh4/sh4x86.in Tue Sep 11 02:14:46 2007 +0000
1.2 +++ b/src/sh4/sh4x86.in Tue Sep 11 21:23:48 2007 +0000
1.3 @@ -1,5 +1,5 @@
1.4 /**
1.5 - * $Id: sh4x86.in,v 1.4 2007-09-11 02:14:46 nkeynes Exp $
1.6 + * $Id: sh4x86.in,v 1.5 2007-09-11 21:23:48 nkeynes Exp $
1.7 *
1.8 * SH4 => x86 translation. This version does no real optimization, it just
1.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
1.10 @@ -148,44 +148,67 @@
1.11
1.12 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
1.13
1.14 +/**
1.15 + * Load an FR register (single-precision floating point) into an integer x86
1.16 + * register (eg for register-to-register moves)
1.17 + */
1.18 +void static inline load_fr( int bankreg, int x86reg, int frm )
1.19 +{
1.20 + OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
1.21 +}
1.22 +
1.23 +/**
1.24 + * Store the contents of an integer x86 register into an FR register
1.25 + * (single-precision floating point) (eg for register-to-register moves)
1.26 + */
1.27 +void static inline store_fr( int bankreg, int x86reg, int frn )
1.28 +{
1.29 + OP(0x89); OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
1.30 +}
1.31 +
1.32 +
1.33 +/**
1.34 + * Load a pointer to the back fp bank into the specified x86 register. The
1.35 + * bankreg must have been previously loaded with FPSCR.
1.36 + * NB: 10 bytes
1.37 + */
1.38 static inline void load_xf_bank( int bankreg )
1.39 {
1.40 - load_spreg( bankreg, R_FPSCR );
1.41 SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
1.42 AND_imm8s_r32( 0x40, bankreg ); // Complete extraction
1.43 OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
1.44 }
1.45
1.46 +/**
1.47 + * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
1.48 + * with the location of the current fp bank.
1.49 + */
1.50 static inline void push_fr( int bankreg, int frm )
1.51 {
1.52 OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2); // FLD.S [bankreg + frm^1*4]
1.53 }
1.54
1.55 +/**
1.56 + * Pop a 32-bit float from the FPU stack and store it back into the fp bank,
1.57 + * with bankreg previously loaded with the location of the current fp bank.
1.58 + */
1.59 static inline void pop_fr( int bankreg, int frm )
1.60 {
1.61 OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
1.62 }
1.63
1.64 +/**
1.65 + * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
1.66 + * with the location of the current fp bank.
1.67 + */
1.68 static inline void push_dr( int bankreg, int frm )
1.69 {
1.70 - if( frm&1 ) {
1.71 - // this is technically undefined, but it seems to work consistently - high 32 bits
1.72 - // loaded from FRm (32-bits), low 32bits are 0.
1.73 - OP(0xFF); OP(0x70 + bankreg); OP((frm^1)<<2); // PUSH [bankreg + frm^1]
1.74 - PUSH_imm32(0);
1.75 -
1.76 -
1.77 - } else {
1.78 - OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
1.79 - }
1.80 + OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
1.81 }
1.82
1.83 static inline void pop_dr( int bankreg, int frm )
1.84 {
1.85 - if( frm&1 ) {
1.86 - } else {
1.87 - OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
1.88 - }
1.89 + OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
1.90 }
1.91
1.92 /**
1.93 @@ -210,7 +233,45 @@
1.94 PUSH_r32(arg2);
1.95 PUSH_r32(arg1);
1.96 call_func0(ptr);
1.97 + ADD_imm8s_r32( -8, R_ESP );
1.98 +}
1.99 +
1.100 +/**
1.101 + * Write a double (64-bit) value into memory, with the first word in arg2a, and
1.102 + * the second in arg2b
1.103 + * NB: 30 bytes
1.104 + */
1.105 +static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
1.106 +{
1.107 + ADD_imm8s_r32( 4, addr );
1.108 + PUSH_r32(addr);
1.109 + PUSH_r32(arg2b);
1.110 + ADD_imm8s_r32( -4, addr );
1.111 + PUSH_r32(addr);
1.112 + PUSH_r32(arg2a);
1.113 + call_func0(sh4_write_long);
1.114 + ADD_imm8s_r32( -8, R_ESP );
1.115 + call_func0(sh4_write_long);
1.116 + ADD_imm8s_r32( -8, R_ESP );
1.117 +}
1.118 +
1.119 +/**
1.120 + * Read a double (64-bit) value from memory, writing the first word into arg2a
1.121 + * and the second into arg2b. The addr must not be in EAX
1.122 + * NB: 27 bytes
1.123 + */
1.124 +static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
1.125 +{
1.126 + PUSH_r32(addr);
1.127 + call_func0(sh4_read_long);
1.128 + POP_r32(addr);
1.129 + PUSH_r32(R_EAX);
1.130 + ADD_imm8s_r32( 4, addr );
1.131 + PUSH_r32(addr);
1.132 + call_func0(sh4_read_long);
1.133 ADD_imm8s_r32( -4, R_ESP );
1.134 + MOV_r32_r32( R_EAX, arg2b );
1.135 + POP_r32(arg2a);
1.136 }
1.137
1.138 /* Exception checks - Note that all exception checks will clobber EAX */
1.139 @@ -1227,6 +1288,98 @@
1.140 SETC_t();
1.141 :}
1.142
1.143 +/* Floating point moves */
1.144 +FMOV FRm, FRn {:
1.145 + /* As horrible as this looks, it's actually covering 5 separate cases:
1.146 + * 1. 32-bit fr-to-fr (SZ=0)
1.147 + * 2. 64-bit dr-to-dr (SZ=1, FRm&1 == 0, FRn&1 == 0 )
1.148 + * 3. 64-bit dr-to-xd (SZ=1, FRm&1 == 0, FRn&1 == 1 )
1.149 + * 4. 64-bit xd-to-dr (SZ=1, FRm&1 == 1, FRn&1 == 0 )
1.150 + * 5. 64-bit xd-to-xd (SZ=1, FRm&1 == 1, FRn&1 == 1 )
1.151 + */
1.152 + load_spreg( R_ECX, R_FPSCR );
1.153 + load_spreg( R_EDX, REG_OFFSET(fr_bank) );
1.154 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.155 + JNE_rel8(8);
1.156 + load_fr( R_EDX, R_EAX, FRm ); // SZ=0 branch
1.157 + store_fr( R_EDX, R_EAX, FRn );
1.158 + if( FRm&1 ) {
1.159 + JMP_rel8(22);
1.160 + load_xf_bank( R_ECX );
1.161 + load_fr( R_ECX, R_EAX, FRm-1 );
1.162 + if( FRn&1 ) {
1.163 + load_fr( R_ECX, R_EDX, FRm );
1.164 + store_fr( R_ECX, R_EAX, FRn-1 );
1.165 + store_fr( R_ECX, R_EDX, FRn );
1.166 + } else /* FRn&1 == 0 */ {
1.167 + load_fr( R_ECX, R_ECX, FRm );
1.168 + store_fr( R_EDX, R_EAX, FRn-1 );
1.169 + store_fr( R_EDX, R_ECX, FRn );
1.170 + }
1.171 + } else /* FRm&1 == 0 */ {
1.172 + if( FRn&1 ) {
1.173 + JMP_rel8(22);
1.174 + load_xf_bank( R_ECX );
1.175 + load_fr( R_EDX, R_EAX, FRm );
1.176 + load_fr( R_EDX, R_EDX, FRm+1 );
1.177 + store_fr( R_ECX, R_EAX, FRn-1 );
1.178 + store_fr( R_ECX, R_EDX, FRn );
1.179 + } else /* FRn&1 == 0 */ {
1.180 + JMP_rel8(12);
1.181 + load_fr( R_EDX, R_EAX, FRm );
1.182 + load_fr( R_EDX, R_ECX, FRm+1 );
1.183 + store_fr( R_EDX, R_EAX, FRn );
1.184 + store_fr( R_EDX, R_ECX, FRn+1 );
1.185 + }
1.186 + }
1.187 +:}
1.188 +FMOV FRm, @Rn {:
1.189 + load_reg( R_EDX, Rn );
1.190 + check_walign32( R_EDX );
1.191 + load_spreg( R_ECX, R_FPSCR );
1.192 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.193 + JNE_rel8(20);
1.194 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.195 + load_fr( R_ECX, R_EAX, FRm );
1.196 + MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
1.197 + if( FRm&1 ) {
1.198 + JMP_rel8( 46 );
1.199 + load_xf_bank( R_ECX );
1.200 + } else {
1.201 + JMP_rel8( 39 );
1.202 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.203 + }
1.204 + load_fr( R_ECX, R_EAX, FRm&0x0E );
1.205 + load_fr( R_ECX, R_ECX, FRm|0x01 );
1.206 + MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
1.207 +:}
1.208 +FMOV @Rm, FRn {:
1.209 + load_reg( R_EDX, Rm );
1.210 + check_ralign32( R_EDX );
1.211 + load_spreg( R_ECX, R_FPSCR );
1.212 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.213 + JNE_rel8(19);
1.214 + MEM_READ_LONG( R_EDX, R_EAX );
1.215 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.216 + store_fr( R_ECX, R_EAX, FRn );
1.217 + if( FRn&1 ) {
1.218 + JMP_rel8(46);
1.219 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.220 + load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
1.221 + load_xf_bank( R_ECX );
1.222 + } else {
1.223 + JMP_rel8(36);
1.224 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.225 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.226 + }
1.227 + store_fr( R_ECX, R_EAX, FRn&0x0E );
1.228 + store_fr( R_ECX, R_EDX, FRn|0x01 );
1.229 +:}
1.230 +FMOV FRm, @-Rn {: :}
1.231 +FMOV FRm, @(R0, Rn) {: :}
1.232 +FMOV @Rm+, FRn {: :}
1.233 +FMOV @(R0, Rm), FRn {: :}
1.234 +
1.235 /* Floating point instructions */
1.236 FABS FRn {:
1.237 load_spreg( R_ECX, R_FPSCR );
1.238 @@ -1249,17 +1402,52 @@
1.239 FDIV FRm, FRn {: :}
1.240 FIPR FVm, FVn {: :}
1.241 FLDS FRm, FPUL {: :}
1.242 -FLDI0 FRn {: :}
1.243 -FLDI1 FRn {: :}
1.244 -FLOAT FPUL, FRn {: :}
1.245 -FMAC FR0, FRm, FRn {: :}
1.246 -FMOV FRm, FRn {: :}
1.247 -FMOV FRm, @Rn {: :}
1.248 -FMOV FRm, @-Rn {: :}
1.249 -FMOV FRm, @(R0, Rn) {: :}
1.250 -FMOV @Rm, FRn {: :}
1.251 -FMOV @Rm+, FRn {: :}
1.252 -FMOV @(R0, Rm), FRn {: :}
1.253 +FLDI0 FRn {: /* IFF PR=0 */
1.254 + load_spreg( R_ECX, R_FPSCR );
1.255 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.256 + JNE_rel8(8);
1.257 + xor_r32_r32( R_EAX, R_EAX );
1.258 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.259 + store_fr( R_ECX, R_EAX, FRn );
1.260 +:}
1.261 +FLDI1 FRn {: /* IFF PR=0 */
1.262 + load_spreg( R_ECX, R_FPSCR );
1.263 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.264 + JNE_rel8(11);
1.265 + load_imm32(R_EAX, 0x3F800000);
1.266 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.267 + store_fr( R_ECX, R_EAX, FRn );
1.268 +:}
1.269 +FLOAT FPUL, FRn {:
1.270 + load_spreg( R_ECX, R_FPSCR );
1.271 + load_spreg(R_EDX, REG_OFFSET(fr_bank));
1.272 + FILD_sh4r(R_FPUL);
1.273 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.274 + JNE_rel8(5);
1.275 + pop_fr( R_EDX, FRn );
1.276 + JMP_rel8(3);
1.277 + pop_dr( R_EDX, FRn );
1.278 +:}
1.279 +FMAC FR0, FRm, FRn {:
1.280 + load_spreg( R_ECX, R_FPSCR );
1.281 + load_spreg( R_EDX, REG_OFFSET(fr_bank));
1.282 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.283 + JNE_rel8(18);
1.284 + push_fr( R_EDX, 0 );
1.285 + push_fr( R_EDX, FRm );
1.286 + FMULP_st(1);
1.287 + push_fr( R_EDX, FRn );
1.288 + FADDP_st(1);
1.289 + pop_fr( R_EDX, FRn );
1.290 + JMP_rel8(16);
1.291 + push_dr( R_EDX, 0 );
1.292 + push_dr( R_EDX, FRm );
1.293 + FMULP_st(1);
1.294 + push_dr( R_EDX, FRn );
1.295 + FADDP_st(1);
1.296 + pop_dr( R_EDX, FRn );
1.297 +:}
1.298 +
1.299 FMUL FRm, FRn {: :}
1.300 FNEG FRn {: :}
1.301 FRCHG {: :}
.