Search
lxdream.org :: lxdream/src/sh4/sh4x86.c :: diff
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.c
changeset 375:4627600f7f8e
prev374:8f80a795513e
next377:fa18743f6905
author nkeynes
date Tue Sep 11 21:23:48 2007 +0000 (13 years ago)
permissions -rw-r--r--
last change Start in on the FP instructions (simplest possible impl)
file annotate diff log raw
1.1 --- a/src/sh4/sh4x86.c Tue Sep 11 02:14:46 2007 +0000
1.2 +++ b/src/sh4/sh4x86.c Tue Sep 11 21:23:48 2007 +0000
1.3 @@ -1,5 +1,5 @@
1.4 /**
1.5 - * $Id: sh4x86.c,v 1.4 2007-09-11 02:14:46 nkeynes Exp $
1.6 + * $Id: sh4x86.c,v 1.5 2007-09-11 21:23:48 nkeynes Exp $
1.7 *
1.8 * SH4 => x86 translation. This version does no real optimization, it just
1.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
1.10 @@ -148,24 +148,59 @@
1.11
1.12 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
1.13
1.14 +/**
1.15 + * Load an FR register (single-precision floating point) into an integer x86
1.16 + * register (eg for register-to-register moves)
1.17 + */
1.18 +void static inline load_fr( int bankreg, int x86reg, int frm )
1.19 +{
1.20 + OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
1.21 +}
1.22 +
1.23 +/**
1.24 + * Store the contents of an integer x86 register into an FR register
1.25 + * (single-precision floating point) (eg for register-to-register moves)
1.26 + */
1.27 +void static inline store_fr( int bankreg, int x86reg, int frn )
1.28 +{
1.29 + OP(0x89); OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
1.30 +}
1.31 +
1.32 +
1.33 +/**
1.34 + * Load a pointer to the back fp bank into the specified x86 register. The
1.35 + * bankreg must have been previously loaded with FPSCR.
1.36 + * NB: 10 bytes
1.37 + */
1.38 static inline void load_xf_bank( int bankreg )
1.39 {
1.40 - load_spreg( bankreg, R_FPSCR );
1.41 SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
1.42 AND_imm8s_r32( 0x40, bankreg ); // Complete extraction
1.43 OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
1.44 }
1.45
1.46 +/**
1.47 + * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
1.48 + * with the location of the current fp bank.
1.49 + */
1.50 static inline void push_fr( int bankreg, int frm )
1.51 {
1.52 OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2); // FLD.S [bankreg + frm^1*4]
1.53 }
1.54
1.55 +/**
1.56 + * Pop a 32-bit float from the FPU stack and store it back into the fp bank,
1.57 + * with bankreg previously loaded with the location of the current fp bank.
1.58 + */
1.59 static inline void pop_fr( int bankreg, int frm )
1.60 {
1.61 OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FSTP.S [bankreg + frm^1*4]
1.62 }
1.63
1.64 +/**
1.65 + * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
1.66 + * with the location of the current fp bank.
1.67 + */
1.68 static inline void push_dr( int bankreg, int frm )
1.69 {
1.70 if( frm&1 ) {
1.71 @@ -210,7 +245,45 @@
1.72 PUSH_r32(arg2);
1.73 PUSH_r32(arg1);
1.74 call_func0(ptr);
1.75 + ADD_imm8s_r32( -8, R_ESP );
1.76 +}
1.77 +
1.78 +/**
1.79 + * Write a double (64-bit) value into memory, with the first word in arg2a, and
1.80 + * the second in arg2b
1.81 + * NB: 30 bytes
1.82 + */
1.83 +static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
1.84 +{
1.85 + ADD_imm8s_r32( 4, addr );
1.86 + PUSH_r32(addr);
1.87 + PUSH_r32(arg2b);
1.88 + ADD_imm8s_r32( -4, addr );
1.89 + PUSH_r32(addr);
1.90 + PUSH_r32(arg2a);
1.91 + call_func0(sh4_write_long);
1.92 + ADD_imm8s_r32( -8, R_ESP );
1.93 + call_func0(sh4_write_long);
1.94 + ADD_imm8s_r32( -8, R_ESP );
1.95 +}
1.96 +
1.97 +/**
1.98 + * Read a double (64-bit) value from memory, writing the first word into arg2a
1.99 + * and the second into arg2b. The addr must not be in EAX
1.100 + * NB: 27 bytes
1.101 + */
1.102 +static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
1.103 +{
1.104 + PUSH_r32(addr);
1.105 + call_func0(sh4_read_long);
1.106 + POP_r32(addr);
1.107 + PUSH_r32(R_EAX);
1.108 + ADD_imm8s_r32( 4, addr );
1.109 + PUSH_r32(addr);
1.110 + call_func0(sh4_read_long);
1.111 ADD_imm8s_r32( -4, R_ESP );
1.112 + MOV_r32_r32( R_EAX, arg2b );
1.113 + POP_r32(arg2a);
1.114 }
1.115
1.116 /* Exception checks - Note that all exception checks will clobber EAX */
1.117 @@ -2299,6 +2372,26 @@
1.118 case 0x8:
1.119 { /* FMOV @Rm, FRn */
1.120 uint32_t FRn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.121 + load_reg( R_EDX, Rm );
1.122 + check_ralign32( R_EDX );
1.123 + load_spreg( R_ECX, R_FPSCR );
1.124 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.125 + JNE_rel8(19);
1.126 + MEM_READ_LONG( R_EDX, R_EAX );
1.127 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.128 + store_fr( R_ECX, R_EAX, FRn );
1.129 + if( FRn&1 ) {
1.130 + JMP_rel8(46);
1.131 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.132 + load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
1.133 + load_xf_bank( R_ECX );
1.134 + } else {
1.135 + JMP_rel8(36);
1.136 + MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.137 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.138 + }
1.139 + store_fr( R_ECX, R_EAX, FRn&0x0E );
1.140 + store_fr( R_ECX, R_EDX, FRn|0x01 );
1.141 }
1.142 break;
1.143 case 0x9:
1.144 @@ -2309,6 +2402,24 @@
1.145 case 0xA:
1.146 { /* FMOV FRm, @Rn */
1.147 uint32_t Rn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.148 + load_reg( R_EDX, Rn );
1.149 + check_walign32( R_EDX );
1.150 + load_spreg( R_ECX, R_FPSCR );
1.151 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.152 + JNE_rel8(20);
1.153 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.154 + load_fr( R_ECX, R_EAX, FRm );
1.155 + MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
1.156 + if( FRm&1 ) {
1.157 + JMP_rel8( 46 );
1.158 + load_xf_bank( R_ECX );
1.159 + } else {
1.160 + JMP_rel8( 39 );
1.161 + load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1.162 + }
1.163 + load_fr( R_ECX, R_EAX, FRm&0x0E );
1.164 + load_fr( R_ECX, R_ECX, FRm|0x01 );
1.165 + MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
1.166 }
1.167 break;
1.168 case 0xB:
1.169 @@ -2319,6 +2430,48 @@
1.170 case 0xC:
1.171 { /* FMOV FRm, FRn */
1.172 uint32_t FRn = ((ir>>8)&0xF); uint32_t FRm = ((ir>>4)&0xF);
1.173 + /* As horrible as this looks, it's actually covering 5 separate cases:
1.174 + * 1. 32-bit fr-to-fr (SZ=0)
1.175 + * 2. 64-bit dr-to-dr (SZ=1, FRm&1 == 0, FRn&1 == 0 )
1.176 + * 3. 64-bit dr-to-xd (SZ=1, FRm&1 == 0, FRn&1 == 1 )
1.177 + * 4. 64-bit xd-to-dr (SZ=1, FRm&1 == 1, FRn&1 == 0 )
1.178 + * 5. 64-bit xd-to-xd (SZ=1, FRm&1 == 1, FRn&1 == 1 )
1.179 + */
1.180 + load_spreg( R_ECX, R_FPSCR );
1.181 + load_spreg( R_EDX, REG_OFFSET(fr_bank) );
1.182 + TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.183 + JNE_rel8(8);
1.184 + load_fr( R_EDX, R_EAX, FRm ); // SZ=0 branch
1.185 + store_fr( R_EDX, R_EAX, FRn );
1.186 + if( FRm&1 ) {
1.187 + JMP_rel8(22);
1.188 + load_xf_bank( R_ECX );
1.189 + load_fr( R_ECX, R_EAX, FRm-1 );
1.190 + if( FRn&1 ) {
1.191 + load_fr( R_ECX, R_EDX, FRm );
1.192 + store_fr( R_ECX, R_EAX, FRn-1 );
1.193 + store_fr( R_ECX, R_EDX, FRn );
1.194 + } else /* FRn&1 == 0 */ {
1.195 + load_fr( R_ECX, R_ECX, FRm );
1.196 + store_fr( R_EDX, R_EAX, FRn-1 );
1.197 + store_fr( R_EDX, R_ECX, FRn );
1.198 + }
1.199 + } else /* FRm&1 == 0 */ {
1.200 + if( FRn&1 ) {
1.201 + JMP_rel8(22);
1.202 + load_xf_bank( R_ECX );
1.203 + load_fr( R_EDX, R_EAX, FRm );
1.204 + load_fr( R_EDX, R_EDX, FRm+1 );
1.205 + store_fr( R_ECX, R_EAX, FRn-1 );
1.206 + store_fr( R_ECX, R_EDX, FRn );
1.207 + } else /* FRn&1 == 0 */ {
1.208 + JMP_rel8(12);
1.209 + load_fr( R_EDX, R_EAX, FRm );
1.210 + load_fr( R_EDX, R_ECX, FRm+1 );
1.211 + store_fr( R_EDX, R_EAX, FRn );
1.212 + store_fr( R_EDX, R_ECX, FRn+1 );
1.213 + }
1.214 + }
1.215 }
1.216 break;
1.217 case 0xD:
.