Search
lxdream.org :: lxdream :: r374:8f80a795513e
lxdream 0.9.1
released Jun 29
Download Now
changeset: 374:8f80a795513e
parent: 373:0ac2ac96a4c5
child: 375:4627600f7f8e
author: nkeynes
date: Tue Sep 11 02:14:46 2007 +0000 (12 years ago)
Cache the pointer to the last FR bank (speeds fp ops up by about 10%)
Implement experimental fix for FLOAT/FTRC
Make read/write sr functions non-static (share with translator)
Much more translator WIP
src/sh4/sh4core.c
src/sh4/sh4core.h
src/sh4/sh4core.in
src/sh4/sh4x86.c
src/sh4/sh4x86.in
src/sh4/x86op.h
src/test/testsh4x86.c
1.1 --- a/src/sh4/sh4core.c Tue Sep 11 01:05:05 2007 +0000
1.2 +++ b/src/sh4/sh4core.c Tue Sep 11 02:14:46 2007 +0000
1.3 @@ -1,5 +1,5 @@
1.4 /**
1.5 - * $Id: sh4core.c,v 1.43 2007-09-08 03:11:53 nkeynes Exp $
1.6 + * $Id: sh4core.c,v 1.44 2007-09-11 02:14:46 nkeynes Exp $
1.7 *
1.8 * SH4 emulation core, and parent module for all the SH4 peripheral
1.9 * modules.
1.10 @@ -75,6 +75,7 @@
1.11 sh4r.vbr = 0x00000000;
1.12 sh4r.fpscr = 0x00040001;
1.13 sh4r.sr = 0x700000F0;
1.14 + sh4r.fr_bank = &sh4r.fr[0][0];
1.15
1.16 /* Mem reset will do this, but if we want to reset _just_ the SH4... */
1.17 MMIO_WRITE( MMU, EXPEVT, EXC_POWER_RESET );
1.18 @@ -290,7 +291,7 @@
1.19 MMIO_WRITE(MMU,EXPEVT,x); \
1.20 sh4r.pc = sh4r.vbr + v; \
1.21 sh4r.new_pc = sh4r.pc + 2; \
1.22 - sh4_load_sr( sh4r.ssr |SR_MD|SR_BL|SR_RB ); \
1.23 + sh4_write_sr( sh4r.ssr |SR_MD|SR_BL|SR_RB ); \
1.24 if( sh4r.in_delay_slot ) { \
1.25 sh4r.in_delay_slot = 0; \
1.26 sh4r.spc -= 2; \
1.27 @@ -329,7 +330,7 @@
1.28 memcpy( sh4r.r_bank, tmp, sizeof(uint32_t)*8 );
1.29 }
1.30
1.31 -static void sh4_load_sr( uint32_t newval )
1.32 +void sh4_write_sr( uint32_t newval )
1.33 {
1.34 if( (newval ^ sh4r.sr) & SR_RB )
1.35 sh4_switch_banks();
1.36 @@ -371,7 +372,7 @@
1.37 }
1.38 }
1.39
1.40 -static uint32_t sh4_read_sr( void )
1.41 +uint32_t sh4_read_sr( void )
1.42 {
1.43 /* synchronize sh4r.sr with the various bitflags */
1.44 sh4r.sr &= SR_MQSTMASK;
1.45 @@ -410,7 +411,7 @@
1.46 sh4r.ssr = sh4_read_sr();
1.47 sh4r.spc = sh4r.pc;
1.48 sh4r.sgr = sh4r.r[15];
1.49 - sh4_load_sr( sh4r.ssr|SR_BL|SR_MD|SR_RB );
1.50 + sh4_write_sr( sh4r.ssr|SR_BL|SR_MD|SR_RB );
1.51 MMIO_WRITE( MMU, INTEVT, code );
1.52 sh4r.pc = sh4r.vbr + 0x600;
1.53 sh4r.new_pc = sh4r.pc + 2;
1.54 @@ -740,7 +741,7 @@
1.55 sh4r.in_delay_slot = 1;
1.56 sh4r.pc = sh4r.new_pc;
1.57 sh4r.new_pc = sh4r.spc;
1.58 - sh4_load_sr( sh4r.ssr );
1.59 + sh4_write_sr( sh4r.ssr );
1.60 return TRUE;
1.61 }
1.62 break;
1.63 @@ -1307,6 +1308,7 @@
1.64 CHECKRALIGN32( sh4r.r[Rm] );
1.65 sh4r.fpscr = MEM_READ_LONG(sh4r.r[Rm]);
1.66 sh4r.r[Rm] +=4;
1.67 + sh4r.fr_bank = &sh4r.fr[(sh4r.fpscr&FPSCR_FR)>>21][0];
1.68 }
1.69 break;
1.70 case 0xF:
1.71 @@ -1333,7 +1335,7 @@
1.72 CHECKSLOTILLEGAL();
1.73 CHECKPRIV();
1.74 CHECKWALIGN32( sh4r.r[Rm] );
1.75 - sh4_load_sr( MEM_READ_LONG(sh4r.r[Rm]) );
1.76 + sh4_write_sr( MEM_READ_LONG(sh4r.r[Rm]) );
1.77 sh4r.r[Rm] +=4;
1.78 }
1.79 break;
1.80 @@ -1476,7 +1478,8 @@
1.81 case 0x6:
1.82 { /* LDS Rm, FPSCR */
1.83 uint32_t Rm = ((ir>>8)&0xF);
1.84 - sh4r.fpscr = sh4r.r[Rm];
1.85 + sh4r.fpscr = sh4r.r[Rm];
1.86 + sh4r.fr_bank = &sh4r.fr[(sh4r.fpscr&FPSCR_FR)>>21][0];
1.87 }
1.88 break;
1.89 case 0xF:
1.90 @@ -1559,7 +1562,7 @@
1.91 uint32_t Rm = ((ir>>8)&0xF);
1.92 CHECKSLOTILLEGAL();
1.93 CHECKPRIV();
1.94 - sh4_load_sr( sh4r.r[Rm] );
1.95 + sh4_write_sr( sh4r.r[Rm] );
1.96 }
1.97 break;
1.98 case 0x1:
1.99 @@ -2126,18 +2129,29 @@
1.100 { /* FLOAT FPUL, FRn */
1.101 uint32_t FRn = ((ir>>8)&0xF);
1.102 CHECKFPUEN();
1.103 - if( IS_FPU_DOUBLEPREC() )
1.104 - DR(FRn) = (float)FPULi;
1.105 - else
1.106 + if( IS_FPU_DOUBLEPREC() ) {
1.107 + if( FRn&1 ) { // No, really...
1.108 + dtmp = (double)FPULi;
1.109 + FR(FRn) = *(((float *)&dtmp)+1);
1.110 + } else {
1.111 + DRF(FRn>>1) = (double)FPULi;
1.112 + }
1.113 + } else {
1.114 FR(FRn) = (float)FPULi;
1.115 }
1.116 + }
1.117 break;
1.118 case 0x3:
1.119 { /* FTRC FRm, FPUL */
1.120 uint32_t FRm = ((ir>>8)&0xF);
1.121 CHECKFPUEN();
1.122 if( IS_FPU_DOUBLEPREC() ) {
1.123 - dtmp = DR(FRm);
1.124 + if( FRm&1 ) {
1.125 + dtmp = 0;
1.126 + *(((float *)&dtmp)+1) = FR(FRm);
1.127 + } else {
1.128 + dtmp = DRF(FRm>>1);
1.129 + }
1.130 if( dtmp >= MAX_INTF )
1.131 FPULi = MAX_INT;
1.132 else if( dtmp <= MIN_INTF )
1.133 @@ -2272,15 +2286,16 @@
1.134 CHECKFPUEN();
1.135 if( !IS_FPU_DOUBLEPREC() ) {
1.136 tmp = FVn<<2;
1.137 + float *xf = &sh4r.fr[((~sh4r.fpscr)&FPSCR_FR)>>21][0];
1.138 float fv[4] = { FR(tmp), FR(tmp+1), FR(tmp+2), FR(tmp+3) };
1.139 - FR(tmp) = XF(0) * fv[0] + XF(4)*fv[1] +
1.140 - XF(8)*fv[2] + XF(12)*fv[3];
1.141 - FR(tmp+1) = XF(1) * fv[0] + XF(5)*fv[1] +
1.142 - XF(9)*fv[2] + XF(13)*fv[3];
1.143 - FR(tmp+2) = XF(2) * fv[0] + XF(6)*fv[1] +
1.144 - XF(10)*fv[2] + XF(14)*fv[3];
1.145 - FR(tmp+3) = XF(3) * fv[0] + XF(7)*fv[1] +
1.146 - XF(11)*fv[2] + XF(15)*fv[3];
1.147 + FR(tmp) = xf[1] * fv[0] + xf[5]*fv[1] +
1.148 + xf[9]*fv[2] + xf[13]*fv[3];
1.149 + FR(tmp+1) = xf[0] * fv[0] + xf[4]*fv[1] +
1.150 + xf[8]*fv[2] + xf[12]*fv[3];
1.151 + FR(tmp+2) = xf[3] * fv[0] + xf[7]*fv[1] +
1.152 + xf[11]*fv[2] + xf[15]*fv[3];
1.153 + FR(tmp+3) = xf[2] * fv[0] + xf[6]*fv[1] +
1.154 + xf[10]*fv[2] + xf[14]*fv[3];
1.155 }
1.156 }
1.157 break;
1.158 @@ -2293,7 +2308,9 @@
1.159 break;
1.160 case 0x2:
1.161 { /* FRCHG */
1.162 - CHECKFPUEN(); sh4r.fpscr ^= FPSCR_FR;
1.163 + CHECKFPUEN();
1.164 + sh4r.fpscr ^= FPSCR_FR;
1.165 + sh4r.fr_bank = &sh4r.fr[(sh4r.fpscr&FPSCR_FR)>>21][0];
1.166 }
1.167 break;
1.168 case 0x3:
2.1 --- a/src/sh4/sh4core.h Tue Sep 11 01:05:05 2007 +0000
2.2 +++ b/src/sh4/sh4core.h Tue Sep 11 02:14:46 2007 +0000
2.3 @@ -1,5 +1,5 @@
2.4 /**
2.5 - * $Id: sh4core.h,v 1.21 2007-09-08 03:12:21 nkeynes Exp $
2.6 + * $Id: sh4core.h,v 1.22 2007-09-11 02:14:46 nkeynes Exp $
2.7 *
2.8 * This file defines the internal functions exported/used by the SH4 core,
2.9 * except for disassembly functions defined in sh4dasm.h
2.10 @@ -58,14 +58,15 @@
2.11
2.12 struct sh4_registers {
2.13 uint32_t r[16];
2.14 + uint32_t sr, pr, pc, fpscr;
2.15 + uint32_t t, m, q, s; /* really boolean - 0 or 1 */
2.16 + int32_t fpul;
2.17 + float *fr_bank;
2.18 + float fr[2][16];
2.19 + uint64_t mac;
2.20 + uint32_t gbr, ssr, spc, sgr, dbr, vbr;
2.21 +
2.22 uint32_t r_bank[8]; /* hidden banked registers */
2.23 - uint32_t sr, pr, pc, fpscr, t;
2.24 - int32_t fpul;
2.25 - uint32_t gbr, ssr, spc, sgr, dbr, vbr;
2.26 - uint64_t mac;
2.27 - uint32_t m, q, s; /* really boolean - 0 or 1 */
2.28 - float fr[2][16];
2.29 -
2.30 int32_t store_queue[16]; /* technically 2 banks of 32 bytes */
2.31
2.32 uint32_t new_pc; /* Not a real register, but used to handle delay slots */
2.33 @@ -111,6 +112,10 @@
2.34 int32_t sh4_read_phys_word( uint32_t addr );
2.35 void sh4_flush_store_queue( uint32_t addr );
2.36
2.37 +/* SH4 Support methods */
2.38 +uint32_t sh4_read_sr(void);
2.39 +void sh4_write_sr(uint32_t val);
2.40 +
2.41 /* Peripheral functions */
2.42 void CPG_reset( void );
2.43 void TMU_run_slice( uint32_t );
2.44 @@ -171,8 +176,8 @@
2.45 #define IS_FPU_DOUBLESIZE() (sh4r.fpscr&FPSCR_SZ)
2.46 #define IS_FPU_ENABLED() ((sh4r.sr&SR_FD)==0)
2.47
2.48 -#define FR(x) sh4r.fr[(sh4r.fpscr&FPSCR_FR)>>21][(x)^1]
2.49 -#define DRF(x) ((double *)(sh4r.fr[(sh4r.fpscr&FPSCR_FR)>>21]))[x]
2.50 +#define FR(x) sh4r.fr_bank[(x)^1]
2.51 +#define DRF(x) ((double *)sh4r.fr_bank)[x]
2.52 #define XF(x) sh4r.fr[((~sh4r.fpscr)&FPSCR_FR)>>21][(x)^1]
2.53 #define XDR(x) ((double *)(sh4r.fr[((~sh4r.fpscr)&FPSCR_FR)>>21]))[x]
2.54 #define DRb(x,b) ((double *)(sh4r.fr[((b ? (~sh4r.fpscr) : sh4r.fpscr)&FPSCR_FR)>>21]))[x]
3.1 --- a/src/sh4/sh4core.in Tue Sep 11 01:05:05 2007 +0000
3.2 +++ b/src/sh4/sh4core.in Tue Sep 11 02:14:46 2007 +0000
3.3 @@ -1,5 +1,5 @@
3.4 /**
3.5 - * $Id: sh4core.in,v 1.3 2007-09-08 03:12:21 nkeynes Exp $
3.6 + * $Id: sh4core.in,v 1.4 2007-09-11 02:14:46 nkeynes Exp $
3.7 *
3.8 * SH4 emulation core, and parent module for all the SH4 peripheral
3.9 * modules.
3.10 @@ -75,6 +75,7 @@
3.11 sh4r.vbr = 0x00000000;
3.12 sh4r.fpscr = 0x00040001;
3.13 sh4r.sr = 0x700000F0;
3.14 + sh4r.fr_bank = &sh4r.fr[0][0];
3.15
3.16 /* Mem reset will do this, but if we want to reset _just_ the SH4... */
3.17 MMIO_WRITE( MMU, EXPEVT, EXC_POWER_RESET );
3.18 @@ -290,7 +291,7 @@
3.19 MMIO_WRITE(MMU,EXPEVT,x); \
3.20 sh4r.pc = sh4r.vbr + v; \
3.21 sh4r.new_pc = sh4r.pc + 2; \
3.22 - sh4_load_sr( sh4r.ssr |SR_MD|SR_BL|SR_RB ); \
3.23 + sh4_write_sr( sh4r.ssr |SR_MD|SR_BL|SR_RB ); \
3.24 if( sh4r.in_delay_slot ) { \
3.25 sh4r.in_delay_slot = 0; \
3.26 sh4r.spc -= 2; \
3.27 @@ -329,7 +330,7 @@
3.28 memcpy( sh4r.r_bank, tmp, sizeof(uint32_t)*8 );
3.29 }
3.30
3.31 -static void sh4_load_sr( uint32_t newval )
3.32 +void sh4_write_sr( uint32_t newval )
3.33 {
3.34 if( (newval ^ sh4r.sr) & SR_RB )
3.35 sh4_switch_banks();
3.36 @@ -371,7 +372,7 @@
3.37 }
3.38 }
3.39
3.40 -static uint32_t sh4_read_sr( void )
3.41 +uint32_t sh4_read_sr( void )
3.42 {
3.43 /* synchronize sh4r.sr with the various bitflags */
3.44 sh4r.sr &= SR_MQSTMASK;
3.45 @@ -410,7 +411,7 @@
3.46 sh4r.ssr = sh4_read_sr();
3.47 sh4r.spc = sh4r.pc;
3.48 sh4r.sgr = sh4r.r[15];
3.49 - sh4_load_sr( sh4r.ssr|SR_BL|SR_MD|SR_RB );
3.50 + sh4_write_sr( sh4r.ssr|SR_BL|SR_MD|SR_RB );
3.51 MMIO_WRITE( MMU, INTEVT, code );
3.52 sh4r.pc = sh4r.vbr + 0x600;
3.53 sh4r.new_pc = sh4r.pc + 2;
3.54 @@ -872,7 +873,7 @@
3.55 sh4r.in_delay_slot = 1;
3.56 sh4r.pc = sh4r.new_pc;
3.57 sh4r.new_pc = sh4r.spc;
3.58 - sh4_load_sr( sh4r.ssr );
3.59 + sh4_write_sr( sh4r.ssr );
3.60 return TRUE;
3.61 :}
3.62 JMP @Rn {:
3.63 @@ -915,7 +916,7 @@
3.64 CHECKSLOTILLEGAL();
3.65 CHECKPRIV();
3.66 CHECKWALIGN32( sh4r.r[Rm] );
3.67 - sh4_load_sr( MEM_READ_LONG(sh4r.r[Rm]) );
3.68 + sh4_write_sr( MEM_READ_LONG(sh4r.r[Rm]) );
3.69 sh4r.r[Rm] +=4;
3.70 :}
3.71 LDS Rm, MACH {:
3.72 @@ -925,7 +926,7 @@
3.73 LDC Rm, SR {:
3.74 CHECKSLOTILLEGAL();
3.75 CHECKPRIV();
3.76 - sh4_load_sr( sh4r.r[Rm] );
3.77 + sh4_write_sr( sh4r.r[Rm] );
3.78 :}
3.79 LDC Rm, SGR {:
3.80 CHECKPRIV();
3.81 @@ -1056,8 +1057,12 @@
3.82 CHECKRALIGN32( sh4r.r[Rm] );
3.83 sh4r.fpscr = MEM_READ_LONG(sh4r.r[Rm]);
3.84 sh4r.r[Rm] +=4;
3.85 + sh4r.fr_bank = &sh4r.fr[(sh4r.fpscr&FPSCR_FR)>>21][0];
3.86 :}
3.87 -LDS Rm, FPSCR {: sh4r.fpscr = sh4r.r[Rm]; :}
3.88 +LDS Rm, FPSCR {:
3.89 + sh4r.fpscr = sh4r.r[Rm];
3.90 + sh4r.fr_bank = &sh4r.fr[(sh4r.fpscr&FPSCR_FR)>>21][0];
3.91 +:}
3.92 STC DBR, Rn {: CHECKPRIV(); sh4r.r[Rn] = sh4r.dbr; :}
3.93 STC.L DBR, @-Rn {:
3.94 CHECKPRIV();
3.95 @@ -1185,15 +1190,26 @@
3.96 FLDS FRm, FPUL {: CHECKFPUEN(); FPULf = FR(FRm); :}
3.97 FLOAT FPUL, FRn {:
3.98 CHECKFPUEN();
3.99 - if( IS_FPU_DOUBLEPREC() )
3.100 - DR(FRn) = (float)FPULi;
3.101 - else
3.102 + if( IS_FPU_DOUBLEPREC() ) {
3.103 + if( FRn&1 ) { // No, really...
3.104 + dtmp = (double)FPULi;
3.105 + FR(FRn) = *(((float *)&dtmp)+1);
3.106 + } else {
3.107 + DRF(FRn>>1) = (double)FPULi;
3.108 + }
3.109 + } else {
3.110 FR(FRn) = (float)FPULi;
3.111 + }
3.112 :}
3.113 FTRC FRm, FPUL {:
3.114 CHECKFPUEN();
3.115 if( IS_FPU_DOUBLEPREC() ) {
3.116 - dtmp = DR(FRm);
3.117 + if( FRm&1 ) {
3.118 + dtmp = 0;
3.119 + *(((float *)&dtmp)+1) = FR(FRm);
3.120 + } else {
3.121 + dtmp = DRF(FRm>>1);
3.122 + }
3.123 if( dtmp >= MAX_INTF )
3.124 FPULi = MAX_INT;
3.125 else if( dtmp <= MIN_INTF )
3.126 @@ -1258,7 +1274,11 @@
3.127 FR(FRn) += FR(FRm)*FR(0);
3.128 }
3.129 :}
3.130 -FRCHG {: CHECKFPUEN(); sh4r.fpscr ^= FPSCR_FR; :}
3.131 +FRCHG {:
3.132 + CHECKFPUEN();
3.133 + sh4r.fpscr ^= FPSCR_FR;
3.134 + sh4r.fr_bank = &sh4r.fr[(sh4r.fpscr&FPSCR_FR)>>21][0];
3.135 +:}
3.136 FSCHG {: CHECKFPUEN(); sh4r.fpscr ^= FPSCR_SZ; :}
3.137 FCNVSD FPUL, FRn {:
3.138 CHECKFPUEN();
3.139 @@ -1302,15 +1322,16 @@
3.140 CHECKFPUEN();
3.141 if( !IS_FPU_DOUBLEPREC() ) {
3.142 tmp = FVn<<2;
3.143 + float *xf = &sh4r.fr[((~sh4r.fpscr)&FPSCR_FR)>>21][0];
3.144 float fv[4] = { FR(tmp), FR(tmp+1), FR(tmp+2), FR(tmp+3) };
3.145 - FR(tmp) = XF(0) * fv[0] + XF(4)*fv[1] +
3.146 - XF(8)*fv[2] + XF(12)*fv[3];
3.147 - FR(tmp+1) = XF(1) * fv[0] + XF(5)*fv[1] +
3.148 - XF(9)*fv[2] + XF(13)*fv[3];
3.149 - FR(tmp+2) = XF(2) * fv[0] + XF(6)*fv[1] +
3.150 - XF(10)*fv[2] + XF(14)*fv[3];
3.151 - FR(tmp+3) = XF(3) * fv[0] + XF(7)*fv[1] +
3.152 - XF(11)*fv[2] + XF(15)*fv[3];
3.153 + FR(tmp) = xf[1] * fv[0] + xf[5]*fv[1] +
3.154 + xf[9]*fv[2] + xf[13]*fv[3];
3.155 + FR(tmp+1) = xf[0] * fv[0] + xf[4]*fv[1] +
3.156 + xf[8]*fv[2] + xf[12]*fv[3];
3.157 + FR(tmp+2) = xf[3] * fv[0] + xf[7]*fv[1] +
3.158 + xf[11]*fv[2] + xf[15]*fv[3];
3.159 + FR(tmp+3) = xf[2] * fv[0] + xf[6]*fv[1] +
3.160 + xf[10]*fv[2] + xf[14]*fv[3];
3.161 }
3.162 :}
3.163 UNDEF {:
4.1 --- a/src/sh4/sh4x86.c Tue Sep 11 01:05:05 2007 +0000
4.2 +++ b/src/sh4/sh4x86.c Tue Sep 11 02:14:46 2007 +0000
4.3 @@ -1,5 +1,5 @@
4.4 /**
4.5 - * $Id: sh4x86.c,v 1.3 2007-09-04 08:40:23 nkeynes Exp $
4.6 + * $Id: sh4x86.c,v 1.4 2007-09-11 02:14:46 nkeynes Exp $
4.7 *
4.8 * SH4 => x86 translation. This version does no real optimization, it just
4.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
4.10 @@ -73,7 +73,7 @@
4.11 {
4.12 unsigned int i;
4.13 for( i=0; i<sh4_x86.backpatch_posn; i++ ) {
4.14 - *sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]));
4.15 + *sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]) - 4);
4.16 }
4.17 }
4.18
4.19 @@ -97,35 +97,20 @@
4.20 OP(REG_OFFSET(r[sh4reg]));
4.21 }
4.22
4.23 -/**
4.24 - * Load the SR register into an x86 register
4.25 - */
4.26 -static inline void read_sr( int x86reg )
4.27 +static inline void load_reg16s( int x86reg, int sh4reg )
4.28 {
4.29 - MOV_ebp_r32( R_M, x86reg );
4.30 - SHL1_r32( x86reg );
4.31 - OR_ebp_r32( R_Q, x86reg );
4.32 - SHL_imm8_r32( 7, x86reg );
4.33 - OR_ebp_r32( R_S, x86reg );
4.34 - SHL1_r32( x86reg );
4.35 - OR_ebp_r32( R_T, x86reg );
4.36 - OR_ebp_r32( R_SR, x86reg );
4.37 + OP(0x0F);
4.38 + OP(0xBF);
4.39 + MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
4.40 }
4.41
4.42 -static inline void write_sr( int x86reg )
4.43 +static inline void load_reg16u( int x86reg, int sh4reg )
4.44 {
4.45 - TEST_imm32_r32( SR_M, x86reg );
4.46 - SETNE_ebp(R_M);
4.47 - TEST_imm32_r32( SR_Q, x86reg );
4.48 - SETNE_ebp(R_Q);
4.49 - TEST_imm32_r32( SR_S, x86reg );
4.50 - SETNE_ebp(R_S);
4.51 - TEST_imm32_r32( SR_T, x86reg );
4.52 - SETNE_ebp(R_T);
4.53 - AND_imm32_r32( SR_MQSTMASK, x86reg );
4.54 - MOV_r32_ebp( x86reg, R_SR );
4.55 + OP(0x0F);
4.56 + OP(0xB7);
4.57 + MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
4.58 +
4.59 }
4.60 -
4.61
4.62 static inline void load_spreg( int x86reg, int regoffset )
4.63 {
4.64 @@ -160,6 +145,49 @@
4.65 OP(regoffset);
4.66 }
4.67
4.68 +
4.69 +#define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
4.70 +
4.71 +static inline void load_xf_bank( int bankreg )
4.72 +{
4.73 + load_spreg( bankreg, R_FPSCR );
4.74 + SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
4.75 + AND_imm8s_r32( 0x40, bankreg ); // Complete extraction
4.76 + OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
4.77 +}
4.78 +
4.79 +static inline void push_fr( int bankreg, int frm )
4.80 +{
4.81 + OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2); // FLD.S [bankreg + frm^1*4]
4.82 +}
4.83 +
4.84 +static inline void pop_fr( int bankreg, int frm )
4.85 +{
4.86 + OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
4.87 +}
4.88 +
4.89 +static inline void push_dr( int bankreg, int frm )
4.90 +{
4.91 + if( frm&1 ) {
4.92 + // this is technically undefined, but it seems to work consistently - high 32 bits
4.93 + // loaded from FRm (32-bits), low 32bits are 0.
4.94 + OP(0xFF); OP(0x70 + bankreg); OP((frm^1)<<2); // PUSH [bankreg + frm^1]
4.95 + PUSH_imm32(0);
4.96 +
4.97 +
4.98 + } else {
4.99 + OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
4.100 + }
4.101 +}
4.102 +
4.103 +static inline void pop_dr( int bankreg, int frm )
4.104 +{
4.105 + if( frm&1 ) {
4.106 + } else {
4.107 + OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
4.108 + }
4.109 +}
4.110 +
4.111 /**
4.112 * Note: clobbers EAX to make the indirect call - this isn't usually
4.113 * a problem since the callee will usually clobber it anyway.
4.114 @@ -248,7 +276,7 @@
4.115 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
4.116
4.117 #define RAISE_EXCEPTION( exc ) call_func1(sh4_raise_exception, exc);
4.118 -#define CHECKSLOTILLEGAL() if(sh4_x86.in_delay_slot) RAISE_EXCEPTION(EXC_SLOT_ILLEGAL)
4.119 +#define SLOTILLEGAL() RAISE_EXCEPTION(EXC_SLOT_ILLEGAL); return 1
4.120
4.121
4.122
4.123 @@ -259,9 +287,9 @@
4.124 void sh4_translate_begin_block()
4.125 {
4.126 PUSH_r32(R_EBP);
4.127 - PUSH_r32(R_ESI);
4.128 /* mov &sh4r, ebp */
4.129 load_imm32( R_EBP, (uint32_t)&sh4r );
4.130 + PUSH_r32(R_EDI);
4.131 PUSH_r32(R_ESI);
4.132
4.133 sh4_x86.in_delay_slot = FALSE;
4.134 @@ -273,16 +301,18 @@
4.135 /**
4.136 * Exit the block early (ie branch out), conditionally or otherwise
4.137 */
4.138 -void exit_block( uint32_t pc )
4.139 +void exit_block( )
4.140 {
4.141 - load_imm32( R_ECX, pc );
4.142 - store_spreg( R_ECX, REG_OFFSET(pc) );
4.143 + store_spreg( R_EDI, REG_OFFSET(pc) );
4.144 MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
4.145 load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
4.146 MUL_r32( R_ESI );
4.147 ADD_r32_r32( R_EAX, R_ECX );
4.148 store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
4.149 XOR_r32_r32( R_EAX, R_EAX );
4.150 + POP_r32(R_ESI);
4.151 + POP_r32(R_EDI);
4.152 + POP_r32(R_EBP);
4.153 RET();
4.154 }
4.155
4.156 @@ -292,7 +322,7 @@
4.157 void sh4_translate_end_block( sh4addr_t pc ) {
4.158 assert( !sh4_x86.in_delay_slot ); // should never stop here
4.159 // Normal termination - save PC, cycle count
4.160 - exit_block( pc );
4.161 + exit_block( );
4.162
4.163 uint8_t *end_ptr = xlat_output;
4.164 // Exception termination. Jump block for various exception codes:
4.165 @@ -348,7 +378,7 @@
4.166 case 0x0:
4.167 { /* STC SR, Rn */
4.168 uint32_t Rn = ((ir>>8)&0xF);
4.169 - read_sr( R_EAX );
4.170 + call_func0(sh4_read_sr);
4.171 store_reg( R_EAX, Rn );
4.172 }
4.173 break;
4.174 @@ -388,7 +418,8 @@
4.175 case 0x1:
4.176 { /* STC Rm_BANK, Rn */
4.177 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm_BANK = ((ir>>4)&0x7);
4.178 - /* TODO */
4.179 + load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
4.180 + store_reg( R_EAX, Rn );
4.181 }
4.182 break;
4.183 }
4.184 @@ -398,16 +429,42 @@
4.185 case 0x0:
4.186 { /* BSRF Rn */
4.187 uint32_t Rn = ((ir>>8)&0xF);
4.188 + if( sh4_x86.in_delay_slot ) {
4.189 + SLOTILLEGAL();
4.190 + } else {
4.191 + load_imm32( R_EAX, pc + 4 );
4.192 + store_spreg( R_EAX, R_PR );
4.193 + load_reg( R_EDI, Rn );
4.194 + ADD_r32_r32( R_EAX, R_EDI );
4.195 + sh4_x86.in_delay_slot = TRUE;
4.196 + INC_r32(R_ESI);
4.197 + return 0;
4.198 + }
4.199 }
4.200 break;
4.201 case 0x2:
4.202 { /* BRAF Rn */
4.203 uint32_t Rn = ((ir>>8)&0xF);
4.204 + if( sh4_x86.in_delay_slot ) {
4.205 + SLOTILLEGAL();
4.206 + } else {
4.207 + load_reg( R_EDI, Rn );
4.208 + sh4_x86.in_delay_slot = TRUE;
4.209 + INC_r32(R_ESI);
4.210 + return 0;
4.211 + }
4.212 }
4.213 break;
4.214 case 0x8:
4.215 { /* PREF @Rn */
4.216 uint32_t Rn = ((ir>>8)&0xF);
4.217 + load_reg( R_EAX, Rn );
4.218 + PUSH_r32( R_EAX );
4.219 + AND_imm32_r32( 0xFC000000, R_EAX );
4.220 + CMP_imm32_r32( 0xE0000000, R_EAX );
4.221 + JNE_rel8(8);
4.222 + call_func0( sh4_flush_store_queue );
4.223 + ADD_imm8s_r32( -4, R_ESP );
4.224 }
4.225 break;
4.226 case 0x9:
4.227 @@ -430,6 +487,7 @@
4.228 uint32_t Rn = ((ir>>8)&0xF);
4.229 load_reg( R_EAX, 0 );
4.230 load_reg( R_ECX, Rn );
4.231 + check_walign32( R_ECX );
4.232 MEM_WRITE_LONG( R_ECX, R_EAX );
4.233 }
4.234 break;
4.235 @@ -454,6 +512,7 @@
4.236 load_reg( R_EAX, 0 );
4.237 load_reg( R_ECX, Rn );
4.238 ADD_r32_r32( R_EAX, R_ECX );
4.239 + check_walign16( R_ECX );
4.240 load_reg( R_EAX, Rm );
4.241 MEM_WRITE_WORD( R_ECX, R_EAX );
4.242 }
4.243 @@ -464,6 +523,7 @@
4.244 load_reg( R_EAX, 0 );
4.245 load_reg( R_ECX, Rn );
4.246 ADD_r32_r32( R_EAX, R_ECX );
4.247 + check_walign32( R_ECX );
4.248 load_reg( R_EAX, Rm );
4.249 MEM_WRITE_LONG( R_ECX, R_EAX );
4.250 }
4.251 @@ -481,14 +541,21 @@
4.252 switch( (ir&0xFF0) >> 4 ) {
4.253 case 0x0:
4.254 { /* CLRT */
4.255 + CLC();
4.256 + SETC_t();
4.257 }
4.258 break;
4.259 case 0x1:
4.260 { /* SETT */
4.261 + STC();
4.262 + SETC_t();
4.263 }
4.264 break;
4.265 case 0x2:
4.266 { /* CLRMAC */
4.267 + XOR_r32_r32(R_EAX, R_EAX);
4.268 + store_spreg( R_EAX, R_MACL );
4.269 + store_spreg( R_EAX, R_MACH );
4.270 }
4.271 break;
4.272 case 0x3:
4.273 @@ -497,10 +564,14 @@
4.274 break;
4.275 case 0x4:
4.276 { /* CLRS */
4.277 + CLC();
4.278 + SETC_sh4r(R_S);
4.279 }
4.280 break;
4.281 case 0x5:
4.282 { /* SETS */
4.283 + STC();
4.284 + SETC_sh4r(R_S);
4.285 }
4.286 break;
4.287 default:
4.288 @@ -595,14 +666,34 @@
4.289 switch( (ir&0xFF0) >> 4 ) {
4.290 case 0x0:
4.291 { /* RTS */
4.292 + if( sh4_x86.in_delay_slot ) {
4.293 + SLOTILLEGAL();
4.294 + } else {
4.295 + load_spreg( R_EDI, R_PR );
4.296 + sh4_x86.in_delay_slot = TRUE;
4.297 + INC_r32(R_ESI);
4.298 + return 0;
4.299 + }
4.300 }
4.301 break;
4.302 case 0x1:
4.303 { /* SLEEP */
4.304 + /* TODO */
4.305 }
4.306 break;
4.307 case 0x2:
4.308 { /* RTE */
4.309 + check_priv();
4.310 + if( sh4_x86.in_delay_slot ) {
4.311 + SLOTILLEGAL();
4.312 + } else {
4.313 + load_spreg( R_EDI, R_PR );
4.314 + load_spreg( R_EAX, R_SSR );
4.315 + call_func1( sh4_write_sr, R_EAX );
4.316 + sh4_x86.in_delay_slot = TRUE;
4.317 + INC_r32(R_ESI);
4.318 + return 0;
4.319 + }
4.320 }
4.321 break;
4.322 default:
4.323 @@ -626,6 +717,7 @@
4.324 load_reg( R_EAX, 0 );
4.325 load_reg( R_ECX, Rm );
4.326 ADD_r32_r32( R_EAX, R_ECX );
4.327 + check_ralign16( R_ECX );
4.328 MEM_READ_WORD( R_ECX, R_EAX );
4.329 store_reg( R_EAX, Rn );
4.330 }
4.331 @@ -636,6 +728,7 @@
4.332 load_reg( R_EAX, 0 );
4.333 load_reg( R_ECX, Rm );
4.334 ADD_r32_r32( R_EAX, R_ECX );
4.335 + check_ralign32( R_ECX );
4.336 MEM_READ_LONG( R_ECX, R_EAX );
4.337 store_reg( R_EAX, Rn );
4.338 }
4.339 @@ -656,6 +749,7 @@
4.340 load_reg( R_ECX, Rn );
4.341 load_reg( R_EAX, Rm );
4.342 ADD_imm32_r32( disp, R_ECX );
4.343 + check_walign32( R_ECX );
4.344 MEM_WRITE_LONG( R_ECX, R_EAX );
4.345 }
4.346 break;
4.347 @@ -673,6 +767,7 @@
4.348 { /* MOV.W Rm, @Rn */
4.349 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
4.350 load_reg( R_ECX, Rn );
4.351 + check_walign16( R_ECX );
4.352 MEM_READ_WORD( R_ECX, R_EAX );
4.353 store_reg( R_EAX, Rn );
4.354 }
4.355 @@ -682,6 +777,7 @@
4.356 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
4.357 load_reg( R_EAX, Rm );
4.358 load_reg( R_ECX, Rn );
4.359 + check_walign32(R_ECX);
4.360 MEM_WRITE_LONG( R_ECX, R_EAX );
4.361 }
4.362 break;
4.363 @@ -699,6 +795,7 @@
4.364 { /* MOV.W Rm, @-Rn */
4.365 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
4.366 load_reg( R_ECX, Rn );
4.367 + check_walign16( R_ECX );
4.368 load_reg( R_EAX, Rm );
4.369 ADD_imm8s_r32( -2, R_ECX );
4.370 MEM_WRITE_WORD( R_ECX, R_EAX );
4.371 @@ -709,6 +806,7 @@
4.372 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
4.373 load_reg( R_EAX, Rm );
4.374 load_reg( R_ECX, Rn );
4.375 + check_walign32( R_ECX );
4.376 ADD_imm8s_r32( -4, R_ECX );
4.377 store_reg( R_ECX, Rn );
4.378 MEM_WRITE_LONG( R_ECX, R_EAX );
4.379 @@ -794,11 +892,19 @@
4.380 case 0xE:
4.381 { /* MULU.W Rm, Rn */
4.382 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
4.383 + load_reg16u( R_EAX, Rm );
4.384 + load_reg16u( R_ECX, Rn );
4.385 + MUL_r32( R_ECX );
4.386 + store_spreg( R_EAX, R_MACL );
4.387 }
4.388 break;
4.389 case 0xF:
4.390 { /* MULS.W Rm, Rn */
4.391 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
4.392 + load_reg16s( R_EAX, Rm );
4.393 + load_reg16s( R_ECX, Rn );
4.394 + MUL_r32( R_ECX );
4.395 + store_spreg( R_EAX, R_MACL );
4.396 }
4.397 break;
4.398 default:
4.399 @@ -838,6 +944,17 @@
4.400 case 0x4:
4.401 { /* DIV1 Rm, Rn */
4.402 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
4.403 + load_reg( R_ECX, Rn );
4.404 + LDC_t();
4.405 + RCL1_r32( R_ECX ); // OP2
4.406 + SETC_r32( R_EDX ); // Q
4.407 + load_spreg( R_EAX, R_Q );
4.408 + CMP_sh4r_r32( R_M, R_EAX );
4.409 + JE_rel8(8);
4.410 + ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_ECX );
4.411 + JMP_rel8(3);
4.412 + SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_ECX );
4.413 + // TODO
4.414 }
4.415 break;
4.416 case 0x5:
4.417 @@ -1091,12 +1208,11 @@
4.418 case 0x0:
4.419 { /* STC.L SR, @-Rn */
4.420 uint32_t Rn = ((ir>>8)&0xF);
4.421 - /* TODO */
4.422 - load_reg( R_ECX, Rn );
4.423 - ADD_imm8s_r32( -4, Rn );
4.424 - store_reg( R_ECX, Rn );
4.425 - read_sr( R_EAX );
4.426 - MEM_WRITE_LONG( R_ECX, R_EAX );
4.427 + load_reg( R_ECX, Rn );
4.428 + ADD_imm8s_r32( -4, Rn );
4.429 + store_reg( R_ECX, Rn );
4.430 + call_func0( sh4_read_sr );
4.431 + MEM_WRITE_LONG( R_ECX, R_EAX );
4.432 }
4.433 break;
4.434 case 0x1:
4.435 @@ -1147,6 +1263,11 @@
4.436 case 0x1:
4.437 { /* STC.L Rm_BANK, @-Rn */
4.438 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm_BANK = ((ir>>4)&0x7);
4.439 + load_reg( R_ECX, Rn );
4.440 + ADD_imm8s_r32( -4, Rn );
4.441 + store_reg( R_ECX, Rn );
4.442 + load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
4.443 + MEM_WRITE_LONG( R_ECX, R_EAX );
4.444 }
4.445 break;
4.446 }
4.447 @@ -1307,7 +1428,7 @@
4.448 ADD_imm8s_r32( 4, R_EAX );
4.449 store_reg( R_EAX, Rm );
4.450 MEM_READ_LONG( R_ECX, R_EAX );
4.451 - write_sr( R_EAX );
4.452 + call_func1( sh4_write_sr, R_EAX );
4.453 }
4.454 break;
4.455 case 0x1:
4.456 @@ -1362,6 +1483,12 @@
4.457 case 0x1:
4.458 { /* LDC.L @Rm+, Rn_BANK */
4.459 uint32_t Rm = ((ir>>8)&0xF); uint32_t Rn_BANK = ((ir>>4)&0x7);
4.460 + load_reg( R_EAX, Rm );
4.461 + MOV_r32_r32( R_EAX, R_ECX );
4.462 + ADD_imm8s_r32( 4, R_EAX );
4.463 + store_reg( R_EAX, Rm );
4.464 + MEM_READ_LONG( R_ECX, R_EAX );
4.465 + store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
4.466 }
4.467 break;
4.468 }
4.469 @@ -1489,6 +1616,16 @@
4.470 case 0x0:
4.471 { /* JSR @Rn */
4.472 uint32_t Rn = ((ir>>8)&0xF);
4.473 + if( sh4_x86.in_delay_slot ) {
4.474 + SLOTILLEGAL();
4.475 + } else {
4.476 + load_imm32( R_EAX, pc + 4 );
4.477 + store_spreg( R_EAX, R_PR );
4.478 + load_reg( R_EDI, Rn );
4.479 + sh4_x86.in_delay_slot = TRUE;
4.480 + INC_r32(R_ESI);
4.481 + return 0;
4.482 + }
4.483 }
4.484 break;
4.485 case 0x1:
4.486 @@ -1505,6 +1642,14 @@
4.487 case 0x2:
4.488 { /* JMP @Rn */
4.489 uint32_t Rn = ((ir>>8)&0xF);
4.490 + if( sh4_x86.in_delay_slot ) {
4.491 + SLOTILLEGAL();
4.492 + } else {
4.493 + load_reg( R_EDI, Rn );
4.494 + sh4_x86.in_delay_slot = TRUE;
4.495 + INC_r32(R_ESI);
4.496 + return 0;
4.497 + }
4.498 }
4.499 break;
4.500 default:
4.501 @@ -1555,7 +1700,7 @@
4.502 { /* LDC Rm, SR */
4.503 uint32_t Rm = ((ir>>8)&0xF);
4.504 load_reg( R_EAX, Rm );
4.505 - write_sr( R_EAX );
4.506 + call_func1( sh4_write_sr, R_EAX );
4.507 }
4.508 break;
4.509 case 0x1:
4.510 @@ -1594,6 +1739,8 @@
4.511 case 0x1:
4.512 { /* LDC Rm, Rn_BANK */
4.513 uint32_t Rm = ((ir>>8)&0xF); uint32_t Rn_BANK = ((ir>>4)&0x7);
4.514 + load_reg( R_EAX, Rm );
4.515 + store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
4.516 }
4.517 break;
4.518 }
4.519 @@ -1610,6 +1757,7 @@
4.520 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF); uint32_t disp = (ir&0xF)<<2;
4.521 load_reg( R_ECX, Rm );
4.522 ADD_imm8s_r32( disp, R_ECX );
4.523 + check_ralign32( R_ECX );
4.524 MEM_READ_LONG( R_ECX, R_EAX );
4.525 store_reg( R_EAX, Rn );
4.526 }
4.527 @@ -1628,6 +1776,7 @@
4.528 { /* MOV.W @Rm, Rn */
4.529 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
4.530 load_reg( R_ECX, Rm );
4.531 + check_ralign16( R_ECX );
4.532 MEM_READ_WORD( R_ECX, R_EAX );
4.533 store_reg( R_EAX, Rn );
4.534 }
4.535 @@ -1636,6 +1785,7 @@
4.536 { /* MOV.L @Rm, Rn */
4.537 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
4.538 load_reg( R_ECX, Rm );
4.539 + check_ralign32( R_ECX );
4.540 MEM_READ_LONG( R_ECX, R_EAX );
4.541 store_reg( R_EAX, Rn );
4.542 }
4.543 @@ -1662,6 +1812,7 @@
4.544 { /* MOV.W @Rm+, Rn */
4.545 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
4.546 load_reg( R_EAX, Rm );
4.547 + check_ralign16( R_EAX );
4.548 MOV_r32_r32( R_EAX, R_ECX );
4.549 ADD_imm8s_r32( 2, R_EAX );
4.550 store_reg( R_EAX, Rm );
4.551 @@ -1673,6 +1824,7 @@
4.552 { /* MOV.L @Rm+, Rn */
4.553 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
4.554 load_reg( R_EAX, Rm );
4.555 + check_ralign32( R_ECX );
4.556 MOV_r32_r32( R_EAX, R_ECX );
4.557 ADD_imm8s_r32( 4, R_EAX );
4.558 store_reg( R_EAX, Rm );
4.559 @@ -1785,6 +1937,7 @@
4.560 load_reg( R_ECX, Rn );
4.561 load_reg( R_EAX, 0 );
4.562 ADD_imm32_r32( disp, R_ECX );
4.563 + check_walign16( R_ECX );
4.564 MEM_WRITE_WORD( R_ECX, R_EAX );
4.565 }
4.566 break;
4.567 @@ -1802,6 +1955,7 @@
4.568 uint32_t Rm = ((ir>>4)&0xF); uint32_t disp = (ir&0xF)<<1;
4.569 load_reg( R_ECX, Rm );
4.570 ADD_imm32_r32( disp, R_ECX );
4.571 + check_ralign16( R_ECX );
4.572 MEM_READ_WORD( R_ECX, R_EAX );
4.573 store_reg( R_EAX, 0 );
4.574 }
4.575 @@ -1817,32 +1971,63 @@
4.576 case 0x9:
4.577 { /* BT disp */
4.578 int32_t disp = SIGNEXT8(ir&0xFF)<<1;
4.579 - /* If true, result PC += 4 + disp. else result PC = pc+2 */
4.580 - return pc + 2;
4.581 + if( sh4_x86.in_delay_slot ) {
4.582 + SLOTILLEGAL();
4.583 + } else {
4.584 + load_imm32( R_EDI, pc + 2 );
4.585 + CMP_imm8s_sh4r( 0, R_T );
4.586 + JE_rel8( 5 );
4.587 + load_imm32( R_EDI, disp + pc + 4 );
4.588 + INC_r32(R_ESI);
4.589 + return 1;
4.590 + }
4.591 }
4.592 break;
4.593 case 0xB:
4.594 { /* BF disp */
4.595 int32_t disp = SIGNEXT8(ir&0xFF)<<1;
4.596 - CMP_imm8s_ebp( 0, R_T );
4.597 - JNE_rel8( 1 );
4.598 - exit_block( disp + pc + 4 );
4.599 - return 1;
4.600 + if( sh4_x86.in_delay_slot ) {
4.601 + SLOTILLEGAL();
4.602 + } else {
4.603 + load_imm32( R_EDI, pc + 2 );
4.604 + CMP_imm8s_sh4r( 0, R_T );
4.605 + JNE_rel8( 5 );
4.606 + load_imm32( R_EDI, disp + pc + 4 );
4.607 + INC_r32(R_ESI);
4.608 + return 1;
4.609 + }
4.610 }
4.611 break;
4.612 case 0xD:
4.613 { /* BT/S disp */
4.614 int32_t disp = SIGNEXT8(ir&0xFF)<<1;
4.615 - return pc + 4;
4.616 + if( sh4_x86.in_delay_slot ) {
4.617 + SLOTILLEGAL();
4.618 + } else {
4.619 + load_imm32( R_EDI, pc + 2 );
4.620 + CMP_imm8s_sh4r( 0, R_T );
4.621 + JE_rel8( 5 );
4.622 + load_imm32( R_EDI, disp + pc + 4 );
4.623 + sh4_x86.in_delay_slot = TRUE;
4.624 + INC_r32(R_ESI);
4.625 + return 0;
4.626 + }
4.627 }
4.628 break;
4.629 case 0xF:
4.630 { /* BF/S disp */
4.631 int32_t disp = SIGNEXT8(ir&0xFF)<<1;
4.632 - CMP_imm8s_ebp( 0, R_T );
4.633 - JNE_rel8( 1 );
4.634 - exit_block( disp + pc + 4 );
4.635 - sh4_x86.in_delay_slot = TRUE;
4.636 + if( sh4_x86.in_delay_slot ) {
4.637 + SLOTILLEGAL();
4.638 + } else {
4.639 + load_imm32( R_EDI, pc + 2 );
4.640 + CMP_imm8s_sh4r( 0, R_T );
4.641 + JNE_rel8( 5 );
4.642 + load_imm32( R_EDI, disp + pc + 4 );
4.643 + sh4_x86.in_delay_slot = TRUE;
4.644 + INC_r32(R_ESI);
4.645 + return 0;
4.646 + }
4.647 }
4.648 break;
4.649 default:
4.650 @@ -1853,20 +2038,41 @@
4.651 case 0x9:
4.652 { /* MOV.W @(disp, PC), Rn */
4.653 uint32_t Rn = ((ir>>8)&0xF); uint32_t disp = (ir&0xFF)<<1;
4.654 - load_imm32( R_ECX, pc + disp + 4 );
4.655 - MEM_READ_WORD( R_ECX, R_EAX );
4.656 - store_reg( R_EAX, Rn );
4.657 + if( sh4_x86.in_delay_slot ) {
4.658 + SLOTILLEGAL();
4.659 + } else {
4.660 + load_imm32( R_ECX, pc + disp + 4 );
4.661 + MEM_READ_WORD( R_ECX, R_EAX );
4.662 + store_reg( R_EAX, Rn );
4.663 + }
4.664 }
4.665 break;
4.666 case 0xA:
4.667 { /* BRA disp */
4.668 int32_t disp = SIGNEXT12(ir&0xFFF)<<1;
4.669 - exit_block( disp + pc + 4 );
4.670 + if( sh4_x86.in_delay_slot ) {
4.671 + SLOTILLEGAL();
4.672 + } else {
4.673 + load_imm32( R_EDI, disp + pc + 4 );
4.674 + sh4_x86.in_delay_slot = TRUE;
4.675 + INC_r32(R_ESI);
4.676 + return 0;
4.677 + }
4.678 }
4.679 break;
4.680 case 0xB:
4.681 { /* BSR disp */
4.682 int32_t disp = SIGNEXT12(ir&0xFFF)<<1;
4.683 + if( sh4_x86.in_delay_slot ) {
4.684 + SLOTILLEGAL();
4.685 + } else {
4.686 + load_imm32( R_EAX, pc + 4 );
4.687 + store_spreg( R_EAX, R_PR );
4.688 + load_imm32( R_EDI, disp + pc + 4 );
4.689 + sh4_x86.in_delay_slot = TRUE;
4.690 + INC_r32(R_ESI);
4.691 + return 0;
4.692 + }
4.693 }
4.694 break;
4.695 case 0xC:
4.696 @@ -1886,6 +2092,7 @@
4.697 load_spreg( R_ECX, R_GBR );
4.698 load_reg( R_EAX, 0 );
4.699 ADD_imm32_r32( disp, R_ECX );
4.700 + check_walign16( R_ECX );
4.701 MEM_WRITE_WORD( R_ECX, R_EAX );
4.702 }
4.703 break;
4.704 @@ -1895,12 +2102,19 @@
4.705 load_spreg( R_ECX, R_GBR );
4.706 load_reg( R_EAX, 0 );
4.707 ADD_imm32_r32( disp, R_ECX );
4.708 + check_walign32( R_ECX );
4.709 MEM_WRITE_LONG( R_ECX, R_EAX );
4.710 }
4.711 break;
4.712 case 0x3:
4.713 { /* TRAPA #imm */
4.714 uint32_t imm = (ir&0xFF);
4.715 + if( sh4_x86.in_delay_slot ) {
4.716 + SLOTILLEGAL();
4.717 + } else {
4.718 + // TODO: Write TRA
4.719 + RAISE_EXCEPTION(EXC_TRAP);
4.720 + }
4.721 }
4.722 break;
4.723 case 0x4:
4.724 @@ -1917,6 +2131,7 @@
4.725 uint32_t disp = (ir&0xFF)<<1;
4.726 load_spreg( R_ECX, R_GBR );
4.727 ADD_imm32_r32( disp, R_ECX );
4.728 + check_ralign16( R_ECX );
4.729 MEM_READ_WORD( R_ECX, R_EAX );
4.730 store_reg( R_EAX, 0 );
4.731 }
4.732 @@ -1926,6 +2141,7 @@
4.733 uint32_t disp = (ir&0xFF)<<2;
4.734 load_spreg( R_ECX, R_GBR );
4.735 ADD_imm32_r32( disp, R_ECX );
4.736 + check_ralign32( R_ECX );
4.737 MEM_READ_LONG( R_ECX, R_EAX );
4.738 store_reg( R_EAX, 0 );
4.739 }
4.740 @@ -1933,8 +2149,12 @@
4.741 case 0x7:
4.742 { /* MOVA @(disp, PC), R0 */
4.743 uint32_t disp = (ir&0xFF)<<2;
4.744 - load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
4.745 - store_reg( R_ECX, 0 );
4.746 + if( sh4_x86.in_delay_slot ) {
4.747 + SLOTILLEGAL();
4.748 + } else {
4.749 + load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
4.750 + store_reg( R_ECX, 0 );
4.751 + }
4.752 }
4.753 break;
4.754 case 0x8:
4.755 @@ -1985,7 +2205,7 @@
4.756 uint32_t imm = (ir&0xFF);
4.757 load_reg( R_EAX, 0 );
4.758 load_spreg( R_ECX, R_GBR );
4.759 - ADD_r32_r32( R_EAX, R_EBX );
4.760 + ADD_r32_r32( R_EAX, R_ECX );
4.761 MEM_READ_BYTE( R_ECX, R_EAX );
4.762 AND_imm32_r32(imm, R_ECX );
4.763 MEM_WRITE_BYTE( R_ECX, R_EAX );
4.764 @@ -2005,6 +2225,12 @@
4.765 case 0xF:
4.766 { /* OR.B #imm, @(R0, GBR) */
4.767 uint32_t imm = (ir&0xFF);
4.768 + load_reg( R_EAX, 0 ); // EAX = R0
4.769 + load_spreg( R_ECX, R_GBR ); // ECX = GBR
4.770 + ADD_r32_r32( R_EAX, R_ECX ); // ECX = R0 + GBR (effective address)
4.771 + MEM_READ_BYTE( R_ECX, R_EAX ); // NOTE(review): assumes ECX still holds the address after this call - confirm
4.772 + OR_imm32_r32(imm, R_EAX ); // OR the immediate into the byte just read (EAX), not into the address (ECX)
4.773 + MEM_WRITE_BYTE( R_ECX, R_EAX );
4.774 }
4.775 break;
4.776 }
4.777 @@ -2012,9 +2238,13 @@
4.778 case 0xD:
4.779 { /* MOV.L @(disp, PC), Rn */
4.780 uint32_t Rn = ((ir>>8)&0xF); uint32_t disp = (ir&0xFF)<<2;
4.781 - load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
4.782 - MEM_READ_LONG( R_ECX, R_EAX );
4.783 - store_reg( R_EAX, 0 );
4.784 + if( sh4_x86.in_delay_slot ) {
4.785 + SLOTILLEGAL(); // PC-relative load is illegal in a delay slot
4.786 + } else {
4.787 + load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 ); // longword-aligned PC + 4 + disp, known at translation time
4.788 + MEM_READ_LONG( R_ECX, R_EAX );
4.789 + store_reg( R_EAX, Rn ); // destination is Rn (decoded above), not R0
4.790 + }
4.791 }
4.792 break;
4.793 case 0xE:
4.794 @@ -2121,6 +2351,17 @@
4.795 case 0x5:
4.796 { /* FABS FRn */
4.797 uint32_t FRn = ((ir>>8)&0xF);
4.798 + load_spreg( R_ECX, R_FPSCR );
4.799 + load_spreg( R_EDX, REG_OFFSET(fr_bank) );
4.800 + TEST_imm32_r32( FPSCR_PR, R_ECX );
4.801 + JNE_rel8(10);
4.802 + push_fr(R_EDX, FRn); // 3
4.803 + FABS_st0(); // 2
4.804 + pop_fr( R_EDX, FRn); //3
4.805 + JMP_rel8(8); // 2
4.806 + push_dr(R_EDX, FRn);
4.807 + FABS_st0();
4.808 + pop_dr(R_EDX, FRn);
4.809 }
4.810 break;
4.811 case 0x6:
4.812 @@ -2184,6 +2425,12 @@
4.813 break;
4.814 case 0x3:
4.815 { /* UNDEF */
4.816 + if( sh4_x86.in_delay_slot ) {
4.817 + RAISE_EXCEPTION(EXC_SLOT_ILLEGAL);
4.818 + } else {
4.819 + RAISE_EXCEPTION(EXC_ILLEGAL);
4.820 + }
4.821 + return 1;
4.822 }
4.823 break;
4.824 default:
4.825 @@ -2213,6 +2460,9 @@
4.826 }
4.827
4.828 INC_r32(R_ESI);
4.829 -
4.830 + if( sh4_x86.in_delay_slot ) {
4.831 + sh4_x86.in_delay_slot = FALSE;
4.832 + return 1;
4.833 + }
4.834 return 0;
4.835 }
5.1 --- a/src/sh4/sh4x86.in Tue Sep 11 01:05:05 2007 +0000
5.2 +++ b/src/sh4/sh4x86.in Tue Sep 11 02:14:46 2007 +0000
5.3 @@ -1,5 +1,5 @@
5.4 /**
5.5 - * $Id: sh4x86.in,v 1.3 2007-09-04 08:40:23 nkeynes Exp $
5.6 + * $Id: sh4x86.in,v 1.4 2007-09-11 02:14:46 nkeynes Exp $
5.7 *
5.8 * SH4 => x86 translation. This version does no real optimization, it just
5.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
5.10 @@ -73,7 +73,7 @@
5.11 {
5.12 unsigned int i;
5.13 for( i=0; i<sh4_x86.backpatch_posn; i++ ) {
5.14 - *sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]));
5.15 + *sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]) - 4);
5.16 }
5.17 }
5.18
5.19 @@ -97,35 +97,20 @@
5.20 OP(REG_OFFSET(r[sh4reg]));
5.21 }
5.22
5.23 -/**
5.24 - * Load the SR register into an x86 register
5.25 - */
5.26 -static inline void read_sr( int x86reg )
5.27 +static inline void load_reg16s( int x86reg, int sh4reg )
5.28 {
5.29 - MOV_ebp_r32( R_M, x86reg );
5.30 - SHL1_r32( x86reg );
5.31 - OR_ebp_r32( R_Q, x86reg );
5.32 - SHL_imm8_r32( 7, x86reg );
5.33 - OR_ebp_r32( R_S, x86reg );
5.34 - SHL1_r32( x86reg );
5.35 - OR_ebp_r32( R_T, x86reg );
5.36 - OR_ebp_r32( R_SR, x86reg );
5.37 + OP(0x0F);
5.38 + OP(0xBF);
5.39 + MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
5.40 }
5.41
5.42 -static inline void write_sr( int x86reg )
5.43 +static inline void load_reg16u( int x86reg, int sh4reg )
5.44 {
5.45 - TEST_imm32_r32( SR_M, x86reg );
5.46 - SETNE_ebp(R_M);
5.47 - TEST_imm32_r32( SR_Q, x86reg );
5.48 - SETNE_ebp(R_Q);
5.49 - TEST_imm32_r32( SR_S, x86reg );
5.50 - SETNE_ebp(R_S);
5.51 - TEST_imm32_r32( SR_T, x86reg );
5.52 - SETNE_ebp(R_T);
5.53 - AND_imm32_r32( SR_MQSTMASK, x86reg );
5.54 - MOV_r32_ebp( x86reg, R_SR );
5.55 + OP(0x0F);
5.56 + OP(0xB7);
5.57 + MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
5.58 +
5.59 }
5.60 -
5.61
5.62 static inline void load_spreg( int x86reg, int regoffset )
5.63 {
5.64 @@ -160,6 +145,49 @@
5.65 OP(regoffset);
5.66 }
5.67
5.68 +
5.69 +#define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
5.70 +
5.71 +static inline void load_xf_bank( int bankreg )
5.72 +{
5.73 + load_spreg( bankreg, R_FPSCR );
5.74 + SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
5.75 + AND_imm8s_r32( 0x40, bankreg ); // Complete extraction
5.76 + OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
5.77 +}
5.78 +
5.79 +static inline void push_fr( int bankreg, int frm )
5.80 +{
5.81 + OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2); // FLD.S [bankreg + frm^1*4]
5.82 +}
5.83 +
5.84 +static inline void pop_fr( int bankreg, int frm )
5.85 +{
5.86 + OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FSTP.S [bankreg + frm^1*4] (D9 /3: store single and pop ST0)
5.87 +}
5.88 +
5.89 +static inline void push_dr( int bankreg, int frm )
5.90 +{
5.91 + if( frm&1 ) {
5.92 + // this is technically undefined, but it seems to work consistently - high 32 bits
5.93 + // loaded from FRm (32-bits), low 32bits are 0.
5.94 + OP(0xFF); OP(0x70 + bankreg); OP((frm^1)<<2); // PUSH [bankreg + frm^1]
5.95 + PUSH_imm32(0);
5.96 +
5.97 +
5.98 + } else {
5.99 + OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
5.100 + }
5.101 +}
5.102 +
5.103 +static inline void pop_dr( int bankreg, int frm )
5.104 +{
5.105 + if( frm&1 ) { // NOTE(review): nothing is emitted for an odd frm - looks unfinished, confirm
5.106 + } else {
5.107 + OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FSTP.D [bankreg + frm*4] (DD /3: store double and pop ST0)
5.108 + }
5.109 +}
5.110 +
5.111 /**
5.112 * Note: clobbers EAX to make the indirect call - this isn't usually
5.113 * a problem since the callee will usually clobber it anyway.
5.114 @@ -248,7 +276,7 @@
5.115 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
5.116
5.117 #define RAISE_EXCEPTION( exc ) call_func1(sh4_raise_exception, exc);
5.118 -#define CHECKSLOTILLEGAL() if(sh4_x86.in_delay_slot) RAISE_EXCEPTION(EXC_SLOT_ILLEGAL)
5.119 +#define SLOTILLEGAL() RAISE_EXCEPTION(EXC_SLOT_ILLEGAL); return 1
5.120
5.121
5.122
5.123 @@ -259,9 +287,9 @@
5.124 void sh4_translate_begin_block()
5.125 {
5.126 PUSH_r32(R_EBP);
5.127 - PUSH_r32(R_ESI);
5.128 /* mov &sh4r, ebp */
5.129 load_imm32( R_EBP, (uint32_t)&sh4r );
5.130 + PUSH_r32(R_EDI);
5.131 PUSH_r32(R_ESI);
5.132
5.133 sh4_x86.in_delay_slot = FALSE;
5.134 @@ -273,16 +301,18 @@
5.135 /**
5.136 * Exit the block early (ie branch out), conditionally or otherwise
5.137 */
5.138 -void exit_block( uint32_t pc )
5.139 +void exit_block( )
5.140 {
5.141 - load_imm32( R_ECX, pc );
5.142 - store_spreg( R_ECX, REG_OFFSET(pc) );
5.143 + store_spreg( R_EDI, REG_OFFSET(pc) );
5.144 MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
5.145 load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
5.146 MUL_r32( R_ESI );
5.147 ADD_r32_r32( R_EAX, R_ECX );
5.148 store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
5.149 XOR_r32_r32( R_EAX, R_EAX );
5.150 + POP_r32(R_ESI);
5.151 + POP_r32(R_EDI);
5.152 + POP_r32(R_EBP);
5.153 RET();
5.154 }
5.155
5.156 @@ -292,7 +322,7 @@
5.157 void sh4_translate_end_block( sh4addr_t pc ) {
5.158 assert( !sh4_x86.in_delay_slot ); // should never stop here
5.159 // Normal termination - save PC, cycle count
5.160 - exit_block( pc );
5.161 + exit_block( );
5.162
5.163 uint8_t *end_ptr = xlat_output;
5.164 // Exception termination. Jump block for various exception codes:
5.165 @@ -380,7 +410,7 @@
5.166 AND.B #imm, @(R0, GBR) {:
5.167 load_reg( R_EAX, 0 );
5.168 load_spreg( R_ECX, R_GBR );
5.169 - ADD_r32_r32( R_EAX, R_EBX );
5.170 + ADD_r32_r32( R_EAX, R_ECX );
5.171 MEM_READ_BYTE( R_ECX, R_EAX );
5.172 AND_imm32_r32(imm, R_ECX );
5.173 MEM_WRITE_BYTE( R_ECX, R_EAX );
5.174 @@ -460,7 +490,19 @@
5.175 store_spreg( R_EAX, R_M );
5.176 store_spreg( R_EAX, R_T );
5.177 :}
5.178 -DIV1 Rm, Rn {: :}
5.179 +DIV1 Rm, Rn {:
5.180 + load_reg( R_ECX, Rn );
5.181 + LDC_t();
5.182 + RCL1_r32( R_ECX ); // OP2
5.183 + SETC_r32( R_EDX ); // Q
5.184 + load_spreg( R_EAX, R_Q );
5.185 + CMP_sh4r_r32( R_M, R_EAX );
5.186 + JE_rel8(8);
5.187 + ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_ECX );
5.188 + JMP_rel8(3);
5.189 + SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_ECX );
5.190 + // TODO
5.191 +:}
5.192 DMULS.L Rm, Rn {:
5.193 load_reg( R_EAX, Rm );
5.194 load_reg( R_ECX, Rn );
5.195 @@ -513,9 +555,18 @@
5.196 MUL_r32( R_ECX );
5.197 store_spreg( R_EAX, R_MACL );
5.198 :}
5.199 -MULS.W Rm, Rn {:
5.200 +MULS.W Rm, Rn {:
5.201 + load_reg16s( R_EAX, Rm );
5.202 + load_reg16s( R_ECX, Rn );
5.203 + MUL_r32( R_ECX );
5.204 + store_spreg( R_EAX, R_MACL );
5.205 :}
5.206 -MULU.W Rm, Rn {: :}
5.207 +MULU.W Rm, Rn {:
5.208 + load_reg16u( R_EAX, Rm );
5.209 + load_reg16u( R_ECX, Rn );
5.210 + MUL_r32( R_ECX );
5.211 + store_spreg( R_EAX, R_MACL );
5.212 +:}
5.213 NEG Rm, Rn {:
5.214 load_reg( R_EAX, Rm );
5.215 NEG_r32( R_EAX );
5.216 @@ -545,7 +596,14 @@
5.217 OR_imm32_r32(imm, R_EAX);
5.218 store_reg( R_EAX, 0 );
5.219 :}
5.220 -OR.B #imm, @(R0, GBR) {: :}
5.221 +OR.B #imm, @(R0, GBR) {:
5.222 + load_reg( R_EAX, 0 );
5.223 + load_spreg( R_ECX, R_GBR );
5.224 + ADD_r32_r32( R_EAX, R_ECX );
5.225 + MEM_READ_BYTE( R_ECX, R_EAX );
5.226 + OR_imm32_r32(imm, R_ECX );
5.227 + MEM_WRITE_BYTE( R_ECX, R_EAX );
5.228 +:}
5.229 ROTCL Rn {:
5.230 load_reg( R_EAX, Rn );
5.231 LDC_t();
5.232 @@ -811,14 +869,16 @@
5.233 MEM_READ_BYTE( R_ECX, R_EAX );
5.234 store_reg( R_EAX, 0 );
5.235 :}
5.236 -MOV.L Rm, @Rn {:
5.237 +MOV.L Rm, @Rn {:
5.238 load_reg( R_EAX, Rm );
5.239 load_reg( R_ECX, Rn );
5.240 + check_walign32(R_ECX);
5.241 MEM_WRITE_LONG( R_ECX, R_EAX );
5.242 :}
5.243 MOV.L Rm, @-Rn {:
5.244 load_reg( R_EAX, Rm );
5.245 load_reg( R_ECX, Rn );
5.246 + check_walign32( R_ECX );
5.247 ADD_imm8s_r32( -4, R_ECX );
5.248 store_reg( R_ECX, Rn );
5.249 MEM_WRITE_LONG( R_ECX, R_EAX );
5.250 @@ -827,6 +887,7 @@
5.251 load_reg( R_EAX, 0 );
5.252 load_reg( R_ECX, Rn );
5.253 ADD_r32_r32( R_EAX, R_ECX );
5.254 + check_walign32( R_ECX );
5.255 load_reg( R_EAX, Rm );
5.256 MEM_WRITE_LONG( R_ECX, R_EAX );
5.257 :}
5.258 @@ -834,21 +895,25 @@
5.259 load_spreg( R_ECX, R_GBR );
5.260 load_reg( R_EAX, 0 );
5.261 ADD_imm32_r32( disp, R_ECX );
5.262 + check_walign32( R_ECX );
5.263 MEM_WRITE_LONG( R_ECX, R_EAX );
5.264 :}
5.265 MOV.L Rm, @(disp, Rn) {:
5.266 load_reg( R_ECX, Rn );
5.267 load_reg( R_EAX, Rm );
5.268 ADD_imm32_r32( disp, R_ECX );
5.269 + check_walign32( R_ECX );
5.270 MEM_WRITE_LONG( R_ECX, R_EAX );
5.271 :}
5.272 MOV.L @Rm, Rn {:
5.273 load_reg( R_ECX, Rm );
5.274 + check_ralign32( R_ECX );
5.275 MEM_READ_LONG( R_ECX, R_EAX );
5.276 store_reg( R_EAX, Rn );
5.277 :}
5.278 MOV.L @Rm+, Rn {:
5.279 load_reg( R_EAX, Rm );
5.280 + check_ralign32( R_EAX ); // the address is in EAX here; ECX is not loaded until the MOV below
5.281 MOV_r32_r32( R_EAX, R_ECX );
5.282 ADD_imm8s_r32( 4, R_EAX );
5.283 store_reg( R_EAX, Rm );
5.284 @@ -859,33 +924,42 @@
5.285 load_reg( R_EAX, 0 );
5.286 load_reg( R_ECX, Rm );
5.287 ADD_r32_r32( R_EAX, R_ECX );
5.288 + check_ralign32( R_ECX );
5.289 MEM_READ_LONG( R_ECX, R_EAX );
5.290 store_reg( R_EAX, Rn );
5.291 :}
5.292 MOV.L @(disp, GBR), R0 {:
5.293 load_spreg( R_ECX, R_GBR );
5.294 ADD_imm32_r32( disp, R_ECX );
5.295 + check_ralign32( R_ECX );
5.296 MEM_READ_LONG( R_ECX, R_EAX );
5.297 store_reg( R_EAX, 0 );
5.298 :}
5.299 MOV.L @(disp, PC), Rn {:
5.300 - load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
5.301 - MEM_READ_LONG( R_ECX, R_EAX );
5.302 - store_reg( R_EAX, 0 );
5.303 + if( sh4_x86.in_delay_slot ) {
5.304 + SLOTILLEGAL(); // PC-relative load is illegal in a delay slot
5.305 + } else {
5.306 + load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 ); // longword-aligned PC + 4 + disp, known at translation time
5.307 + MEM_READ_LONG( R_ECX, R_EAX );
5.308 + store_reg( R_EAX, Rn ); // destination is the rule's Rn operand, not R0
5.309 + }
5.310 :}
5.311 MOV.L @(disp, Rm), Rn {:
5.312 load_reg( R_ECX, Rm );
5.313 ADD_imm8s_r32( disp, R_ECX );
5.314 + check_ralign32( R_ECX );
5.315 MEM_READ_LONG( R_ECX, R_EAX );
5.316 store_reg( R_EAX, Rn );
5.317 :}
5.318 MOV.W Rm, @Rn {:
5.319 load_reg( R_ECX, Rn );
5.320 + check_walign16( R_ECX );
5.321 MEM_READ_WORD( R_ECX, R_EAX );
5.322 store_reg( R_EAX, Rn );
5.323 :}
5.324 MOV.W Rm, @-Rn {:
5.325 load_reg( R_ECX, Rn );
5.326 + check_walign16( R_ECX );
5.327 load_reg( R_EAX, Rm );
5.328 ADD_imm8s_r32( -2, R_ECX );
5.329 MEM_WRITE_WORD( R_ECX, R_EAX );
5.330 @@ -894,6 +968,7 @@
5.331 load_reg( R_EAX, 0 );
5.332 load_reg( R_ECX, Rn );
5.333 ADD_r32_r32( R_EAX, R_ECX );
5.334 + check_walign16( R_ECX );
5.335 load_reg( R_EAX, Rm );
5.336 MEM_WRITE_WORD( R_ECX, R_EAX );
5.337 :}
5.338 @@ -901,21 +976,25 @@
5.339 load_spreg( R_ECX, R_GBR );
5.340 load_reg( R_EAX, 0 );
5.341 ADD_imm32_r32( disp, R_ECX );
5.342 + check_walign16( R_ECX );
5.343 MEM_WRITE_WORD( R_ECX, R_EAX );
5.344 :}
5.345 MOV.W R0, @(disp, Rn) {:
5.346 load_reg( R_ECX, Rn );
5.347 load_reg( R_EAX, 0 );
5.348 ADD_imm32_r32( disp, R_ECX );
5.349 + check_walign16( R_ECX );
5.350 MEM_WRITE_WORD( R_ECX, R_EAX );
5.351 :}
5.352 MOV.W @Rm, Rn {:
5.353 load_reg( R_ECX, Rm );
5.354 + check_ralign16( R_ECX );
5.355 MEM_READ_WORD( R_ECX, R_EAX );
5.356 store_reg( R_EAX, Rn );
5.357 :}
5.358 MOV.W @Rm+, Rn {:
5.359 load_reg( R_EAX, Rm );
5.360 + check_ralign16( R_EAX );
5.361 MOV_r32_r32( R_EAX, R_ECX );
5.362 ADD_imm8s_r32( 2, R_EAX );
5.363 store_reg( R_EAX, Rm );
5.364 @@ -926,77 +1005,242 @@
5.365 load_reg( R_EAX, 0 );
5.366 load_reg( R_ECX, Rm );
5.367 ADD_r32_r32( R_EAX, R_ECX );
5.368 + check_ralign16( R_ECX );
5.369 MEM_READ_WORD( R_ECX, R_EAX );
5.370 store_reg( R_EAX, Rn );
5.371 :}
5.372 MOV.W @(disp, GBR), R0 {:
5.373 load_spreg( R_ECX, R_GBR );
5.374 ADD_imm32_r32( disp, R_ECX );
5.375 + check_ralign16( R_ECX );
5.376 MEM_READ_WORD( R_ECX, R_EAX );
5.377 store_reg( R_EAX, 0 );
5.378 :}
5.379 MOV.W @(disp, PC), Rn {:
5.380 - load_imm32( R_ECX, pc + disp + 4 );
5.381 - MEM_READ_WORD( R_ECX, R_EAX );
5.382 - store_reg( R_EAX, Rn );
5.383 + if( sh4_x86.in_delay_slot ) {
5.384 + SLOTILLEGAL();
5.385 + } else {
5.386 + load_imm32( R_ECX, pc + disp + 4 );
5.387 + MEM_READ_WORD( R_ECX, R_EAX );
5.388 + store_reg( R_EAX, Rn );
5.389 + }
5.390 :}
5.391 MOV.W @(disp, Rm), R0 {:
5.392 load_reg( R_ECX, Rm );
5.393 ADD_imm32_r32( disp, R_ECX );
5.394 + check_ralign16( R_ECX );
5.395 MEM_READ_WORD( R_ECX, R_EAX );
5.396 store_reg( R_EAX, 0 );
5.397 :}
5.398 MOVA @(disp, PC), R0 {:
5.399 - load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
5.400 - store_reg( R_ECX, 0 );
5.401 + if( sh4_x86.in_delay_slot ) {
5.402 + SLOTILLEGAL();
5.403 + } else {
5.404 + load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
5.405 + store_reg( R_ECX, 0 );
5.406 + }
5.407 :}
5.408 MOVCA.L R0, @Rn {:
5.409 load_reg( R_EAX, 0 );
5.410 load_reg( R_ECX, Rn );
5.411 + check_walign32( R_ECX );
5.412 MEM_WRITE_LONG( R_ECX, R_EAX );
5.413 :}
5.414
5.415 /* Control transfer instructions */
5.416 -BF disp {:
5.417 - CMP_imm8s_ebp( 0, R_T );
5.418 - JNE_rel8( 1 );
5.419 - exit_block( disp + pc + 4 );
5.420 +BF disp {:
5.421 + if( sh4_x86.in_delay_slot ) {
5.422 + SLOTILLEGAL();
5.423 + } else {
5.424 + load_imm32( R_EDI, pc + 2 );
5.425 + CMP_imm8s_sh4r( 0, R_T );
5.426 + JNE_rel8( 5 );
5.427 + load_imm32( R_EDI, disp + pc + 4 );
5.428 + INC_r32(R_ESI);
5.429 + return 1;
5.430 + }
5.431 +:}
5.432 +BF/S disp {:
5.433 + if( sh4_x86.in_delay_slot ) {
5.434 + SLOTILLEGAL();
5.435 + } else {
5.436 + load_imm32( R_EDI, pc + 2 );
5.437 + CMP_imm8s_sh4r( 0, R_T );
5.438 + JNE_rel8( 5 );
5.439 + load_imm32( R_EDI, disp + pc + 4 );
5.440 + sh4_x86.in_delay_slot = TRUE;
5.441 + INC_r32(R_ESI);
5.442 + return 0;
5.443 + }
5.444 +:}
5.445 +BRA disp {:
5.446 + if( sh4_x86.in_delay_slot ) {
5.447 + SLOTILLEGAL();
5.448 + } else {
5.449 + load_imm32( R_EDI, disp + pc + 4 );
5.450 + sh4_x86.in_delay_slot = TRUE;
5.451 + INC_r32(R_ESI);
5.452 + return 0;
5.453 + }
5.454 +:}
5.455 +BRAF Rn {:
5.456 + if( sh4_x86.in_delay_slot ) {
5.457 + SLOTILLEGAL(); // a branch may not sit in another branch's delay slot
5.458 + } else {
5.459 + load_reg( R_EDI, Rn ); ADD_imm32_r32( pc + 4, R_EDI ); // target = pc + 4 + Rn, as BSRF computes it
5.460 + sh4_x86.in_delay_slot = TRUE; // next instruction is translated as the delay slot
5.461 + INC_r32(R_ESI);
5.462 + return 0;
5.463 + }
5.464 +:}
5.465 +BSR disp {:
5.466 + if( sh4_x86.in_delay_slot ) {
5.467 + SLOTILLEGAL();
5.468 + } else {
5.469 + load_imm32( R_EAX, pc + 4 );
5.470 + store_spreg( R_EAX, R_PR );
5.471 + load_imm32( R_EDI, disp + pc + 4 );
5.472 + sh4_x86.in_delay_slot = TRUE;
5.473 + INC_r32(R_ESI);
5.474 + return 0;
5.475 + }
5.476 +:}
5.477 +BSRF Rn {:
5.478 + if( sh4_x86.in_delay_slot ) {
5.479 + SLOTILLEGAL();
5.480 + } else {
5.481 + load_imm32( R_EAX, pc + 4 );
5.482 + store_spreg( R_EAX, R_PR );
5.483 + load_reg( R_EDI, Rn );
5.484 + ADD_r32_r32( R_EAX, R_EDI );
5.485 + sh4_x86.in_delay_slot = TRUE;
5.486 + INC_r32(R_ESI);
5.487 + return 0;
5.488 + }
5.489 +:}
5.490 +BT disp {:
5.491 + if( sh4_x86.in_delay_slot ) {
5.492 + SLOTILLEGAL();
5.493 + } else {
5.494 + load_imm32( R_EDI, pc + 2 );
5.495 + CMP_imm8s_sh4r( 0, R_T );
5.496 + JE_rel8( 5 );
5.497 + load_imm32( R_EDI, disp + pc + 4 );
5.498 + INC_r32(R_ESI);
5.499 + return 1;
5.500 + }
5.501 +:}
5.502 +BT/S disp {:
5.503 + if( sh4_x86.in_delay_slot ) {
5.504 + SLOTILLEGAL();
5.505 + } else {
5.506 + load_imm32( R_EDI, pc + 2 );
5.507 + CMP_imm8s_sh4r( 0, R_T );
5.508 + JE_rel8( 5 );
5.509 + load_imm32( R_EDI, disp + pc + 4 );
5.510 + sh4_x86.in_delay_slot = TRUE;
5.511 + INC_r32(R_ESI);
5.512 + return 0;
5.513 + }
5.514 +:}
5.515 +JMP @Rn {:
5.516 + if( sh4_x86.in_delay_slot ) {
5.517 + SLOTILLEGAL();
5.518 + } else {
5.519 + load_reg( R_EDI, Rn );
5.520 + sh4_x86.in_delay_slot = TRUE;
5.521 + INC_r32(R_ESI);
5.522 + return 0;
5.523 + }
5.524 +:}
5.525 +JSR @Rn {:
5.526 + if( sh4_x86.in_delay_slot ) {
5.527 + SLOTILLEGAL();
5.528 + } else {
5.529 + load_imm32( R_EAX, pc + 4 );
5.530 + store_spreg( R_EAX, R_PR );
5.531 + load_reg( R_EDI, Rn );
5.532 + sh4_x86.in_delay_slot = TRUE;
5.533 + INC_r32(R_ESI);
5.534 + return 0;
5.535 + }
5.536 +:}
5.537 +RTE {:
5.538 + check_priv();
5.539 + if( sh4_x86.in_delay_slot ) {
5.540 + SLOTILLEGAL(); // a branch may not sit in another branch's delay slot
5.541 + } else {
5.542 + load_spreg( R_EDI, REG_OFFSET(spc) ); // return address comes from SPC (PR is RTS's source)
5.543 + load_spreg( R_EAX, R_SSR );
5.544 + call_func1( sh4_write_sr, R_EAX ); // restore SR from SSR
5.545 + sh4_x86.in_delay_slot = TRUE; // next instruction is translated as the delay slot
5.546 + INC_r32(R_ESI);
5.547 + return 0;
5.548 + }
5.549 +:}
5.550 +RTS {:
5.551 + if( sh4_x86.in_delay_slot ) {
5.552 + SLOTILLEGAL();
5.553 + } else {
5.554 + load_spreg( R_EDI, R_PR );
5.555 + sh4_x86.in_delay_slot = TRUE;
5.556 + INC_r32(R_ESI);
5.557 + return 0;
5.558 + }
5.559 +:}
5.560 +TRAPA #imm {:
5.561 + if( sh4_x86.in_delay_slot ) {
5.562 + SLOTILLEGAL();
5.563 + } else {
5.564 + // TODO: Write TRA
5.565 + RAISE_EXCEPTION(EXC_TRAP);
5.566 + }
5.567 +:}
5.568 +UNDEF {:
5.569 + if( sh4_x86.in_delay_slot ) {
5.570 + RAISE_EXCEPTION(EXC_SLOT_ILLEGAL);
5.571 + } else {
5.572 + RAISE_EXCEPTION(EXC_ILLEGAL);
5.573 + }
5.574 return 1;
5.575 :}
5.576 -BF/S disp {:
5.577 - CMP_imm8s_ebp( 0, R_T );
5.578 - JNE_rel8( 1 );
5.579 - exit_block( disp + pc + 4 );
5.580 - sh4_x86.in_delay_slot = TRUE;
5.581 +
5.582 +CLRMAC {:
5.583 + XOR_r32_r32(R_EAX, R_EAX);
5.584 + store_spreg( R_EAX, R_MACL );
5.585 + store_spreg( R_EAX, R_MACH );
5.586 :}
5.587 -BRA disp {:
5.588 - exit_block( disp + pc + 4 );
5.589 +CLRS {:
5.590 + CLC();
5.591 + SETC_sh4r(R_S);
5.592 :}
5.593 -BRAF Rn {: :}
5.594 -BSR disp {: :}
5.595 -BSRF Rn {: :}
5.596 -BT disp {: /* If true, result PC += 4 + disp. else result PC = pc+2 */
5.597 - return pc + 2;
5.598 +CLRT {:
5.599 + CLC();
5.600 + SETC_t();
5.601 :}
5.602 -BT/S disp {:
5.603 -
5.604 - return pc + 4;
5.605 +SETS {:
5.606 + STC();
5.607 + SETC_sh4r(R_S);
5.608 :}
5.609 -JMP @Rn {: :}
5.610 -JSR @Rn {: :}
5.611 -RTE {: :}
5.612 -RTS {: :}
5.613 -TRAPA #imm {: :}
5.614 -UNDEF {: :}
5.615 -
5.616 -CLRMAC {: :}
5.617 -CLRS {: :}
5.618 -CLRT {: :}
5.619 -SETS {: :}
5.620 -SETT {: :}
5.621 +SETT {:
5.622 + STC();
5.623 + SETC_t();
5.624 +:}
5.625
5.626 /* Floating point instructions */
5.627 -FABS FRn {: :}
5.628 +FABS FRn {:
5.629 + load_spreg( R_ECX, R_FPSCR );
5.630 + load_spreg( R_EDX, REG_OFFSET(fr_bank) );
5.631 + TEST_imm32_r32( FPSCR_PR, R_ECX );
5.632 + JNE_rel8(10);
5.633 + push_fr(R_EDX, FRn); // 3
5.634 + FABS_st0(); // 2
5.635 + pop_fr( R_EDX, FRn); //3
5.636 + JMP_rel8(8); // 2
5.637 + push_dr(R_EDX, FRn);
5.638 + FABS_st0();
5.639 + pop_dr(R_EDX, FRn);
5.640 +:}
5.641 FADD FRm, FRn {: :}
5.642 FCMP/EQ FRm, FRn {: :}
5.643 FCMP/GT FRm, FRn {: :}
5.644 @@ -1031,7 +1275,7 @@
5.645 /* Processor control instructions */
5.646 LDC Rm, SR {:
5.647 load_reg( R_EAX, Rm );
5.648 - write_sr( R_EAX );
5.649 + call_func1( sh4_write_sr, R_EAX );
5.650 :}
5.651 LDC Rm, GBR {:
5.652 load_reg( R_EAX, Rm );
5.653 @@ -1057,7 +1301,10 @@
5.654 load_reg( R_EAX, Rm );
5.655 store_spreg( R_EAX, R_DBR );
5.656 :}
5.657 -LDC Rm, Rn_BANK {: :}
5.658 +LDC Rm, Rn_BANK {:
5.659 + load_reg( R_EAX, Rm );
5.660 + store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
5.661 +:}
5.662 LDC.L @Rm+, GBR {:
5.663 load_reg( R_EAX, Rm );
5.664 MOV_r32_r32( R_EAX, R_ECX );
5.665 @@ -1072,7 +1319,7 @@
5.666 ADD_imm8s_r32( 4, R_EAX );
5.667 store_reg( R_EAX, Rm );
5.668 MEM_READ_LONG( R_ECX, R_EAX );
5.669 - write_sr( R_EAX );
5.670 + call_func1( sh4_write_sr, R_EAX );
5.671 :}
5.672 LDC.L @Rm+, VBR {:
5.673 load_reg( R_EAX, Rm );
5.674 @@ -1115,6 +1362,12 @@
5.675 store_spreg( R_EAX, R_DBR );
5.676 :}
5.677 LDC.L @Rm+, Rn_BANK {:
5.678 + load_reg( R_EAX, Rm );
5.679 + MOV_r32_r32( R_EAX, R_ECX );
5.680 + ADD_imm8s_r32( 4, R_EAX );
5.681 + store_reg( R_EAX, Rm );
5.682 + MEM_READ_LONG( R_ECX, R_EAX );
5.683 + store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
5.684 :}
5.685 LDS Rm, FPSCR {:
5.686 load_reg( R_EAX, Rm );
5.687 @@ -1180,10 +1433,18 @@
5.688 OCBI @Rn {: :}
5.689 OCBP @Rn {: :}
5.690 OCBWB @Rn {: :}
5.691 -PREF @Rn {: :}
5.692 -SLEEP {: :}
5.693 +PREF @Rn {:
5.694 + load_reg( R_EAX, Rn );
5.695 + PUSH_r32( R_EAX ); // address argument for sh4_flush_store_queue
5.696 + AND_imm32_r32( 0xFC000000, R_EAX );
5.697 + CMP_imm32_r32( 0xE0000000, R_EAX ); // only addresses whose top six bits match 0xE0000000 trigger a flush
5.698 + JNE_rel8(8); // NOTE(review): 8 must equal the byte length of the call sequence below so this lands on the ADD - confirm
5.699 + call_func0( sh4_flush_store_queue );
5.700 + ADD_imm8s_r32( 4, R_ESP ); // discard the pushed argument: PUSH lowered ESP by 4, so add 4 back
5.701 +:}
5.702 + SLEEP {: /* TODO */ :}
5.703 STC SR, Rn {:
5.704 - read_sr( R_EAX );
5.705 + call_func0(sh4_read_sr);
5.706 store_reg( R_EAX, Rn );
5.707 :}
5.708 STC GBR, Rn {:
5.709 @@ -1210,13 +1471,15 @@
5.710 load_spreg( R_EAX, R_DBR );
5.711 store_reg( R_EAX, Rn );
5.712 :}
5.713 -STC Rm_BANK, Rn {: /* TODO */
5.714 +STC Rm_BANK, Rn {:
5.715 + load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
5.716 + store_reg( R_EAX, Rn );
5.717 :}
5.718 -STC.L SR, @-Rn {: /* TODO */
5.719 +STC.L SR, @-Rn {:
5.720 load_reg( R_ECX, Rn );
5.721 ADD_imm8s_r32( -4, Rn );
5.722 store_reg( R_ECX, Rn );
5.723 - read_sr( R_EAX );
5.724 + call_func0( sh4_read_sr );
5.725 MEM_WRITE_LONG( R_ECX, R_EAX );
5.726 :}
5.727 STC.L VBR, @-Rn {:
5.728 @@ -1254,7 +1517,13 @@
5.729 load_spreg( R_EAX, R_DBR );
5.730 MEM_WRITE_LONG( R_ECX, R_EAX );
5.731 :}
5.732 -STC.L Rm_BANK, @-Rn {: :}
5.733 +STC.L Rm_BANK, @-Rn {:
5.734 + load_reg( R_ECX, Rn );
5.735 + ADD_imm8s_r32( -4, Rn );
5.736 + store_reg( R_ECX, Rn );
5.737 + load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
5.738 + MEM_WRITE_LONG( R_ECX, R_EAX );
5.739 +:}
5.740 STC.L GBR, @-Rn {:
5.741 load_reg( R_ECX, Rn );
5.742 ADD_imm8s_r32( -4, Rn );
5.743 @@ -1321,6 +1590,9 @@
5.744 NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
5.745 %%
5.746 INC_r32(R_ESI);
5.747 -
5.748 + if( sh4_x86.in_delay_slot ) {
5.749 + sh4_x86.in_delay_slot = FALSE;
5.750 + return 1;
5.751 + }
5.752 return 0;
5.753 }
6.1 --- a/src/sh4/x86op.h Tue Sep 11 01:05:05 2007 +0000
6.2 +++ b/src/sh4/x86op.h Tue Sep 11 02:14:46 2007 +0000
6.3 @@ -1,5 +1,5 @@
6.4 /**
6.5 - * $Id: x86op.h,v 1.3 2007-09-04 08:40:23 nkeynes Exp $
6.6 + * $Id: x86op.h,v 1.4 2007-09-11 02:14:46 nkeynes Exp $
6.7 *
6.8 * Definitions of x86 opcodes for use by the translator.
6.9 *
6.10 @@ -77,31 +77,34 @@
6.11 /* ebp+disp32 modrm form */
6.12 #define MODRM_r32_ebp32(r1,disp) OP(0x85 | (r1<<3)); OP32(disp)
6.13
6.14 -#define MODRM_r32_ebp(r1,disp) if(disp>127){ MODRM_r32_ebp32(r1,disp);}else{ MODRM_r32_ebp8(r1,(unsigned char)disp); }
6.15 +#define MODRM_r32_sh4r(r1,disp) if(disp>127){ MODRM_r32_ebp32(r1,disp);}else{ MODRM_r32_ebp8(r1,(unsigned char)disp); }
6.16
6.17 /* Major opcodes */
6.18 +#define ADD_sh4r_r32(disp,r1) OP(0x03); MODRM_r32_sh4r(r1,disp)
6.19 #define ADD_r32_r32(r1,r2) OP(0x03); MODRM_rm32_r32(r1,r2)
6.20 #define ADD_imm8s_r32(imm,r1) OP(0x83); MODRM_rm32_r32(r1, 0); OP(imm)
6.21 #define ADD_imm32_r32(imm32,r1) OP(0x81); MODRM_rm32_r32(r1,0); OP32(imm32)
6.22 #define ADC_r32_r32(r1,r2) OP(0x13); MODRM_rm32_r32(r1,r2)
6.23 #define AND_r32_r32(r1,r2) OP(0x23); MODRM_rm32_r32(r1,r2)
6.24 #define AND_imm8_r8(imm8, r1) OP(0x80); MODRM_rm32_r32(r1,4); OP(imm8)
6.25 +#define AND_imm8s_r32(imm8,r1) OP(0x83); MODRM_rm32_r32(r1,4); OP(imm8)
6.26 #define AND_imm32_r32(imm,r1) OP(0x81); MODRM_rm32_r32(r1,4); OP32(imm)
6.27 #define CALL_r32(r1) OP(0xFF); MODRM_rm32_r32(r1,2)
6.28 +#define CLC() OP(0xF8)
6.29 #define CMC() OP(0xF5)
6.30 +#define CMP_sh4r_r32(disp,r1) OP(0x3B); MODRM_r32_sh4r(r1,disp)
6.31 #define CMP_r32_r32(r1,r2) OP(0x3B); MODRM_rm32_r32(r1,r2)
6.32 #define CMP_imm32_r32(imm32, r1) OP(0x81); MODRM_rm32_r32(r1,7); OP32(imm32)
6.33 #define CMP_imm8s_r32(imm,r1) OP(0x83); MODRM_rm32_r32(r1,7); OP(imm)
6.34 -#define CMP_imm8s_ebp(imm,disp) OP(0x83); MODRM_r32_ebp(7,disp) OP(imm)
6.35 +#define CMP_imm8s_sh4r(imm,disp) OP(0x83); MODRM_r32_sh4r(7,disp) OP(imm)
6.36 #define DEC_r32(r1) OP(0x48+r1)
6.37 #define IMUL_r32(r1) OP(0xF7); MODRM_rm32_r32(r1,5)
6.38 #define INC_r32(r1) OP(0x40+r1)
6.39 #define JMP_rel8(rel) OP(0xEB); OP(rel)
6.40 #define MOV_r32_r32(r1,r2) OP(0x89); MODRM_r32_rm32(r1,r2)
6.41 -#define MOV_r32_ebp(r1,disp) OP(0x89); MODRM_r32_ebp(r1,disp)
6.42 -#define MOV_r32_ebp32(r1,disp) OP(0x89); MODRM_r32_ebp32(r1,disp)
6.43 +#define MOV_r32_sh4r(r1,disp) OP(0x89); MODRM_r32_sh4r(r1,disp)
6.44 #define MOV_moff32_EAX(off) OP(0xA1); OP32(off)
6.45 -#define MOV_ebp_r32(disp, r1) OP(0x8B); MODRM_r32_ebp(r1,disp)
6.46 +#define MOV_sh4r_r32(disp, r1) OP(0x8B); MODRM_r32_sh4r(r1,disp)
6.47 #define MOVSX_r8_r32(r1,r2) OP(0x0F); OP(0xBE); MODRM_rm32_r32(r1,r2)
6.48 #define MOVSX_r16_r32(r1,r2) OP(0x0F); OP(0xBF); MODRM_rm32_r32(r1,r2)
6.49 #define MOVZX_r8_r32(r1,r2) OP(0x0F); OP(0xB6); MODRM_rm32_r32(r1,r2)
6.50 @@ -112,7 +115,7 @@
6.51 #define OR_r32_r32(r1,r2) OP(0x0B); MODRM_rm32_r32(r1,r2)
6.52 #define OR_imm8_r8(imm,r1) OP(0x80); MODRM_rm32_r32(r1,1)
6.53 #define OR_imm32_r32(imm,r1) OP(0x81); MODRM_rm32_r32(r1,1); OP32(imm)
6.54 -#define OR_ebp_r32(disp,r1) OP(0x0B); MODRM_r32_ebp(r1,disp)
6.55 +#define OR_sh4r_r32(disp,r1) OP(0x0B); MODRM_r32_sh4r(r1,disp)
6.56 #define POP_r32(r1) OP(0x58 + r1)
6.57 #define PUSH_r32(r1) OP(0x50 + r1)
6.58 #define PUSH_imm32(imm) OP(0x68); OP32(imm)
6.59 @@ -131,16 +134,28 @@
6.60 #define SHR1_r32(r1) OP(0xD1); MODRM_rm32_r32(r1,5)
6.61 #define SHR_r32_CL(r1) OP(0xD3); MODRM_rm32_r32(r1,5)
6.62 #define SHR_imm8_r32(imm,r1) OP(0xC1); MODRM_rm32_r32(r1,5); OP(imm)
6.63 +#define STC() OP(0xF9)
6.64 #define SUB_r32_r32(r1,r2) OP(0x2B); MODRM_rm32_r32(r1,r2)
6.65 +#define SUB_sh4r_r32(disp,r1) OP(0x2B); MODRM_r32_sh4r(r1, disp)
6.66 #define TEST_r8_r8(r1,r2) OP(0x84); MODRM_r32_rm32(r1,r2)
6.67 #define TEST_r32_r32(r1,r2) OP(0x85); MODRM_rm32_r32(r1,r2)
6.68 #define TEST_imm8_r8(imm8,r1) OP(0xF6); MODRM_rm32_r32(r1,0); OP(imm8)
6.69 #define TEST_imm32_r32(imm,r1) OP(0xF7); MODRM_rm32_r32(r1,0); OP32(imm)
6.70 #define XCHG_r8_r8(r1,r2) OP(0x86); MODRM_rm32_r32(r1,r2)
6.71 #define XOR_r32_r32(r1,r2) OP(0x33); MODRM_rm32_r32(r1,r2)
6.72 +#define XOR_sh4r_r32(disp,r1) OP(0x33); MODRM_r32_sh4r(r1,disp)
6.73 #define XOR_imm32_r32(imm,r1) OP(0x81); MODRM_rm32_r32(r1,6); OP32(imm)
6.74
6.75
6.76 +/* Floating point ops (x87). The *_st0 forms act on ST0 in place; the (st) forms take a stack index. */
6.77 +#define FABS_st0() OP(0xD9); OP(0xE1) /* FABS */
6.78 +#define FADDP_st(st) OP(0xDE); OP(0xC0+st) /* FADDP ST(st), ST0 */
6.79 +#define FCHS_st0() OP(0xD9); OP(0xE0) /* FCHS */
6.80 +#define FDIVP_st(st) OP(0xDE); OP(0xF8+st) /* FDIVP ST(st), ST0 */
6.81 +#define FMULP_st(st) OP(0xDE); OP(0xC8+st) /* FMULP ST(st), ST0 */
6.82 +#define FSUB_st(st) OP(0xDE); OP(0xE8+st) /* NOTE: DE E8+i is the popping form, FSUBP ST(st), ST0, despite the name */
6.83 +#define FSQRT_st0() OP(0xD9); OP(0xFA) /* FSQRT */
6.84 +
6.84 +
6.85 /* Conditional branches */
6.86 #define JE_rel8(rel) OP(0x74); OP(rel)
6.87 #define JA_rel8(rel) OP(0x77); OP(rel)
6.88 @@ -185,31 +200,33 @@
6.89
6.90
6.91 /* Conditional setcc - writeback to sh4r.t */
6.92 -#define SETE_ebp(disp) OP(0x0F); OP(0x94); MODRM_r32_ebp(0, disp);
6.93 -#define SETA_ebp(disp) OP(0x0F); OP(0x97); MODRM_r32_ebp(0, disp);
6.94 -#define SETAE_ebp(disp) OP(0x0F); OP(0x93); MODRM_r32_ebp(0, disp);
6.95 -#define SETG_ebp(disp) OP(0x0F); OP(0x9F); MODRM_r32_ebp(0, disp);
6.96 -#define SETGE_ebp(disp) OP(0x0F); OP(0x9D); MODRM_r32_ebp(0, disp);
6.97 -#define SETC_ebp(disp) OP(0x0F); OP(0x92); MODRM_r32_ebp(0, disp);
6.98 -#define SETO_ebp(disp) OP(0x0F); OP(0x90); MODRM_r32_ebp(0, disp);
6.99 +#define SETE_sh4r(disp) OP(0x0F); OP(0x94); MODRM_r32_sh4r(0, disp);
6.100 +#define SETA_sh4r(disp) OP(0x0F); OP(0x97); MODRM_r32_sh4r(0, disp);
6.101 +#define SETAE_sh4r(disp) OP(0x0F); OP(0x93); MODRM_r32_sh4r(0, disp);
6.102 +#define SETG_sh4r(disp) OP(0x0F); OP(0x9F); MODRM_r32_sh4r(0, disp);
6.103 +#define SETGE_sh4r(disp) OP(0x0F); OP(0x9D); MODRM_r32_sh4r(0, disp);
6.104 +#define SETC_sh4r(disp) OP(0x0F); OP(0x92); MODRM_r32_sh4r(0, disp);
6.105 +#define SETO_sh4r(disp) OP(0x0F); OP(0x90); MODRM_r32_sh4r(0, disp);
6.106
6.107 -#define SETNE_ebp(disp) OP(0x0F); OP(0x95); MODRM_r32_ebp(0, disp);
6.108 -#define SETNA_ebp(disp) OP(0x0F); OP(0x96); MODRM_r32_ebp(0, disp);
6.109 -#define SETNAE_ebp(disp) OP(0x0F); OP(0x92); MODRM_r32_ebp(0, disp);
6.110 -#define SETNG_ebp(disp) OP(0x0F); OP(0x9E); MODRM_r32_ebp(0, disp);
6.111 -#define SETNGE_ebp(disp) OP(0x0F); OP(0x9C); MODRM_r32_ebp(0, disp);
6.112 -#define SETNC_ebp(disp) OP(0x0F); OP(0x93); MODRM_r32_ebp(0, disp);
6.113 -#define SETNO_ebp(disp) OP(0x0F); OP(0x91); MODRM_r32_ebp(0, disp);
6.114 +#define SETNE_sh4r(disp) OP(0x0F); OP(0x95); MODRM_r32_sh4r(0, disp);
6.115 +#define SETNA_sh4r(disp) OP(0x0F); OP(0x96); MODRM_r32_sh4r(0, disp);
6.116 +#define SETNAE_sh4r(disp) OP(0x0F); OP(0x92); MODRM_r32_sh4r(0, disp);
6.117 +#define SETNG_sh4r(disp) OP(0x0F); OP(0x9E); MODRM_r32_sh4r(0, disp);
6.118 +#define SETNGE_sh4r(disp) OP(0x0F); OP(0x9C); MODRM_r32_sh4r(0, disp);
6.119 +#define SETNC_sh4r(disp) OP(0x0F); OP(0x93); MODRM_r32_sh4r(0, disp);
6.120 +#define SETNO_sh4r(disp) OP(0x0F); OP(0x91); MODRM_r32_sh4r(0, disp);
6.121
6.122 -#define SETE_t() SETE_ebp(R_T)
6.123 -#define SETA_t() SETA_ebp(R_T)
6.124 -#define SETAE_t() SETAE_ebp(R_T)
6.125 -#define SETG_t() SETG_ebp(R_T)
6.126 -#define SETGE_t() SETGE_ebp(R_T)
6.127 -#define SETC_t() SETC_ebp(R_T)
6.128 -#define SETO_t() SETO_ebp(R_T)
6.129 +#define SETE_t() SETE_sh4r(R_T)
6.130 +#define SETA_t() SETA_sh4r(R_T)
6.131 +#define SETAE_t() SETAE_sh4r(R_T)
6.132 +#define SETG_t() SETG_sh4r(R_T)
6.133 +#define SETGE_t() SETGE_sh4r(R_T)
6.134 +#define SETC_t() SETC_sh4r(R_T)
6.135 +#define SETO_t() SETO_sh4r(R_T)
6.136 +
6.137 +#define SETC_r32(r1) OP(0x0F); OP(0x92); MODRM_rm32_r32(r1, 0)
6.138
6.139 /* Pseudo-op Load carry from T: CMP [EBP+t], #01 ; CMC */
6.140 -#define LDC_t() OP(0x83); MODRM_r32_ebp(7,R_T); OP(0x01); CMC()
6.141 +#define LDC_t() OP(0x83); MODRM_r32_sh4r(7,R_T); OP(0x01); CMC()
6.142
6.143 #endif /* !__lxdream_x86op_H */
7.1 --- a/src/test/testsh4x86.c Tue Sep 11 01:05:05 2007 +0000
7.2 +++ b/src/test/testsh4x86.c Tue Sep 11 02:14:46 2007 +0000
7.3 @@ -1,5 +1,5 @@
7.4 /**
7.5 - * $Id: testsh4x86.c,v 1.3 2007-09-08 03:11:53 nkeynes Exp $
7.6 + * $Id: testsh4x86.c,v 1.4 2007-09-11 02:14:46 nkeynes Exp $
7.7 *
7.8 * Test cases for the SH4 => x86 translator core. Takes as
7.9 * input a binary SH4 object (and VMA), generates the
7.10 @@ -74,6 +74,8 @@
7.11 void sh4_write_word( uint32_t addr, uint32_t val ) {}
7.12 void sh4_write_long( uint32_t addr, uint32_t val ) {}
7.13 void sh4_flush_store_queue( uint32_t addr ) {}
7.14 +void sh4_write_sr( uint32_t val ) { }
7.15 +uint32_t sh4_read_sr( void ) { return 0; } /* stub: return a defined value instead of falling off the end */
7.16 gboolean sh4_raise_exception( int exc ) {}
7.17
7.18 void usage()
.