Search
lxdream.org :: lxdream :: r394:7eb172bfeefe
lxdream 0.9.1
released Jun 29
Download Now
changeset: 394:7eb172bfeefe
parent: 393:5e5335b61373
child: 395:c473acbde186
author: nkeynes
date: Wed Sep 19 09:15:18 2007 +0000 (12 years ago)
Fix SUBC (not updating T), FTRC (not truncating), and XTRCT (just b0rked)
src/sh4/sh4x86.c
src/sh4/sh4x86.in
src/sh4/x86op.h
1.1 --- a/src/sh4/sh4x86.c Wed Sep 19 09:13:08 2007 +0000
1.2 +++ b/src/sh4/sh4x86.c Wed Sep 19 09:15:18 2007 +0000
1.3 @@ -1,5 +1,5 @@
1.4 /**
1.5 - * $Id: sh4x86.c,v 1.10 2007-09-18 08:59:00 nkeynes Exp $
1.6 + * $Id: sh4x86.c,v 1.11 2007-09-19 09:15:18 nkeynes Exp $
1.7 *
1.8 * SH4 => x86 translation. This version does no real optimization, it just
1.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
1.10 @@ -61,6 +61,8 @@
1.11
1.12 static uint32_t max_int = 0x7FFFFFFF;
1.13 static uint32_t min_int = 0x80000000;
1.14 +static uint32_t save_fcw; /* save value for fpu control word */
1.15 +static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
1.16 void signsat48( void )
1.17 {
1.18 if( ((int64_t)sh4r.mac) < (int64_t)0xFFFF800000000000LL )
1.19 @@ -838,6 +840,7 @@
1.20 call_func0( sh4_sleep );
1.21 sh4_x86.exit_code = 0;
1.22 sh4_x86.in_delay_slot = FALSE;
1.23 + INC_r32(R_ESI);
1.24 return 1;
1.25 }
1.26 break;
1.27 @@ -1067,9 +1070,9 @@
1.28 { /* XTRCT Rm, Rn */
1.29 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.30 load_reg( R_EAX, Rm );
1.31 - MOV_r32_r32( R_EAX, R_ECX );
1.32 - SHR_imm8_r32( 16, R_EAX );
1.33 - SHL_imm8_r32( 16, R_ECX );
1.34 + load_reg( R_ECX, Rn );
1.35 + SHL_imm8_r32( 16, R_EAX );
1.36 + SHR_imm8_r32( 16, R_ECX );
1.37 OR_r32_r32( R_EAX, R_ECX );
1.38 store_reg( R_ECX, Rn );
1.39 }
1.40 @@ -1196,6 +1199,7 @@
1.41 LDC_t();
1.42 SBB_r32_r32( R_EAX, R_ECX );
1.43 store_reg( R_ECX, Rn );
1.44 + SETC_t();
1.45 }
1.46 break;
1.47 case 0xB:
1.48 @@ -2480,7 +2484,7 @@
1.49 load_reg( R_ECX, R_GBR);
1.50 ADD_r32_r32( R_EAX, R_ECX );
1.51 MEM_READ_BYTE( R_ECX, R_EAX );
1.52 - TEST_imm8_r8( imm, R_EAX );
1.53 + TEST_imm8_r8( imm, R_AL );
1.54 SETE_t();
1.55 }
1.56 break;
1.57 @@ -2983,12 +2987,17 @@
1.58 load_imm32( R_ECX, (uint32_t)&max_int );
1.59 FILD_r32ind( R_ECX );
1.60 FCOMIP_st(1);
1.61 - JNA_rel8( 16, sat );
1.62 + JNA_rel8( 32, sat );
1.63 load_imm32( R_ECX, (uint32_t)&min_int ); // 5
1.64 FILD_r32ind( R_ECX ); // 2
1.65 FCOMIP_st(1); // 2
1.66 - JAE_rel8( 5, sat2 ); // 2
1.67 + JAE_rel8( 21, sat2 ); // 2
1.68 + load_imm32( R_EAX, (uint32_t)&save_fcw );
1.69 + FNSTCW_r32ind( R_EAX );
1.70 + load_imm32( R_EDX, (uint32_t)&trunc_fcw );
1.71 + FLDCW_r32ind( R_EDX );
1.72 FISTP_sh4r(R_FPUL); // 3
1.73 + FLDCW_r32ind( R_EAX );
1.74 JMP_rel8( 9, end ); // 2
1.75
1.76 JMP_TARGET(sat);
2.1 --- a/src/sh4/sh4x86.in Wed Sep 19 09:13:08 2007 +0000
2.2 +++ b/src/sh4/sh4x86.in Wed Sep 19 09:15:18 2007 +0000
2.3 @@ -1,5 +1,5 @@
2.4 /**
2.5 - * $Id: sh4x86.in,v 1.11 2007-09-18 08:59:00 nkeynes Exp $
2.6 + * $Id: sh4x86.in,v 1.12 2007-09-19 09:15:18 nkeynes Exp $
2.7 *
2.8 * SH4 => x86 translation. This version does no real optimization, it just
2.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
2.10 @@ -61,6 +61,8 @@
2.11
2.12 static uint32_t max_int = 0x7FFFFFFF;
2.13 static uint32_t min_int = 0x80000000;
2.14 +static uint32_t save_fcw; /* save value for fpu control word */
2.15 +static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
2.16 void signsat48( void )
2.17 {
2.18 if( ((int64_t)sh4r.mac) < (int64_t)0xFFFF800000000000LL )
2.19 @@ -964,6 +966,7 @@
2.20 LDC_t();
2.21 SBB_r32_r32( R_EAX, R_ECX );
2.22 store_reg( R_ECX, Rn );
2.23 + SETC_t();
2.24 :}
2.25 SUBV Rm, Rn {:
2.26 load_reg( R_EAX, Rm );
2.27 @@ -1010,7 +1013,7 @@
2.28 load_reg( R_ECX, R_GBR);
2.29 ADD_r32_r32( R_EAX, R_ECX );
2.30 MEM_READ_BYTE( R_ECX, R_EAX );
2.31 - TEST_imm8_r8( imm, R_EAX );
2.32 + TEST_imm8_r8( imm, R_AL );
2.33 SETE_t();
2.34 :}
2.35 XOR Rm, Rn {:
2.36 @@ -1036,9 +1039,9 @@
2.37 :}
2.38 XTRCT Rm, Rn {:
2.39 load_reg( R_EAX, Rm );
2.40 - MOV_r32_r32( R_EAX, R_ECX );
2.41 - SHR_imm8_r32( 16, R_EAX );
2.42 - SHL_imm8_r32( 16, R_ECX );
2.43 + load_reg( R_ECX, Rn );
2.44 + SHL_imm8_r32( 16, R_EAX );
2.45 + SHR_imm8_r32( 16, R_ECX );
2.46 OR_r32_r32( R_EAX, R_ECX );
2.47 store_reg( R_ECX, Rn );
2.48 :}
2.49 @@ -1761,12 +1764,17 @@
2.50 load_imm32( R_ECX, (uint32_t)&max_int );
2.51 FILD_r32ind( R_ECX );
2.52 FCOMIP_st(1);
2.53 - JNA_rel8( 16, sat );
2.54 + JNA_rel8( 32, sat );
2.55 load_imm32( R_ECX, (uint32_t)&min_int ); // 5
2.56 FILD_r32ind( R_ECX ); // 2
2.57 FCOMIP_st(1); // 2
2.58 - JAE_rel8( 5, sat2 ); // 2
2.59 + JAE_rel8( 21, sat2 ); // 2
2.60 + load_imm32( R_EAX, (uint32_t)&save_fcw );
2.61 + FNSTCW_r32ind( R_EAX );
2.62 + load_imm32( R_EDX, (uint32_t)&trunc_fcw );
2.63 + FLDCW_r32ind( R_EDX );
2.64 FISTP_sh4r(R_FPUL); // 3
2.65 + FLDCW_r32ind( R_EAX );
2.66 JMP_rel8( 9, end ); // 2
2.67
2.68 JMP_TARGET(sat);
2.69 @@ -2268,6 +2276,7 @@
2.70 call_func0( sh4_sleep );
2.71 sh4_x86.exit_code = 0;
2.72 sh4_x86.in_delay_slot = FALSE;
2.73 + INC_r32(R_ESI);
2.74 return 1;
2.75 :}
2.76 STC SR, Rn {:
3.1 --- a/src/sh4/x86op.h Wed Sep 19 09:13:08 2007 +0000
3.2 +++ b/src/sh4/x86op.h Wed Sep 19 09:15:18 2007 +0000
3.3 @@ -1,5 +1,5 @@
3.4 /**
3.5 - * $Id: x86op.h,v 1.9 2007-09-18 08:59:00 nkeynes Exp $
3.6 + * $Id: x86op.h,v 1.10 2007-09-19 09:15:18 nkeynes Exp $
3.7 *
3.8 * Definitions of x86 opcodes for use by the translator.
3.9 *
3.10 @@ -176,7 +176,9 @@
3.11 #define FISTP_sh4r(disp) OP(0xDB); MODRM_r32_sh4r(3, disp)
3.12 #define FLD0_st0() OP(0xD9); OP(0xEE);
3.13 #define FLD1_st0() OP(0xD9); OP(0xE8);
3.14 +#define FLDCW_r32ind(r32) OP(0xD9); OP(0x28+r32)
3.15 #define FMULP_st(st) OP(0xDE); OP(0xC8+st)
3.16 +#define FNSTCW_r32ind(r32) OP(0xD9); OP(0x38+r32)
3.17 #define FPOP_st() OP(0xDD); OP(0xC0); OP(0xD9); OP(0xF7)
3.18 #define FSUBP_st(st) OP(0xDE); OP(0xE8+st)
3.19 #define FSQRT_st0() OP(0xD9); OP(0xFA)
.