Search
lxdream.org :: lxdream/src/sh4/sh4x86.in :: diff
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 905:4c17ebd9ef5e
prev 904:5b92e51ac06b
next 908:a00debcf2600
author nkeynes
date Wed Oct 29 23:51:58 2008 +0000 (12 years ago)
permissions -rw-r--r--
last change Use regparm calling conventions for all functions called from translated code,
along with a few other high-use functions. This can probably be extended to all functions,
but even as it stands it is a nice performance boost.
file annotate diff log raw
1.1 --- a/src/sh4/sh4x86.in Wed Oct 29 23:36:31 2008 +0000
1.2 +++ b/src/sh4/sh4x86.in Wed Oct 29 23:51:58 2008 +0000
1.3 @@ -25,6 +25,7 @@
1.4 #define DEBUG_JUMPS 1
1.5 #endif
1.6
1.7 +#include "lxdream.h"
1.8 #include "sh4/xltcache.h"
1.9 #include "sh4/sh4core.h"
1.10 #include "sh4/sh4trans.h"
1.11 @@ -312,10 +313,6 @@
1.12 */
1.13 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
1.14
1.15 -#define MEM_READ_SIZE (CALL_FUNC1_SIZE)
1.16 -#define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
1.17 -#define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )
1.18 -
1.19 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
1.20
1.21 /****** Import appropriate calling conventions ******/
1.22 @@ -491,10 +488,10 @@
1.23 ADD_r32_r32( R_ECX, R_EAX );
1.24 MMU_TRANSLATE_WRITE( R_EAX );
1.25 PUSH_realigned_r32(R_EAX);
1.26 - MEM_READ_BYTE( R_EAX, R_EAX );
1.27 - POP_realigned_r32(R_ECX);
1.28 - AND_imm32_r32(imm, R_EAX );
1.29 - MEM_WRITE_BYTE( R_ECX, R_EAX );
1.30 + MEM_READ_BYTE( R_EAX, R_EDX );
1.31 + POP_realigned_r32(R_EAX);
1.32 + AND_imm32_r32(imm, R_EDX );
1.33 + MEM_WRITE_BYTE( R_EAX, R_EDX );
1.34 sh4_x86.tstate = TSTATE_NONE;
1.35 :}
1.36 CMP/EQ Rm, Rn {:
1.37 @@ -848,10 +845,10 @@
1.38 ADD_r32_r32( R_ECX, R_EAX );
1.39 MMU_TRANSLATE_WRITE( R_EAX );
1.40 PUSH_realigned_r32(R_EAX);
1.41 - MEM_READ_BYTE( R_EAX, R_EAX );
1.42 - POP_realigned_r32(R_ECX);
1.43 - OR_imm32_r32(imm, R_EAX );
1.44 - MEM_WRITE_BYTE( R_ECX, R_EAX );
1.45 + MEM_READ_BYTE( R_EAX, R_EDX );
1.46 + POP_realigned_r32(R_EAX);
1.47 + OR_imm32_r32(imm, R_EDX );
1.48 + MEM_WRITE_BYTE( R_EAX, R_EDX );
1.49 sh4_x86.tstate = TSTATE_NONE;
1.50 :}
1.51 ROTCL Rn {:
1.52 @@ -1067,12 +1064,12 @@
1.53 load_reg( R_EAX, Rn );
1.54 MMU_TRANSLATE_WRITE( R_EAX );
1.55 PUSH_realigned_r32( R_EAX );
1.56 - MEM_READ_BYTE( R_EAX, R_EAX );
1.57 - TEST_r8_r8( R_AL, R_AL );
1.58 + MEM_READ_BYTE( R_EAX, R_EDX );
1.59 + TEST_r8_r8( R_DL, R_DL );
1.60 SETE_t();
1.61 - OR_imm8_r8( 0x80, R_AL );
1.62 - POP_realigned_r32( R_ECX );
1.63 - MEM_WRITE_BYTE( R_ECX, R_EAX );
1.64 + OR_imm8_r8( 0x80, R_DL );
1.65 + POP_realigned_r32( R_EAX );
1.66 + MEM_WRITE_BYTE( R_EAX, R_EDX );
1.67 sh4_x86.tstate = TSTATE_NONE;
1.68 :}
1.69 TST Rm, Rn {:
1.70 @@ -1123,10 +1120,10 @@
1.71 ADD_r32_r32( R_ECX, R_EAX );
1.72 MMU_TRANSLATE_WRITE( R_EAX );
1.73 PUSH_realigned_r32(R_EAX);
1.74 - MEM_READ_BYTE(R_EAX, R_EAX);
1.75 - POP_realigned_r32(R_ECX);
1.76 - XOR_imm32_r32( imm, R_EAX );
1.77 - MEM_WRITE_BYTE( R_ECX, R_EAX );
1.78 + MEM_READ_BYTE(R_EAX, R_EDX);
1.79 + POP_realigned_r32(R_EAX);
1.80 + XOR_imm32_r32( imm, R_EDX );
1.81 + MEM_WRITE_BYTE( R_EAX, R_EDX );
1.82 sh4_x86.tstate = TSTATE_NONE;
1.83 :}
1.84 XTRCT Rm, Rn {:
1.85 @@ -1863,14 +1860,14 @@
1.86 if( sh4_x86.double_size ) {
1.87 check_walign64( R_EAX );
1.88 MMU_TRANSLATE_WRITE( R_EAX );
1.89 - load_dr0( R_ECX, FRm );
1.90 - load_dr1( R_EDX, FRm );
1.91 - MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.92 + load_dr0( R_EDX, FRm );
1.93 + load_dr1( R_ECX, FRm );
1.94 + MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
1.95 } else {
1.96 check_walign32( R_EAX );
1.97 MMU_TRANSLATE_WRITE( R_EAX );
1.98 - load_fr( R_ECX, FRm );
1.99 - MEM_WRITE_LONG( R_EAX, R_ECX );
1.100 + load_fr( R_EDX, FRm );
1.101 + MEM_WRITE_LONG( R_EAX, R_EDX );
1.102 }
1.103 sh4_x86.tstate = TSTATE_NONE;
1.104 :}
1.105 @@ -1881,8 +1878,8 @@
1.106 if( sh4_x86.double_size ) {
1.107 check_ralign64( R_EAX );
1.108 MMU_TRANSLATE_READ( R_EAX );
1.109 - MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.110 - store_dr0( R_ECX, FRn );
1.111 + MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
1.112 + store_dr0( R_EDX, FRn );
1.113 store_dr1( R_EAX, FRn );
1.114 } else {
1.115 check_ralign32( R_EAX );
1.116 @@ -1900,17 +1897,17 @@
1.117 check_walign64( R_EAX );
1.118 ADD_imm8s_r32(-8,R_EAX);
1.119 MMU_TRANSLATE_WRITE( R_EAX );
1.120 - load_dr0( R_ECX, FRm );
1.121 - load_dr1( R_EDX, FRm );
1.122 + load_dr0( R_EDX, FRm );
1.123 + load_dr1( R_ECX, FRm );
1.124 ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
1.125 - MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.126 + MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
1.127 } else {
1.128 check_walign32( R_EAX );
1.129 ADD_imm8s_r32( -4, R_EAX );
1.130 MMU_TRANSLATE_WRITE( R_EAX );
1.131 - load_fr( R_ECX, FRm );
1.132 + load_fr( R_EDX, FRm );
1.133 ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
1.134 - MEM_WRITE_LONG( R_EAX, R_ECX );
1.135 + MEM_WRITE_LONG( R_EAX, R_EDX );
1.136 }
1.137 sh4_x86.tstate = TSTATE_NONE;
1.138 :}
1.139 @@ -1922,8 +1919,8 @@
1.140 check_ralign64( R_EAX );
1.141 MMU_TRANSLATE_READ( R_EAX );
1.142 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
1.143 - MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1.144 - store_dr0( R_ECX, FRn );
1.145 + MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
1.146 + store_dr0( R_EDX, FRn );
1.147 store_dr1( R_EAX, FRn );
1.148 } else {
1.149 check_ralign32( R_EAX );
1.150 @@ -1942,14 +1939,14 @@
1.151 if( sh4_x86.double_size ) {
1.152 check_walign64( R_EAX );
1.153 MMU_TRANSLATE_WRITE( R_EAX );
1.154 - load_dr0( R_ECX, FRm );
1.155 - load_dr1( R_EDX, FRm );
1.156 - MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1.157 + load_dr0( R_EDX, FRm );
1.158 + load_dr1( R_ECX, FRm );
1.159 + MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
1.160 } else {
1.161 check_walign32( R_EAX );
1.162 MMU_TRANSLATE_WRITE( R_EAX );
1.163 - load_fr( R_ECX, FRm );
1.164 - MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
1.165 + load_fr( R_EDX, FRm );
1.166 + MEM_WRITE_LONG( R_EAX, R_EDX ); // 12
1.167 }
1.168 sh4_x86.tstate = TSTATE_NONE;
1.169 :}
1.170 @@ -2228,9 +2225,9 @@
1.171 COUNT_INST(I_FSCA);
1.172 check_fpuen();
1.173 if( sh4_x86.double_prec == 0 ) {
1.174 - LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_ECX );
1.175 - load_spreg( R_EDX, R_FPUL );
1.176 - call_func2( sh4_fsca, R_EDX, R_ECX );
1.177 + LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
1.178 + load_spreg( R_EAX, R_FPUL );
1.179 + call_func2( sh4_fsca, R_EAX, R_EDX );
1.180 }
1.181 sh4_x86.tstate = TSTATE_NONE;
1.182 :}
1.183 @@ -2571,10 +2568,10 @@
1.184 COUNT_INST(I_PREF);
1.185 load_reg( R_EAX, Rn );
1.186 MOV_r32_r32( R_EAX, R_ECX );
1.187 - AND_imm32_r32( 0xFC000000, R_EAX );
1.188 - CMP_imm32_r32( 0xE0000000, R_EAX );
1.189 + AND_imm32_r32( 0xFC000000, R_ECX );
1.190 + CMP_imm32_r32( 0xE0000000, R_ECX );
1.191 JNE_rel8(end);
1.192 - call_func1( sh4_flush_store_queue, R_ECX );
1.193 + call_func1( sh4_flush_store_queue, R_EAX );
1.194 TEST_r32_r32( R_EAX, R_EAX );
1.195 JE_exc(-1);
1.196 JMP_TARGET(end);
.