Search
lxdream.org :: lxdream/src/sh4/sh4x86.in :: diff
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 1292:799fdd4f704a
prev 1263:b3de98d19faf
next 1298:d0eb2307b847
author nkeynes
date Fri Aug 24 08:53:50 2012 +1000 (11 years ago)
permissions -rw-r--r--
last change Move the generated prologue/epilogue code out into a common entry stub
(reduces space requirements) and pre-save all saved registers. Change
FASTCALL to use 3 regs instead of 2 since we can now keep everything in
regs.
file annotate diff log raw
1.1 --- a/src/sh4/sh4x86.in Tue Mar 06 09:04:34 2012 +1000
1.2 +++ b/src/sh4/sh4x86.in Fri Aug 24 08:53:50 2012 +1000
1.3 @@ -115,6 +115,9 @@
1.4
1.5 static struct sh4_x86_state sh4_x86;
1.6
1.7 +static uint8_t sh4_entry_stub[128];
1.8 +void FASTCALL (*sh4_translate_enter)(void *code);
1.9 +
1.10 static uint32_t max_int = 0x7FFFFFFF;
1.11 static uint32_t min_int = 0x80000000;
1.12 static uint32_t save_fcw; /* save value for fpu control word */
1.13 @@ -143,16 +146,45 @@
1.14 sh4_x86.user_address_space = user;
1.15 }
1.16
1.17 +void sh4_translate_write_entry_stub(void)
1.18 +{
1.19 + mem_unprotect(sh4_entry_stub, sizeof(sh4_entry_stub));
1.20 + xlat_output = sh4_entry_stub;
1.21 + PUSH_r32(REG_EBP);
1.22 + MOVP_immptr_rptr( ((uint8_t *)&sh4r) + 128, REG_EBP );
1.23 + PUSH_r32(REG_EBX);
1.24 + PUSH_r32(REG_SAVE1);
1.25 + PUSH_r32(REG_SAVE2);
1.26 +#if SIZEOF_VOID_P == 8
1.27 + PUSH_r32(REG_SAVE3);
1.28 + PUSH_r32(REG_SAVE4);
1.29 + CALL_r32( REG_ARG1 );
1.30 + POP_r32(REG_SAVE4);
1.31 + POP_r32(REG_SAVE3);
1.32 +#else
1.33 + SUBL_imms_r32( 8, REG_ESP );
1.34 + CALL_r32( REG_ARG1 );
1.35 + ADDL_imms_r32( 8, REG_ESP );
1.36 +#endif
1.37 + POP_r32(REG_SAVE2);
1.38 + POP_r32(REG_SAVE1);
1.39 + POP_r32(REG_EBX);
1.40 + POP_r32(REG_EBP);
1.41 + RET();
1.42 + sh4_translate_enter = sh4_entry_stub;
1.43 +}
1.44 +
1.45 void sh4_translate_init(void)
1.46 {
1.47 sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
1.48 sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
1.49 sh4_x86.begin_callback = NULL;
1.50 sh4_x86.end_callback = NULL;
1.51 - sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
1.52 sh4_x86.fastmem = TRUE;
1.53 sh4_x86.sse3_enabled = is_sse3_supported();
1.54 xlat_set_target_fns(&x86_target_fns);
1.55 + sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
1.56 + sh4_translate_write_entry_stub();
1.57 }
1.58
1.59 void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
1.60 @@ -344,16 +376,16 @@
1.61 #ifdef HAVE_FRAME_ADDRESS
1.62 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
1.63 {
1.64 - decode_address(address_space(), addr_reg);
1.65 + decode_address(address_space(), addr_reg, REG_CALLPTR);
1.66 if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
1.67 - CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
1.68 + CALL1_r32disp_r32(REG_CALLPTR, offset, addr_reg);
1.69 } else {
1.70 if( addr_reg != REG_ARG1 ) {
1.71 MOVL_r32_r32( addr_reg, REG_ARG1 );
1.72 }
1.73 MOVP_immptr_rptr( 0, REG_ARG2 );
1.74 sh4_x86_add_backpatch( xlat_output, pc, -2 );
1.75 - CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
1.76 + CALL2_r32disp_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2);
1.77 }
1.78 if( value_reg != REG_RESULT1 ) {
1.79 MOVL_r32_r32( REG_RESULT1, value_reg );
1.80 @@ -362,9 +394,9 @@
1.81
1.82 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
1.83 {
1.84 - decode_address(address_space(), addr_reg);
1.85 + decode_address(address_space(), addr_reg, REG_CALLPTR);
1.86 if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
1.87 - CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
1.88 + CALL2_r32disp_r32_r32(REG_CALLPTR, offset, addr_reg, value_reg);
1.89 } else {
1.90 if( value_reg != REG_ARG2 ) {
1.91 MOVL_r32_r32( value_reg, REG_ARG2 );
1.92 @@ -375,19 +407,19 @@
1.93 #if MAX_REG_ARG > 2
1.94 MOVP_immptr_rptr( 0, REG_ARG3 );
1.95 sh4_x86_add_backpatch( xlat_output, pc, -2 );
1.96 - CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
1.97 + CALL3_r32disp_r32_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2, REG_ARG3);
1.98 #else
1.99 MOVL_imm32_rspdisp( 0, 0 );
1.100 sh4_x86_add_backpatch( xlat_output, pc, -2 );
1.101 - CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
1.102 + CALL3_r32disp_r32_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2, 0);
1.103 #endif
1.104 }
1.105 }
1.106 #else
1.107 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
1.108 {
1.109 - decode_address(address_space(), addr_reg);
1.110 - CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
1.111 + decode_address(address_space(), addr_reg, REG_CALLPTR);
1.112 + CALL1_r32disp_r32(REG_CALLPTR, offset, addr_reg);
1.113 if( value_reg != REG_RESULT1 ) {
1.114 MOVL_r32_r32( REG_RESULT1, value_reg );
1.115 }
1.116 @@ -395,8 +427,8 @@
1.117
1.118 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
1.119 {
1.120 - decode_address(address_space(), addr_reg);
1.121 - CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
1.122 + decode_address(address_space(), addr_reg, REG_CALLPTR);
1.123 + CALL2_r32disp_r32_r32(REG_CALLPTR, offset, addr_reg, value_reg);
1.124 }
1.125 #endif
1.126
1.127 @@ -430,7 +462,6 @@
1.128 sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
1.129 sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
1.130 sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
1.131 - emit_prologue();
1.132 if( sh4_x86.begin_callback ) {
1.133 CALL_ptr( sh4_x86.begin_callback );
1.134 }
1.135 @@ -486,7 +517,6 @@
1.136 CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
1.137 }
1.138 JNE_label(wrongmode);
1.139 - LEAP_rptrdisp_rptr(REG_EAX, PROLOGUE_SIZE,REG_EAX);
1.140 if( sh4_x86.end_callback ) {
1.141 /* Note this does leave the stack out of alignment, but doesn't matter
1.142 * for what we're currently using it for.
1.143 @@ -518,7 +548,7 @@
1.144 }
1.145 uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
1.146 *backpatch = 0xE9;
1.147 - *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)+PROLOGUE_SIZE-5;
1.148 + *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)-5;
1.149 *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
1.150 XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch;
1.151
1.152 @@ -586,7 +616,6 @@
1.153
1.154 static void exit_block()
1.155 {
1.156 - emit_epilogue();
1.157 if( sh4_x86.end_callback ) {
1.158 MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
1.159 JMP_rptr(REG_ECX);
1.160 @@ -674,7 +703,7 @@
1.161 * looping.
1.162 */
1.163 CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
1.164 - uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output)) + PROLOGUE_SIZE;
1.165 + uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output));
1.166 JCC_cc_prerel(X86_COND_A, backdisp);
1.167 } else {
1.168 MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
1.169 @@ -855,9 +884,9 @@
1.170 COUNT_INST(I_ANDB);
1.171 load_reg( REG_EAX, 0 );
1.172 ADDL_rbpdisp_r32( R_GBR, REG_EAX );
1.173 - MOVL_r32_rspdisp(REG_EAX, 0);
1.174 + MOVL_r32_r32(REG_EAX, REG_SAVE1);
1.175 MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
1.176 - MOVL_rspdisp_r32(0, REG_EAX);
1.177 + MOVL_r32_r32(REG_SAVE1, REG_EAX);
1.178 ANDL_imms_r32(imm, REG_EDX );
1.179 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1.180 sh4_x86.tstate = TSTATE_NONE;
1.181 @@ -1044,7 +1073,7 @@
1.182 load_reg( REG_EAX, Rm );
1.183 check_ralign32( REG_EAX );
1.184 MEM_READ_LONG( REG_EAX, REG_EAX );
1.185 - MOVL_r32_rspdisp(REG_EAX, 0);
1.186 + MOVL_r32_r32(REG_EAX, REG_SAVE1);
1.187 load_reg( REG_EAX, Rm );
1.188 LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
1.189 MEM_READ_LONG( REG_EAX, REG_EAX );
1.190 @@ -1053,7 +1082,7 @@
1.191 load_reg( REG_EAX, Rm );
1.192 check_ralign32( REG_EAX );
1.193 MEM_READ_LONG( REG_EAX, REG_EAX );
1.194 - MOVL_r32_rspdisp( REG_EAX, 0 );
1.195 + MOVL_r32_r32(REG_EAX, REG_SAVE1);
1.196 load_reg( REG_EAX, Rn );
1.197 check_ralign32( REG_EAX );
1.198 MEM_READ_LONG( REG_EAX, REG_EAX );
1.199 @@ -1061,7 +1090,7 @@
1.200 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
1.201 }
1.202
1.203 - IMULL_rspdisp( 0 );
1.204 + IMULL_r32( REG_SAVE1 );
1.205 ADDL_r32_rbpdisp( REG_EAX, R_MACL );
1.206 ADCL_r32_rbpdisp( REG_EDX, R_MACH );
1.207
1.208 @@ -1078,7 +1107,7 @@
1.209 load_reg( REG_EAX, Rm );
1.210 check_ralign16( REG_EAX );
1.211 MEM_READ_WORD( REG_EAX, REG_EAX );
1.212 - MOVL_r32_rspdisp( REG_EAX, 0 );
1.213 + MOVL_r32_r32( REG_EAX, REG_SAVE1 );
1.214 load_reg( REG_EAX, Rm );
1.215 LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
1.216 MEM_READ_WORD( REG_EAX, REG_EAX );
1.217 @@ -1089,14 +1118,14 @@
1.218 load_reg( REG_EAX, Rn );
1.219 check_ralign16( REG_EAX );
1.220 MEM_READ_WORD( REG_EAX, REG_EAX );
1.221 - MOVL_r32_rspdisp( REG_EAX, 0 );
1.222 + MOVL_r32_r32( REG_EAX, REG_SAVE1 );
1.223 load_reg( REG_EAX, Rm );
1.224 check_ralign16( REG_EAX );
1.225 MEM_READ_WORD( REG_EAX, REG_EAX );
1.226 ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
1.227 ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
1.228 }
1.229 - IMULL_rspdisp( 0 );
1.230 + IMULL_r32( REG_SAVE1 );
1.231 MOVL_rbpdisp_r32( R_S, REG_ECX );
1.232 TESTL_r32_r32( REG_ECX, REG_ECX );
1.233 JE_label( nosat );
1.234 @@ -1195,9 +1224,9 @@
1.235 COUNT_INST(I_ORB);
1.236 load_reg( REG_EAX, 0 );
1.237 ADDL_rbpdisp_r32( R_GBR, REG_EAX );
1.238 - MOVL_r32_rspdisp( REG_EAX, 0 );
1.239 + MOVL_r32_r32( REG_EAX, REG_SAVE1 );
1.240 MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
1.241 - MOVL_rspdisp_r32( 0, REG_EAX );
1.242 + MOVL_r32_r32( REG_SAVE1, REG_EAX );
1.243 ORL_imms_r32(imm, REG_EDX );
1.244 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1.245 sh4_x86.tstate = TSTATE_NONE;
1.246 @@ -1413,12 +1442,12 @@
1.247 TAS.B @Rn {:
1.248 COUNT_INST(I_TASB);
1.249 load_reg( REG_EAX, Rn );
1.250 - MOVL_r32_rspdisp( REG_EAX, 0 );
1.251 + MOVL_r32_r32( REG_EAX, REG_SAVE1 );
1.252 MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
1.253 TESTB_r8_r8( REG_DL, REG_DL );
1.254 SETE_t();
1.255 ORB_imms_r8( 0x80, REG_DL );
1.256 - MOVL_rspdisp_r32( 0, REG_EAX );
1.257 + MOVL_r32_r32( REG_SAVE1, REG_EAX );
1.258 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1.259 sh4_x86.tstate = TSTATE_NONE;
1.260 :}
1.261 @@ -1465,9 +1494,9 @@
1.262 COUNT_INST(I_XORB);
1.263 load_reg( REG_EAX, 0 );
1.264 ADDL_rbpdisp_r32( R_GBR, REG_EAX );
1.265 - MOVL_r32_rspdisp( REG_EAX, 0 );
1.266 + MOVL_r32_r32( REG_EAX, REG_SAVE1 );
1.267 MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
1.268 - MOVL_rspdisp_r32( 0, REG_EAX );
1.269 + MOVL_r32_r32( REG_SAVE1, REG_EAX );
1.270 XORL_imms_r32( imm, REG_EDX );
1.271 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1.272 sh4_x86.tstate = TSTATE_NONE;
.