filename | src/sh4/sh4x86.in |
changeset | 1292:799fdd4f704a |
prev | 1263:b3de98d19faf |
next | 1298:d0eb2307b847 |
author | nkeynes |
date | Fri Aug 24 08:53:50 2012 +1000 (11 years ago) |
permissions | -rw-r--r-- |
last change | Move the generated prologue/epilogue code out into a common entry stub (reduces space requirements) and pre-save all saved registers. Change FASTCALL to use 3 regs instead of 2 since we can now keep everything in regs. |
file | annotate | diff | log | raw |
1.1 --- a/src/sh4/sh4x86.in Tue Mar 06 09:04:34 2012 +10001.2 +++ b/src/sh4/sh4x86.in Fri Aug 24 08:53:50 2012 +10001.3 @@ -115,6 +115,9 @@1.5 static struct sh4_x86_state sh4_x86;1.7 +static uint8_t sh4_entry_stub[128];1.8 +void FASTCALL (*sh4_translate_enter)(void *code);1.9 +1.10 static uint32_t max_int = 0x7FFFFFFF;1.11 static uint32_t min_int = 0x80000000;1.12 static uint32_t save_fcw; /* save value for fpu control word */1.13 @@ -143,16 +146,45 @@1.14 sh4_x86.user_address_space = user;1.15 }1.17 +void sh4_translate_write_entry_stub(void)1.18 +{1.19 + mem_unprotect(sh4_entry_stub, sizeof(sh4_entry_stub));1.20 + xlat_output = sh4_entry_stub;1.21 + PUSH_r32(REG_EBP);1.22 + MOVP_immptr_rptr( ((uint8_t *)&sh4r) + 128, REG_EBP );1.23 + PUSH_r32(REG_EBX);1.24 + PUSH_r32(REG_SAVE1);1.25 + PUSH_r32(REG_SAVE2);1.26 +#if SIZEOF_VOID_P == 81.27 + PUSH_r32(REG_SAVE3);1.28 + PUSH_r32(REG_SAVE4);1.29 + CALL_r32( REG_ARG1 );1.30 + POP_r32(REG_SAVE4);1.31 + POP_r32(REG_SAVE3);1.32 +#else1.33 + SUBL_imms_r32( 8, REG_ESP );1.34 + CALL_r32( REG_ARG1 );1.35 + ADDL_imms_r32( 8, REG_ESP );1.36 +#endif1.37 + POP_r32(REG_SAVE2);1.38 + POP_r32(REG_SAVE1);1.39 + POP_r32(REG_EBX);1.40 + POP_r32(REG_EBP);1.41 + RET();1.42 + sh4_translate_enter = sh4_entry_stub;1.43 +}1.44 +1.45 void sh4_translate_init(void)1.46 {1.47 sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);1.48 sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);1.49 sh4_x86.begin_callback = NULL;1.50 sh4_x86.end_callback = NULL;1.51 - sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );1.52 sh4_x86.fastmem = TRUE;1.53 sh4_x86.sse3_enabled = is_sse3_supported();1.54 xlat_set_target_fns(&x86_target_fns);1.55 + sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );1.56 + sh4_translate_write_entry_stub();1.57 }1.59 void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )1.60 @@ -344,16 +376,16 @@1.61 #ifdef HAVE_FRAME_ADDRESS1.62 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)1.63 {1.64 - decode_address(address_space(), addr_reg);1.65 + decode_address(address_space(), addr_reg, REG_CALLPTR);1.66 if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {1.67 - CALL1_r32disp_r32(REG_ECX, offset, addr_reg);1.68 + CALL1_r32disp_r32(REG_CALLPTR, offset, addr_reg);1.69 } else {1.70 if( addr_reg != REG_ARG1 ) {1.71 MOVL_r32_r32( addr_reg, REG_ARG1 );1.72 }1.73 MOVP_immptr_rptr( 0, REG_ARG2 );1.74 sh4_x86_add_backpatch( xlat_output, pc, -2 );1.75 - CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);1.76 + CALL2_r32disp_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2);1.77 }1.78 if( value_reg != REG_RESULT1 ) {1.79 MOVL_r32_r32( REG_RESULT1, value_reg );1.80 @@ -362,9 +394,9 @@1.82 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)1.83 {1.84 - decode_address(address_space(), addr_reg);1.85 + decode_address(address_space(), addr_reg, REG_CALLPTR);1.86 if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {1.87 - CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);1.88 + CALL2_r32disp_r32_r32(REG_CALLPTR, offset, addr_reg, value_reg);1.89 } else {1.90 if( value_reg != REG_ARG2 ) {1.91 MOVL_r32_r32( value_reg, REG_ARG2 );1.92 @@ -375,19 +407,19 @@1.93 #if MAX_REG_ARG > 21.94 MOVP_immptr_rptr( 0, REG_ARG3 );1.95 sh4_x86_add_backpatch( xlat_output, pc, -2 );1.96 - CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);1.97 + CALL3_r32disp_r32_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2, REG_ARG3);1.98 #else1.99 MOVL_imm32_rspdisp( 0, 0 );1.100 sh4_x86_add_backpatch( xlat_output, pc, -2 );1.101 - CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);1.102 + CALL3_r32disp_r32_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2, 0);1.103 #endif1.104 }1.105 }1.106 #else1.107 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)1.108 {1.109 - decode_address(address_space(), addr_reg);1.110 - CALL1_r32disp_r32(REG_ECX, offset, addr_reg);1.111 + decode_address(address_space(), addr_reg, REG_CALLPTR);1.112 + CALL1_r32disp_r32(REG_CALLPTR, offset, addr_reg);1.113 if( value_reg != REG_RESULT1 ) {1.114 MOVL_r32_r32( REG_RESULT1, value_reg );1.115 }1.116 @@ -395,8 +427,8 @@1.118 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)1.119 {1.120 - decode_address(address_space(), addr_reg);1.121 - CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);1.122 + decode_address(address_space(), addr_reg, REG_CALLPTR);1.123 + CALL2_r32disp_r32_r32(REG_CALLPTR, offset, addr_reg, value_reg);1.124 }1.125 #endif1.127 @@ -430,7 +462,6 @@1.128 sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;1.129 sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;1.130 sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;1.131 - emit_prologue();1.132 if( sh4_x86.begin_callback ) {1.133 CALL_ptr( sh4_x86.begin_callback );1.134 }1.135 @@ -486,7 +517,6 @@1.136 CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );1.137 }1.138 JNE_label(wrongmode);1.139 - LEAP_rptrdisp_rptr(REG_EAX, PROLOGUE_SIZE,REG_EAX);1.140 if( sh4_x86.end_callback ) {1.141 /* Note this does leave the stack out of alignment, but doesn't matter1.142 * for what we're currently using it for.1.143 @@ -518,7 +548,7 @@1.144 }1.145 uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);1.146 *backpatch = 0xE9;1.147 - *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)+PROLOGUE_SIZE-5;1.148 + *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)-5;1.149 *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;1.150 XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch;1.152 @@ -586,7 +616,6 @@1.154 static void exit_block()1.155 {1.156 - emit_epilogue();1.157 if( sh4_x86.end_callback ) {1.158 MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);1.159 JMP_rptr(REG_ECX);1.160 @@ -674,7 +703,7 @@1.161 * looping.1.162 */1.163 CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );1.164 - uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output)) + PROLOGUE_SIZE;1.165 + uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output));1.166 JCC_cc_prerel(X86_COND_A, backdisp);1.167 } else {1.168 MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );1.169 @@ -855,9 +884,9 @@1.170 COUNT_INST(I_ANDB);1.171 load_reg( REG_EAX, 0 );1.172 ADDL_rbpdisp_r32( R_GBR, REG_EAX );1.173 - MOVL_r32_rspdisp(REG_EAX, 0);1.174 + MOVL_r32_r32(REG_EAX, REG_SAVE1);1.175 MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );1.176 - MOVL_rspdisp_r32(0, REG_EAX);1.177 + MOVL_r32_r32(REG_SAVE1, REG_EAX);1.178 ANDL_imms_r32(imm, REG_EDX );1.179 MEM_WRITE_BYTE( REG_EAX, REG_EDX );1.180 sh4_x86.tstate = TSTATE_NONE;1.181 @@ -1044,7 +1073,7 @@1.182 load_reg( REG_EAX, Rm );1.183 check_ralign32( REG_EAX );1.184 MEM_READ_LONG( REG_EAX, REG_EAX );1.185 - MOVL_r32_rspdisp(REG_EAX, 0);1.186 + MOVL_r32_r32(REG_EAX, REG_SAVE1);1.187 load_reg( REG_EAX, Rm );1.188 LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );1.189 MEM_READ_LONG( REG_EAX, REG_EAX );1.190 @@ -1053,7 +1082,7 @@1.191 load_reg( REG_EAX, Rm );1.192 check_ralign32( REG_EAX );1.193 MEM_READ_LONG( REG_EAX, REG_EAX );1.194 - MOVL_r32_rspdisp( REG_EAX, 0 );1.195 + MOVL_r32_r32(REG_EAX, REG_SAVE1);1.196 load_reg( REG_EAX, Rn );1.197 check_ralign32( REG_EAX );1.198 MEM_READ_LONG( REG_EAX, REG_EAX );1.199 @@ -1061,7 +1090,7 @@1.200 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );1.201 }1.203 - IMULL_rspdisp( 0 );1.204 + IMULL_r32( REG_SAVE1 );1.205 ADDL_r32_rbpdisp( REG_EAX, R_MACL );1.206 ADCL_r32_rbpdisp( REG_EDX, R_MACH );1.208 @@ -1078,7 +1107,7 @@1.209 load_reg( REG_EAX, Rm );1.210 check_ralign16( REG_EAX );1.211 MEM_READ_WORD( REG_EAX, REG_EAX );1.212 - MOVL_r32_rspdisp( REG_EAX, 0 );1.213 + MOVL_r32_r32( REG_EAX, REG_SAVE1 );1.214 load_reg( REG_EAX, Rm );1.215 LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );1.216 MEM_READ_WORD( REG_EAX, REG_EAX );1.217 @@ -1089,14 +1118,14 @@1.218 load_reg( REG_EAX, Rn );1.219 check_ralign16( REG_EAX );1.220 MEM_READ_WORD( REG_EAX, REG_EAX );1.221 - MOVL_r32_rspdisp( REG_EAX, 0 );1.222 + MOVL_r32_r32( REG_EAX, REG_SAVE1 );1.223 load_reg( REG_EAX, Rm );1.224 check_ralign16( REG_EAX );1.225 MEM_READ_WORD( REG_EAX, REG_EAX );1.226 ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );1.227 ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );1.228 }1.229 - IMULL_rspdisp( 0 );1.230 + IMULL_r32( REG_SAVE1 );1.231 MOVL_rbpdisp_r32( R_S, REG_ECX );1.232 TESTL_r32_r32( REG_ECX, REG_ECX );1.233 JE_label( nosat );1.234 @@ -1195,9 +1224,9 @@1.235 COUNT_INST(I_ORB);1.236 load_reg( REG_EAX, 0 );1.237 ADDL_rbpdisp_r32( R_GBR, REG_EAX );1.238 - MOVL_r32_rspdisp( REG_EAX, 0 );1.239 + MOVL_r32_r32( REG_EAX, REG_SAVE1 );1.240 MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );1.241 - MOVL_rspdisp_r32( 0, REG_EAX );1.242 + MOVL_r32_r32( REG_SAVE1, REG_EAX );1.243 ORL_imms_r32(imm, REG_EDX );1.244 MEM_WRITE_BYTE( REG_EAX, REG_EDX );1.245 sh4_x86.tstate = TSTATE_NONE;1.246 @@ -1413,12 +1442,12 @@1.247 TAS.B @Rn {:1.248 COUNT_INST(I_TASB);1.249 load_reg( REG_EAX, Rn );1.250 - MOVL_r32_rspdisp( REG_EAX, 0 );1.251 + MOVL_r32_r32( REG_EAX, REG_SAVE1 );1.252 MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );1.253 TESTB_r8_r8( REG_DL, REG_DL );1.254 SETE_t();1.255 ORB_imms_r8( 0x80, REG_DL );1.256 - MOVL_rspdisp_r32( 0, REG_EAX );1.257 + MOVL_r32_r32( REG_SAVE1, REG_EAX );1.258 MEM_WRITE_BYTE( REG_EAX, REG_EDX );1.259 sh4_x86.tstate = TSTATE_NONE;1.260 :}1.261 @@ -1465,9 +1494,9 @@1.262 COUNT_INST(I_XORB);1.263 load_reg( REG_EAX, 0 );1.264 ADDL_rbpdisp_r32( R_GBR, REG_EAX );1.265 - MOVL_r32_rspdisp( REG_EAX, 0 );1.266 + MOVL_r32_r32( REG_EAX, REG_SAVE1 );1.267 MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);1.268 - MOVL_rspdisp_r32( 0, REG_EAX );1.269 + MOVL_r32_r32( REG_SAVE1, REG_EAX );1.270 XORL_imms_r32( imm, REG_EDX );1.271 MEM_WRITE_BYTE( REG_EAX, REG_EDX );1.272 sh4_x86.tstate = TSTATE_NONE;
.