filename | src/sh4/ia32mac.h |
changeset | 901:32c5cf5e206f |
prev | 736:a02d1475ccfd |
next | 905:4c17ebd9ef5e |
author | nkeynes |
date | Sun Oct 26 02:28:29 2008 +0000 |
permissions | -rw-r--r-- |
last change | Move the precision/size tests to translation-time rather than execution-time, and flush/retranslate on a mismatch. Shaves a few percent off the core runtime |
/**
 * $Id$
 *
 * Provides the implementation for the ia32 Mac OS X ABI variant
 * (e.g. prologue, epilogue, and calling conventions). The main difference
 * from ia32abi is that stack frames are aligned on 16-byte boundaries.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
#ifndef lxdream_ia32mac_H
#define lxdream_ia32mac_H 1

#define load_ptr( reg, ptr ) load_imm32( reg, (uint32_t)ptr );
/**
 * Note: clobbers EAX to make the indirect call - this isn't usually
 * a problem since the callee will usually clobber it anyway.
 */
#define CALL_FUNC0_SIZE 13
static inline void call_func0( void *ptr )
{
    int adj = (-sh4_x86.stack_posn)&0x0F;
    SUB_imm8s_r32( adj, R_ESP );
    load_imm32(R_EAX, (uint32_t)ptr);
    CALL_r32(R_EAX);
    ADD_imm8s_r32( adj, R_ESP );
}
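/* Worked example (annotation, not in the original source): with
 * sh4_x86.stack_posn == 8 on entry, adj = (-8) & 0x0F = 8, so the SUB
 * leaves ESP on a 16-byte boundary at the CALL, as the OS X ia32 ABI
 * requires, and the ADD undoes the padding afterwards. */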
#define CALL_FUNC1_SIZE 14
static inline void call_func1( void *ptr, int arg1 )
{
    int adj = (-4-sh4_x86.stack_posn)&0x0F;
    SUB_imm8s_r32( adj, R_ESP );
    PUSH_r32(arg1);
    load_imm32(R_EAX, (uint32_t)ptr);
    CALL_r32(R_EAX);
    ADD_imm8s_r32( adj+4, R_ESP );
    sh4_x86.stack_posn -= 4;
}
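/* Usage note (annotation, not in the original source): the exit stubs
 * later in this file invoke this as, e.g.,
 *     call_func1( xlat_get_code, R_EAX );
 * which leaves the native code pointer for the next block in EAX. */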
#define CALL_FUNC2_SIZE 15
static inline void call_func2( void *ptr, int arg1, int arg2 )
{
    int adj = (-8-sh4_x86.stack_posn)&0x0F;
    SUB_imm8s_r32( adj, R_ESP );
    PUSH_r32(arg2);
    PUSH_r32(arg1);
    load_imm32(R_EAX, (uint32_t)ptr);
    CALL_r32(R_EAX);
    ADD_imm8s_r32( adj+8, R_ESP );
    sh4_x86.stack_posn -= 8;
}
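/* Illustrative sketch only (call_func3 is not part of the original file):
 * a three-argument variant following the same pattern as call_func1/2 -
 * arguments pushed right-to-left per cdecl, with adj padding ESP to a
 * 16-byte boundary at the CALL, and the same stack_posn bookkeeping. */
static inline void call_func3( void *ptr, int arg1, int arg2, int arg3 )
{
    int adj = (-12-sh4_x86.stack_posn)&0x0F;
    SUB_imm8s_r32( adj, R_ESP );
    PUSH_r32(arg3);
    PUSH_r32(arg2);
    PUSH_r32(arg1);
    load_imm32(R_EAX, (uint32_t)ptr);
    CALL_r32(R_EAX);
    ADD_imm8s_r32( adj+12, R_ESP );
    sh4_x86.stack_posn -= 12;
}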
/**
 * Write a double (64-bit) value into memory, with the first word in arg2a,
 * and the second in arg2b.
 * NB: 36 bytes
 */
#define MEM_WRITE_DOUBLE_SIZE 36
static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
{
    int adj = (-8-sh4_x86.stack_posn)&0x0F;
    SUB_imm8s_r32( adj, R_ESP );
    ADD_imm8s_r32( 4, addr );
    PUSH_r32(arg2b);
    PUSH_r32(addr);
    ADD_imm8s_r32( -4, addr );
    SUB_imm8s_r32( 8, R_ESP );
    PUSH_r32(arg2a);
    PUSH_r32(addr);
    load_imm32(R_EAX, (uint32_t)sh4_write_long);
    CALL_r32(R_EAX);
    ADD_imm8s_r32( 16, R_ESP );
    load_imm32(R_EAX, (uint32_t)sh4_write_long);
    CALL_r32(R_EAX);
    ADD_imm8s_r32( adj+8, R_ESP );
    sh4_x86.stack_posn -= 16;
}
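/* Stack layout at the first CALL above (top of stack first; annotation
 * added for clarity, not in the original source):
 *
 *     addr            <- ESP: first argument to the first sh4_write_long
 *     arg2a
 *     (8 bytes of padding from the SUB $8)
 *     addr+4             first argument to the second sh4_write_long
 *     arg2b
 *     (adj bytes of alignment padding)
 *
 * The ADD $16 after the first call discards that call's two arguments and
 * the 8-byte pad, exposing the pre-pushed (addr+4, arg2b) pair for the
 * second call while keeping ESP 16-byte aligned. */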
/**
 * Read a double (64-bit) value from memory, writing the first word into arg2a
 * and the second into arg2b. The addr must not be in EAX.
 * NB: 36 bytes
 */
#define MEM_READ_DOUBLE_SIZE 36
static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
{
    int adj = (-4-sh4_x86.stack_posn)&0x0F;
    int adj2 = (-8-sh4_x86.stack_posn)&0x0F;
    SUB_imm8s_r32( adj, R_ESP );
    PUSH_r32(addr);
    load_imm32(R_EAX, (uint32_t)sh4_read_long);
    CALL_r32(R_EAX);
    POP_r32(R_ECX);
    SUB_imm8s_r32( adj2-adj, R_ESP );
    PUSH_r32(R_EAX);
    ADD_imm8s_r32( 4, R_ECX );
    PUSH_r32(R_ECX);
    load_imm32(R_EAX, (uint32_t)sh4_read_long);
    CALL_r32(R_EAX);
    ADD_imm8s_r32( 4, R_ESP );
    MOV_r32_r32( R_EAX, arg2b );
    POP_r32(arg2a);
    ADD_imm8s_r32( adj2, R_ESP );
    sh4_x86.stack_posn -= 4;
}
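/* Note (annotation, not in the original source): the address is kept live
 * across the first call by leaving it on the stack (PUSH addr ... POP
 * R_ECX), since EAX/ECX/EDX are caller-saved in this ABI; the first
 * result is likewise parked on the stack while the second word is read. */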
/**
 * Emit the 'start of block' assembly. Sets up the stack frame and saves
 * SI/DI as required.
 */
void enter_block( )
{
    PUSH_r32(R_EBP);
    /* mov &sh4r, ebp */
    load_ptr( R_EBP, ((uint8_t *)&sh4r) + 128 );
    sh4_x86.stack_posn = 8;
}
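/* Annotation (not in the original source): EBP is biased by +128 so that
 * sh4r fields at offsets 0..255 fall within the signed 8-bit displacement
 * range [-128,127], keeping the common register accesses to one-byte
 * displacements. stack_posn starts at 8, presumably covering the return
 * address plus the saved EBP relative to the caller's aligned frame. */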
/**
 * Exit the block with sh4r.pc already written
 */
void exit_block_pcset( sh4addr_t pc )
{
    load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    load_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_EAX);
    } else {
        call_func1(xlat_get_code,R_EAX);
    }
    POP_r32(R_EBP);
    RET();
}
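/* Annotation (not in the original source): (pc - block_start_pc) >> 1 is
 * the instruction count, since SH4 instructions are 2 bytes each;
 * multiplying by sh4_cpu_period converts that to elapsed time in the
 * units used by slice_cycle. E.g. a 10-instruction block charges
 * 10 * sh4_cpu_period to the current slice. */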
/**
 * Exit the block with sh4r.new_pc written with the target pc
 */
void exit_block_newpcset( sh4addr_t pc )
{
    load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    load_spreg( R_EAX, R_NEW_PC );
    store_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_EAX);
    } else {
        call_func1(xlat_get_code,R_EAX);
    }
    POP_r32(R_EBP);
    RET();
}
#define EXIT_BLOCK_SIZE(pc) (24 + (IS_IN_ICACHE(pc)?5:CALL_FUNC1_SIZE))

/**
 * Exit the block to an absolute PC
 */
void exit_block( sh4addr_t pc, sh4addr_t endpc )
{
    load_imm32( R_ECX, pc );                            // 5
    store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
    if( IS_IN_ICACHE(pc) ) {
        MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
    } else if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_ECX);
    } else {
        call_func1(xlat_get_code,R_ECX);
    }
    AND_imm8s_r32( 0xFC, R_EAX ); // 3
    load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    POP_r32(R_EBP);
    RET();
}
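/* Annotation (not in the original source): the imm8s 0xFC sign-extends to
 * 0xFFFFFFFC, so the AND clears the low two bits of the looked-up code
 * pointer; the translation lookup table appears to use those bits for
 * bookkeeping, and code entry points are at least 4-byte aligned. */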
#define EXIT_BLOCK_REL_SIZE(pc) (27 + (IS_IN_ICACHE(pc)?5:CALL_FUNC1_SIZE))

/**
 * Exit the block to a relative PC
 */
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
{
    load_imm32( R_ECX, pc - sh4_x86.block_start_pc );   // 5
    ADD_sh4r_r32( R_PC, R_ECX );
    store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
    if( IS_IN_ICACHE(pc) ) {
        MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
    } else if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_ECX);
    } else {
        call_func1(xlat_get_code,R_ECX);
    }
    AND_imm8s_r32( 0xFC, R_EAX ); // 3
    load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    POP_r32(R_EBP);
    RET();
}
/**
 * Write the block trailer (exception handling block)
 */
void sh4_translate_end_block( sh4addr_t pc ) {
    if( sh4_x86.branch_taken == FALSE ) {
        // Didn't exit unconditionally already, so write the termination here
        exit_block_rel( pc, pc );
    }
    if( sh4_x86.backpatch_posn != 0 ) {
        unsigned int i;
        // Raise exception
        uint8_t *end_ptr = xlat_output;
        MOV_r32_r32( R_EDX, R_ECX );
        ADD_r32_r32( R_EDX, R_ECX );
        ADD_r32_sh4r( R_ECX, R_PC );
        MOV_moff32_EAX( &sh4_cpu_period );
        MUL_r32( R_EDX );
        ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );

        POP_r32(R_EDX);
        call_func1( sh4_raise_exception, R_EDX );
        load_spreg( R_EAX, R_PC );
        if( sh4_x86.tlb_on ) {
            call_func1(xlat_get_code_by_vma,R_EAX);
        } else {
            call_func1(xlat_get_code,R_EAX);
        }
        POP_r32(R_EBP);
        RET();

        // Exception already raised - just cleanup
        uint8_t *preexc_ptr = xlat_output;
        MOV_r32_r32( R_EDX, R_ECX );
        ADD_r32_r32( R_EDX, R_ECX );
        ADD_r32_sh4r( R_ECX, R_SPC );
        MOV_moff32_EAX( &sh4_cpu_period );
        MUL_r32( R_EDX );
        ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
        load_spreg( R_EAX, R_PC );
        if( sh4_x86.tlb_on ) {
            call_func1(xlat_get_code_by_vma,R_EAX);
        } else {
            call_func1(xlat_get_code,R_EAX);
        }
        POP_r32(R_EBP);
        RET();

        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
            *fixup_addr = xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
                load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );
                int stack_adj = -1 - sh4_x86.backpatch_list[i].exc_code;
                if( stack_adj > 0 ) {
                    ADD_imm8s_r32( stack_adj, R_ESP );
                }
                int rel = preexc_ptr - xlat_output;
                JMP_rel(rel);
            } else {
                PUSH_imm32( sh4_x86.backpatch_list[i].exc_code );
                load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );
                int rel = end_ptr - xlat_output;
                JMP_rel(rel);
            }
        }
    }
}
void *xlat_get_native_pc()
{
    void *result = NULL;
    asm(
        "mov %%ebp, %%eax\n\t"
        "mov $0x8, %%ecx\n\t"
        "mov %1, %%edx\n"
        "frame_loop: test %%eax, %%eax\n\t"
        "je frame_not_found\n\t"
        "cmp (%%eax), %%edx\n\t"
        "je frame_found\n\t"
        "sub $0x1, %%ecx\n\t"
        "je frame_not_found\n\t"
        "movl (%%eax), %%eax\n\t"
        "jmp frame_loop\n"
        "frame_found: movl 0x4(%%eax), %0\n"
        "frame_not_found:"
        : "=r" (result)
        : "r" (((uint8_t *)&sh4r) + 128 )
        : "eax", "ecx", "edx" );
    return result;
}
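/* A rough C equivalent of the frame walk above (illustrative sketch only;
 * xlat_get_native_pc_sketch is not part of the original file, and it
 * assumes GCC with standard EBP frame chaining). It walks up to 8 stack
 * frames looking for one whose saved EBP is the biased &sh4r + 128 set up
 * by enter_block, and returns that frame's return address, which lies
 * inside the translated block. */
static inline void *xlat_get_native_pc_sketch( void )
{
    struct frame { struct frame *saved_ebp; void *return_addr; };
    struct frame *fp = (struct frame *)__builtin_frame_address(0);
    void *translated_ebp = ((uint8_t *)&sh4r) + 128;
    int depth;
    for( depth = 0; fp != NULL && depth < 8; depth++ ) {
        if( (void *)fp->saved_ebp == translated_ebp ) {
            return fp->return_addr;  /* native PC within the translated block */
        }
        fp = fp->saved_ebp;          /* follow the saved-EBP chain upwards */
    }
    return NULL;
}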
#endif /* !lxdream_ia32mac_H */