filename | src/sh4/ia32mac.h |
changeset | 905:4c17ebd9ef5e |
prev | 901:32c5cf5e206f |
next | 906:268ea359f884 |
author | nkeynes |
date | Wed Oct 29 23:51:58 2008 +0000 (15 years ago) |
permissions | -rw-r--r-- |
last change | Use regparam calling conventions for all functions called from translated code, along with a few other high-use functions. Can probably extend this to all functions, but as it is this is a nice performance boost |
/**
 * $Id$
 *
 * Provides the implementation for the ia32 Mac OS X ABI variant
 * (e.g. prologue, epilogue, and calling conventions). Main difference
 * from ia32abi is that stack frames are aligned on 16-byte boundaries.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

#ifndef lxdream_ia32mac_H
#define lxdream_ia32mac_H 1

#define load_ptr( reg, ptr ) load_imm32( reg, (uint32_t)ptr );

/**
 * Note: clobbers ECX to make the indirect call - this isn't usually
 * a problem since the callee will usually clobber it anyway.
 */
#define CALL_FUNC0_SIZE 13
static inline void call_func0( void *ptr )
{
    int adj = (-sh4_x86.stack_posn)&0x0F;
    SUB_imm8s_r32( adj, R_ESP );
    load_imm32(R_ECX, (uint32_t)ptr);
    CALL_r32(R_ECX);
    ADD_imm8s_r32( adj, R_ESP );
}
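
/* A worked example of the alignment adjustment above (my reading of the code,
 * not part of the original comments): sh4_x86.stack_posn tracks how many bytes
 * have been pushed since the 16-byte-aligned frame was established, so
 *
 *     adj = (-stack_posn) & 0x0F
 *
 * pads ESP down to the next 16-byte boundary before the CALL, which is what
 * the Mac OS X ia32 ABI expects at a call site. For instance, with
 * stack_posn == 8 (return address plus saved EBP from enter_block):
 *
 *     adj = (-8) & 0x0F = 8    // SUB $8, %esp -> 16 bytes below the boundary
 *
 * and the matching ADD restores ESP afterwards.
 */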

#ifdef HAVE_FASTCALL
static inline void call_func1( void *ptr, int arg1 )
{
    int adj = (-sh4_x86.stack_posn)&0x0F;
    SUB_imm8s_r32( adj, R_ESP );
    if( arg1 != R_EAX ) {
        MOV_r32_r32( arg1, R_EAX );
    }
    load_imm32(R_ECX, (uint32_t)ptr);
    CALL_r32(R_ECX);
    ADD_imm8s_r32( adj, R_ESP );
}

static inline void call_func2( void *ptr, int arg1, int arg2 )
{
    int adj = (-sh4_x86.stack_posn)&0x0F;
    SUB_imm8s_r32( adj, R_ESP );
    if( arg2 != R_EDX ) {
        MOV_r32_r32( arg2, R_EDX );
    }
    if( arg1 != R_EAX ) {
        MOV_r32_r32( arg1, R_EAX );
    }
    load_imm32(R_ECX, (uint32_t)ptr);
    CALL_r32(R_ECX);
    ADD_imm8s_r32( adj, R_ESP );
}
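
/* The HAVE_FASTCALL paths above pass arg1 in EAX and arg2 in EDX, keeping ECX
 * free as a scratch register for the indirect call. As I read it, this is the
 * "regparam" convention mentioned in the changeset message: the C-side callees
 * (sh4_write_long, sh4_read_long, sh4_raise_exception, xlat_get_code, ...) are
 * expected to be declared with a register-argument attribute along these lines
 * (a sketch only - the real macro and declarations live elsewhere in the tree):
 *
 *     #define FASTCALL __attribute__((regparm(3)))
 *     void     FASTCALL sh4_write_long( sh4addr_t addr, uint32_t val );
 *     uint32_t FASTCALL sh4_read_long( sh4addr_t addr );
 *
 * With gcc's regparm(3) the first three integer arguments travel in EAX, EDX
 * and ECX. call_func2 moves arg2 into EDX before touching EAX, presumably
 * because arg2 could itself be sitting in EAX and must be moved out before
 * arg1 overwrites it.
 */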

/**
 * Write a double (64-bit) value into memory, with the first word in arg2a, and
 * the second in arg2b
 */
static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
{
    PUSH_r32(arg2b);
    PUSH_r32(addr);
    call_func2(sh4_write_long, addr, arg2a);
    POP_r32(R_EAX);
    POP_r32(R_EDX);
    ADD_imm8s_r32(4, R_EAX);
    call_func0(sh4_write_long);
}
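
/* At runtime the sequence emitted above amounts to (pseudo-C, illustrative only):
 *
 *     sh4_write_long( addr,     arg2a );   // first word
 *     sh4_write_long( addr + 4, arg2b );   // second word
 *
 * The PUSH/POP pair keeps addr and arg2b alive across the first call, then
 * pops them straight into EAX/EDX so the second call needs no argument moves.
 */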

/**
 * Read a double (64-bit) value from memory, writing the first word into arg2a
 * and the second into arg2b. The addr must not be in EAX
 */
static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
{
    PUSH_r32(addr);
    call_func1(sh4_read_long, addr);
    POP_r32(R_ECX);
    PUSH_r32(R_EAX);
    MOV_r32_r32(R_ECX, R_EAX);
    ADD_imm8s_r32(4, R_EAX);
    call_func0(sh4_read_long);
    if( arg2b != R_EAX ) {
        MOV_r32_r32(R_EAX, arg2b);
    }
    POP_r32(arg2a);
}
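
/* Equivalent pseudo-C for the read sequence (illustrative only):
 *
 *     arg2a = sh4_read_long( addr );
 *     arg2b = sh4_read_long( addr + 4 );
 *
 * The address is saved on the stack across the first call, and the first
 * result is parked on the stack until the second read completes.
 */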
#else
static inline void call_func1( void *ptr, int arg1 )
{
    int adj = (-4-sh4_x86.stack_posn)&0x0F;
    SUB_imm8s_r32( adj, R_ESP );
    PUSH_r32(arg1);
    load_imm32(R_EAX, (uint32_t)ptr);
    CALL_r32(R_EAX);
    ADD_imm8s_r32( adj+4, R_ESP );
    sh4_x86.stack_posn -= 4;
}

#define CALL_FUNC2_SIZE 15
static inline void call_func2( void *ptr, int arg1, int arg2 )
{
    int adj = (-8-sh4_x86.stack_posn)&0x0F;
    SUB_imm8s_r32( adj, R_ESP );
    PUSH_r32(arg2);
    PUSH_r32(arg1);
    load_imm32(R_EAX, (uint32_t)ptr);
    CALL_r32(R_EAX);
    ADD_imm8s_r32( adj+8, R_ESP );
    sh4_x86.stack_posn -= 8;
}

/**
 * Write a double (64-bit) value into memory, with the first word in arg2a, and
 * the second in arg2b
 */
static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
{
    int adj = (-8-sh4_x86.stack_posn)&0x0F;
    SUB_imm8s_r32( adj, R_ESP );
    ADD_imm8s_r32( 4, addr );
    PUSH_r32(arg2b);
    PUSH_r32(addr);
    ADD_imm8s_r32( -4, addr );
    SUB_imm8s_r32( 8, R_ESP );
    PUSH_r32(arg2a);
    PUSH_r32(addr);
    load_imm32(R_EAX, (uint32_t)sh4_write_long);
    CALL_r32(R_EAX);
    ADD_imm8s_r32( 16, R_ESP );
    load_imm32(R_EAX, (uint32_t)sh4_write_long);
    CALL_r32(R_EAX);
    ADD_imm8s_r32( adj+8, R_ESP );
    sh4_x86.stack_posn -= 16;
}

/**
 * Read a double (64-bit) value from memory, writing the first word into arg2a
 * and the second into arg2b. The addr must not be in EAX
 */
static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
{
    int adj = (-4-sh4_x86.stack_posn)&0x0F;
    int adj2 = (-8-sh4_x86.stack_posn)&0x0F;
    SUB_imm8s_r32( adj, R_ESP );
    PUSH_r32(addr);
    load_imm32(R_EAX, (uint32_t)sh4_read_long);
    CALL_r32(R_EAX);
    POP_r32(R_ECX);
    SUB_imm8s_r32( adj2-adj, R_ESP );
    PUSH_r32(R_EAX);
    ADD_imm8s_r32( 4, R_ECX );
    PUSH_r32(R_ECX);
    load_imm32(R_EAX, (uint32_t)sh4_read_long);
    CALL_r32(R_EAX);
    ADD_imm8s_r32( 4, R_ESP );
    MOV_r32_r32( R_EAX, arg2b );
    POP_r32(arg2a);
    ADD_imm8s_r32( adj2, R_ESP );
    sh4_x86.stack_posn -= 4;
}

#endif

/**
 * Emit the 'start of block' assembly. Sets up the stack frame and saves
 * SI/DI as required
 */
void enter_block( )
{
    PUSH_r32(R_EBP);
    /* mov &sh4r, ebp */
    load_ptr( R_EBP, ((uint8_t *)&sh4r) + 128 );
    sh4_x86.stack_posn = 8;
}
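
/* Why EBP points 128 bytes *into* sh4r (my reading, not from the original
 * comments): translated code addresses SH4 state with EBP-relative loads and
 * stores, and biasing the base by +128 lets every offset in the first 256
 * bytes of the structure be encoded as a signed 8-bit displacement
 * (offset - 128 lands in -128..+127), where an unbiased base would need a
 * 32-bit displacement for anything past offset 127. Something like
 *
 *     #define REG_OFFSET(r)  (offsetof(struct sh4_registers, r) - 128)
 *
 * would produce such offsets; the real definition lives elsewhere in the tree.
 * stack_posn = 8 presumably records the return address plus the saved EBP
 * already on the stack, which is what the call_func* alignment works from.
 */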

/**
 * Exit the block with sh4r.pc already written with the target pc
 */
void exit_block_pcset( sh4addr_t pc )
{
    load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );    // 6
    load_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_EAX);
    } else {
        call_func1(xlat_get_code,R_EAX);
    }
    POP_r32(R_EBP);
    RET();
}
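
/* The cycle-count expression used by the exit stubs, spelled out (my reading):
 * SH4 instructions are 2 bytes each, so
 *
 *     ((pc - block_start_pc) >> 1) * sh4_cpu_period
 *
 * is "instructions executed in this block" times sh4_cpu_period (the cost the
 * emulator charges per instruction), accumulated into sh4r.slice_cycle on
 * every exit. The trailing "// 5" / "// 6" comments appear to record the
 * encoded size in bytes of each emitted x86 instruction.
 */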

/**
 * Exit the block with sh4r.new_pc written with the target pc
 */
void exit_block_newpcset( sh4addr_t pc )
{
    load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );    // 6
    load_spreg( R_EAX, R_NEW_PC );
    store_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_EAX);
    } else {
        call_func1(xlat_get_code,R_EAX);
    }
    POP_r32(R_EBP);
    RET();
}

/**
 * Exit the block to an absolute PC
 */
void exit_block( sh4addr_t pc, sh4addr_t endpc )
{
    load_imm32( R_ECX, pc );                            // 5
    store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
    if( IS_IN_ICACHE(pc) ) {
        MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
    } else if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_ECX);
    } else {
        call_func1(xlat_get_code,R_ECX);
    }
    AND_imm8s_r32( 0xFC, R_EAX ); // 3
    load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    POP_r32(R_EBP);
    RET();
}
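
/* On these exit paths EAX ends up holding the native address of the next
 * translated block - either loaded straight from the code LUT entry via
 * MOV_moff32_EAX, or returned by xlat_get_code / xlat_get_code_by_vma - and
 * the RET appears to hand it back to the caller for dispatch. The AND with
 * 0xFC (sign-extended to 0xFFFFFFFC) presumably strips low-order tag bits
 * kept in the LUT entry; that is an inference from this code, not something
 * stated here.
 */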

/**
 * Exit the block to a relative PC
 */
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
{
    load_imm32( R_ECX, pc - sh4_x86.block_start_pc );   // 5
    ADD_sh4r_r32( R_PC, R_ECX );
    store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
    if( IS_IN_ICACHE(pc) ) {
        MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
    } else if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_ECX);
    } else {
        call_func1(xlat_get_code,R_ECX);
    }
    AND_imm8s_r32( 0xFC, R_EAX ); // 3
    load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    POP_r32(R_EBP);
    RET();
}

/**
 * Write the block trailer (exception handling block)
 */
void sh4_translate_end_block( sh4addr_t pc ) {
    if( sh4_x86.branch_taken == FALSE ) {
        // Didn't exit unconditionally already, so write the termination here
        exit_block_rel( pc, pc );
    }
    if( sh4_x86.backpatch_posn != 0 ) {
        unsigned int i;
        // Raise exception
        uint8_t *end_ptr = xlat_output;
        MOV_r32_r32( R_EDX, R_ECX );
        ADD_r32_r32( R_EDX, R_ECX );
        ADD_r32_sh4r( R_ECX, R_PC );
        MOV_moff32_EAX( &sh4_cpu_period );
        MUL_r32( R_EDX );
        ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );

        POP_r32(R_EDX);
        call_func1( sh4_raise_exception, R_EDX );
        load_spreg( R_EAX, R_PC );
        if( sh4_x86.tlb_on ) {
            call_func1(xlat_get_code_by_vma,R_EAX);
        } else {
            call_func1(xlat_get_code,R_EAX);
        }
        POP_r32(R_EBP);
        RET();

        // Exception already raised - just cleanup
        uint8_t *preexc_ptr = xlat_output;
        MOV_r32_r32( R_EDX, R_ECX );
        ADD_r32_r32( R_EDX, R_ECX );
        ADD_r32_sh4r( R_ECX, R_SPC );
        MOV_moff32_EAX( &sh4_cpu_period );
        MUL_r32( R_EDX );
        ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
        load_spreg( R_EAX, R_PC );
        if( sh4_x86.tlb_on ) {
            call_func1(xlat_get_code_by_vma,R_EAX);
        } else {
            call_func1(xlat_get_code,R_EAX);
        }
        POP_r32(R_EBP);
        RET();

        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
            *fixup_addr = xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
                load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );
                int stack_adj = -1 - sh4_x86.backpatch_list[i].exc_code;
                if( stack_adj > 0 ) {
                    ADD_imm8s_r32( stack_adj, R_ESP );
                }
                int rel = preexc_ptr - xlat_output;
                JMP_rel(rel);
            } else {
                PUSH_imm32( sh4_x86.backpatch_list[i].exc_code );
                load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );
                int rel = end_ptr - xlat_output;
                JMP_rel(rel);
            }
        }
    }
}
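
/* The fixup arithmetic in the loop above, unpacked (illustrative): each
 * backpatch entry records the offset of a 4-byte rel32 field inside an
 * already-emitted branch, and a rel32 is measured from the end of that field,
 * so
 *
 *     *fixup_addr = xlat_output - (uint8_t *)&...->code[fixup_offset] - 4;
 *
 * is (target) - (address of rel32 field + 4), retargeting the branch at the
 * small per-entry sequence emitted next at xlat_output. That sequence loads
 * the instruction count into EDX and jumps to one of the two shared stubs:
 * entries with a negative exc_code go to the "exception already raised"
 * cleanup path (the value also encodes a stack adjustment), while
 * non-negative entries push their exception code and go to the
 * sh4_raise_exception path.
 */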

/**
 * The unwind methods only work if we compiled with DWARF2 frame information
 * (i.e. -fexceptions), otherwise we have to use the direct frame scan.
 */
#ifdef HAVE_EXCEPTIONS
#include <unwind.h>

struct UnwindInfo {
    int have_result;
    void *pc;
};

_Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
{
    void *ebp = (void *)_Unwind_GetGR(context, 5);
    void *expect = (((uint8_t *)&sh4r) + 128 );
    struct UnwindInfo *info = arg;
    if( ebp == expect ) {
        info->have_result = 1;
        info->pc = (void *)_Unwind_GetIP(context);
    } else if( info->have_result ) {
        return _URC_NORMAL_STOP;
    }

    return _URC_NO_REASON;
}
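
/* How the unwinder locates the translated frame (my reading): enter_block
 * leaves EBP equal to &sh4r + 128 for the lifetime of a translated block, and
 * no ordinary C frame should ever carry that value. xlat_check_frame is called
 * once per frame during _Unwind_Backtrace; a frame whose GR 5 (EBP on ia32)
 * matches the sentinel is translated code, and its instruction pointer is the
 * native PC being sought. Once a match has been recorded, the walk is cut
 * short at the next non-matching frame with _URC_NORMAL_STOP.
 */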

void *xlat_get_native_pc()
{
    struct _Unwind_Exception exc;
    struct UnwindInfo info;

    info.have_result = 0;
    void *result = NULL;
    _Unwind_Backtrace( xlat_check_frame, &info );
    if( info.have_result )
        return info.pc;
    return NULL;
}
#else
void *xlat_get_native_pc()
{
    void *result = NULL;
    asm(
        "mov %%ebp, %%eax\n\t"
        "mov $0x8, %%ecx\n\t"
        "mov %1, %%edx\n"
        "frame_loop: test %%eax, %%eax\n\t"
        "je frame_not_found\n\t"
        "cmp (%%eax), %%edx\n\t"
        "je frame_found\n\t"
        "sub $0x1, %%ecx\n\t"
        "je frame_not_found\n\t"
        "movl (%%eax), %%eax\n\t"
        "jmp frame_loop\n"
        "frame_found: movl 0x4(%%eax), %0\n"
        "frame_not_found:"
        : "=r" (result)
        : "r" (((uint8_t *)&sh4r) + 128 )
        : "eax", "ecx", "edx" );
    return result;
}
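
/* The fallback scans saved frame pointers by hand (annotated reading of the
 * asm above): starting from the current %ebp it follows up to 8 frame links;
 * a frame whose saved-EBP slot holds the &sh4r + 128 sentinel belongs to a
 * function called directly from translated code, so the return address stored
 * just above it (4(%eax)) is the native PC inside the translated block.
 * Roughly, in C (illustrative only, __builtin_frame_address stands in for the
 * inline asm):
 *
 *     void **frame = __builtin_frame_address(0);
 *     for( int i = 0; frame != NULL && i < 8; i++, frame = frame[0] ) {
 *         if( frame[0] == (void *)(((uint8_t *)&sh4r) + 128) )
 *             return frame[1];    // return address = native PC
 *     }
 *     return NULL;
 */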
#endif

#endif /* !lxdream_ia32mac.h */