filename | src/sh4/ia32abi.h |
changeset | 930:07e5b11419db |
prev | 929:fd8cb0c82f5f |
next | 939:6f2302afeb89 |
author | nkeynes |
date | Sat Dec 27 02:59:35 2008 +0000 (15 years ago) |
branch | lxdream-mem |
permissions | -rw-r--r-- |
last change | Replace fpscr_mask/fpscr flags in xlat_cache_block with a single xlat_sh4_mode, which tracks the field of the same name in sh4r - actually a little faster this way. Now depends on SR.MD, FPSCR.PR and FPSCR.SZ (although it doesn't benefit from the SR flag yet). Also fixed the failure to check the flags in the common case (code address returned by previous block) which took away the performance benefits, but oh well. |
view | annotate | diff | log | raw |
1 /**
2 * $Id$
3 *
4 * Provides the implementation for the ia32 ABI variant
5 * (eg prologue, epilogue, and calling conventions). Stack frame is
6 * aligned on 16-byte boundaries for the benefit of OS X (which
7 * requires it).
8 *
9 * Copyright (c) 2007 Nathan Keynes.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 */
#ifndef lxdream_ia32mac_H
#define lxdream_ia32mac_H 1

/* Load an immediate host pointer value into a register.
 * NOTE(review): the include guard says "ia32mac" but this file is
 * ia32abi.h — presumably historical; confirm it cannot collide with a
 * real ia32mac header elsewhere in the tree. */
#define load_ptr( reg, ptr ) load_imm32( reg, (uint32_t)ptr );
27 static inline decode_address( int addr_reg )
28 {
29 MOV_r32_r32( addr_reg, R_ECX );
30 SHR_imm8_r32( 12, R_ECX );
31 MOV_r32disp32x4_r32( R_ECX, (uintptr_t)sh4_address_space, R_ECX );
32 }
/**
 * Emit a direct call to the C function at ptr (no arguments).
 *
 * Note: clobbers EAX to make the indirect call - this isn't usually
 * a problem since the callee will usually clobber it anyway.
 */
static inline void call_func0( void *ptr )
{
    CALL_ptr(ptr);
}
43 #ifdef HAVE_FASTCALL
/**
 * Emit a call to ptr with one argument, fastcall convention:
 * the argument is passed in EAX (moved there first if needed).
 */
static inline void call_func1( void *ptr, int arg1 )
{
    if( arg1 != R_EAX ) {
        MOV_r32_r32( arg1, R_EAX );
    }
    CALL_ptr(ptr);
}
/**
 * Emit an indirect call through the function pointer held in addr_reg,
 * with one argument passed in EAX (fastcall).
 * NOTE(review): assumes addr_reg != R_EAX when arg1 needs moving,
 * otherwise the target address would be clobbered — confirm callers.
 */
static inline void call_func1_r32( int addr_reg, int arg1 )
{
    if( arg1 != R_EAX ) {
        MOV_r32_r32( arg1, R_EAX );
    }
    CALL_r32(addr_reg);
}
/**
 * Emit an indirect call through the function pointer at [preg + disp8],
 * with one argument passed in EAX (fastcall).
 */
static inline void call_func1_r32disp8( int preg, uint32_t disp8, int arg1 )
{
    if( arg1 != R_EAX ) {
        MOV_r32_r32( arg1, R_EAX );
    }
    CALL_r32disp8(preg, disp8);
}
/**
 * Emit a call to ptr with two arguments, fastcall convention:
 * arg1 in EAX, arg2 in EDX.
 * NOTE(review): arg2 is moved into EDX before arg1 is read, so this
 * assumes arg1 != R_EDX — confirm no caller passes EDX as arg1.
 */
static inline void call_func2( void *ptr, int arg1, int arg2 )
{
    if( arg2 != R_EDX ) {
        MOV_r32_r32( arg2, R_EDX );
    }
    if( arg1 != R_EAX ) {
        MOV_r32_r32( arg1, R_EAX );
    }
    CALL_ptr(ptr);
}
/**
 * Emit an indirect call through the function pointer in addr_reg with
 * two arguments: arg1 in EAX, arg2 in EDX (fastcall).
 * NOTE(review): assumes arg1 != R_EDX and addr_reg is neither EAX nor
 * EDX after the moves — confirm callers.
 */
static inline void call_func2_r32( int addr_reg, int arg1, int arg2 )
{
    if( arg2 != R_EDX ) {
        MOV_r32_r32( arg2, R_EDX );
    }
    if( arg1 != R_EAX ) {
        MOV_r32_r32( arg1, R_EAX );
    }
    CALL_r32(addr_reg);
}
/**
 * Emit an indirect call through the function pointer at [preg + disp8]
 * with two arguments: arg1 in EAX, arg2 in EDX (fastcall).
 * NOTE(review): assumes arg1 != R_EDX and preg is not clobbered by the
 * argument moves — confirm callers.
 */
static inline void call_func2_r32disp8( int preg, uint32_t disp8, int arg1, int arg2 )
{
    if( arg2 != R_EDX ) {
        MOV_r32_r32( arg2, R_EDX );
    }
    if( arg1 != R_EAX ) {
        MOV_r32_r32( arg1, R_EAX );
    }
    CALL_r32disp8(preg, disp8);
}
/**
 * Emit a one-argument fastcall (arg1 in EAX) to a function that may
 * raise an SH4 exception: EDX is loaded with a backpatched exception
 * return address before the call.
 * NOTE(review): the pc parameter is unused in this ABI variant —
 * presumably kept for signature parity with other ABI headers; confirm.
 */
static inline void call_func1_exc( void *ptr, int arg1, int pc )
{
    if( arg1 != R_EAX ) {
        MOV_r32_r32( arg1, R_EAX );
    }
    load_exc_backpatch(R_EDX);
    CALL_ptr(ptr);
}
/**
 * Emit a two-argument fastcall (arg1 in EAX, arg2 in EDX) to a function
 * that may raise an SH4 exception: ECX is loaded with a backpatched
 * exception return address before the call.
 * NOTE(review): assumes arg1 != R_EDX (arg2 is moved first); the pc
 * parameter is unused in this variant — confirm intent.
 */
static inline void call_func2_exc( void *ptr, int arg1, int arg2, int pc )
{
    if( arg2 != R_EDX ) {
        MOV_r32_r32( arg2, R_EDX );
    }
    if( arg1 != R_EAX ) {
        MOV_r32_r32( arg1, R_EAX );
    }
    load_exc_backpatch(R_ECX);
    CALL_ptr(ptr);
}
/**
 * Write a double (64-bit) value into memory, with the first word in arg2a, and
 * the second in arg2b.
 *
 * Uses the two 4-byte scratch slots at [esp+0]/[esp+4] (the 8 bytes
 * reserved by enter_block) to preserve addr and arg2b across the first
 * MEM_WRITE_LONG, which may clobber caller-saved registers.
 * Clobbers EAX and EDX.
 */
static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
{
    MOV_r32_esp8(addr, 0);       // save addr in scratch slot 0
    MOV_r32_esp8(arg2b, 4);      // save second word in scratch slot 1
    MEM_WRITE_LONG(addr, arg2a); // write first word at addr
    MOV_esp8_r32(0, R_EAX);
    MOV_esp8_r32(4, R_EDX);
    ADD_imm8s_r32(4, R_EAX);     // addr + 4 for the second word
    MEM_WRITE_LONG(R_EAX, R_EDX);
}
/**
 * Read a double (64-bit) value from memory, writing the first word into arg2a
 * and the second into arg2b. The addr must not be in EAX.
 *
 * Uses the scratch slots at [esp+0]/[esp+4] (reserved by enter_block) to
 * preserve addr and the first word across the two MEM_READ_LONG calls.
 */
static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
{
    MOV_r32_esp8(addr, 0);       // save addr — the first read clobbers registers
    MEM_READ_LONG(addr, R_EAX);  // first word -> EAX
    MOV_r32_esp8(R_EAX, 4);      // park first word in scratch slot 1
    MOV_esp8_r32(0, R_EAX);
    ADD_imm8s_r32(4, R_EAX);     // addr + 4
    MEM_READ_LONG(R_EAX, arg2b );
    MOV_esp8_r32(4, arg2a);      // recover first word
}
153 #else
/**
 * Emit a call to ptr with one stack-passed argument (non-fastcall ABI).
 * The extra SUB pads the frame so that ESP stays 16-byte aligned at the
 * call, as required on OS X (see file header).
 */
static inline void call_func1( void *ptr, int arg1 )
{
    SUB_imm8s_r32( 12, R_ESP );  // 12 pad + 4 push = 16 bytes
    PUSH_r32(arg1);
    CALL_ptr(ptr);
    ADD_imm8s_r32( 16, R_ESP );  // pop pad + argument
}
/**
 * Emit a call to ptr with two stack-passed arguments (non-fastcall ABI).
 * Arguments are pushed right-to-left; the pad keeps ESP 16-byte aligned
 * at the call.
 */
static inline void call_func2( void *ptr, int arg1, int arg2 )
{
    SUB_imm8s_r32( 8, R_ESP );   // 8 pad + 2*4 push = 16 bytes
    PUSH_r32(arg2);
    PUSH_r32(arg1);
    CALL_ptr(ptr);
    ADD_imm8s_r32( 16, R_ESP );  // pop pad + arguments
}
/**
 * Write a double (64-bit) value into memory, with the first word in arg2a, and
 * the second in arg2b.
 *
 * Pre-stages the (addr+4, arg2b) argument pair for the second call in a
 * 16-byte-aligned frame, then pushes (addr, arg2a) for the first call;
 * each ADD pops one 16-byte frame.
 * NOTE(review): clobbers the arg2b register (reused via LEA to hold
 * addr+4) — confirm callers don't rely on it afterwards.
 */
static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
{
    SUB_imm8s_r32( 8, R_ESP );
    PUSH_r32(arg2b);
    LEA_r32disp8_r32( addr, 4, arg2b );  // arg2b := addr + 4
    PUSH_r32(arg2b);
    SUB_imm8s_r32( 8, R_ESP );
    PUSH_r32(arg2a);
    PUSH_r32(addr);
    CALL_ptr(sh4_write_long);            // write first word at addr
    ADD_imm8s_r32( 16, R_ESP );
    CALL_ptr(sh4_write_long);            // write second word at addr+4 (pre-staged args)
    ADD_imm8s_r32( 16, R_ESP );
}
/**
 * Read a double (64-bit) value from memory, writing the first word into arg2a
 * and the second into arg2b. The addr must not be in EAX.
 *
 * Keeps addr at [esp+0] across both sh4_read_long calls; the first
 * result is parked at [esp+4], and the in-place ADD turns the stacked
 * argument into addr+4 for the second call.
 */
static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
{
    SUB_imm8s_r32( 12, R_ESP );
    PUSH_r32(addr);
    CALL_ptr(sh4_read_long);     // first word -> EAX
    MOV_r32_esp8(R_EAX, 4);      // park first word
    ADD_imm8s_esp8(4, 0);        // stacked argument := addr + 4
    CALL_ptr(sh4_read_long);     // second word -> EAX
    if( arg2b != R_EAX ) {
        MOV_r32_r32( R_EAX, arg2b );
    }
    MOV_esp8_r32( 4, arg2a );    // recover first word
    ADD_imm8s_r32( 16, R_ESP );
}
209 #endif
211 /**
212 * Emit the 'start of block' assembly. Sets up the stack frame and save
213 * SI/DI as required
214 * Allocates 8 bytes for local variables, which also has the convenient
215 * side-effect of aligning the stack.
216 */
217 void enter_block( )
218 {
219 PUSH_r32(R_EBP);
220 load_ptr( R_EBP, ((uint8_t *)&sh4r) + 128 );
221 SUB_imm8s_r32( 8, R_ESP );
222 }
224 static inline void exit_block( )
225 {
226 ADD_imm8s_r32( 8, R_ESP );
227 POP_r32(R_EBP);
228 RET();
229 }
/**
 * Exit the block, charging the elapsed cycles and jumping to the code
 * for the address already stored in sh4r.pc.
 * NOTE(review): the original comment said "sh4r.new_pc", but this
 * variant reads R_PC — new_pc is only used by exit_block_newpcset.
 */
void exit_block_pcset( sh4addr_t pc )
{
    load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );    // 6
    load_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_EAX);
    } else {
        call_func1(xlat_get_code,R_EAX);
    }
    exit_block();
}
/**
 * Exit the block with sh4r.new_pc written with the target pc:
 * charges the elapsed cycles, copies new_pc into pc, then jumps to the
 * translated code for that address.
 */
void exit_block_newpcset( sh4addr_t pc )
{
    load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );    // 6
    load_spreg( R_EAX, R_NEW_PC );
    store_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_EAX);
    } else {
        call_func1(xlat_get_code,R_EAX);
    }
    exit_block();
}
/**
 * Exit the block to an absolute PC.
 * If the target is in the current icache page, the translated-code
 * pointer is fetched directly from the lookup table; otherwise it is
 * resolved at runtime via xlat_get_code(_by_vma).
 */
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
{
    load_imm32( R_ECX, pc );                            // 5
    store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
    if( IS_IN_ICACHE(pc) ) {
        MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
        // imm8s 0xFC sign-extends to ~3: masks the low 2 bits of the
        // LUT entry (presumably flag bits in the pointer — confirm).
        AND_imm8s_r32( 0xFC, R_EAX );                   // 3
    } else if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_ECX);
    } else {
        call_func1(xlat_get_code,R_ECX);
    }
    load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    exit_block();
}
/**
 * Exit the block to a relative PC: the new pc is computed at runtime as
 * sh4r.pc + (pc - block_start_pc), then resolved to translated code the
 * same way as exit_block_abs.
 */
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
{
    load_imm32( R_ECX, pc - sh4_x86.block_start_pc );   // 5
    ADD_sh4r_r32( R_PC, R_ECX );
    store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
    if( IS_IN_ICACHE(pc) ) {
        MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
        // mask the low 2 bits of the LUT entry (see exit_block_abs)
        AND_imm8s_r32( 0xFC, R_EAX );                   // 3
    } else if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_ECX);
    } else {
        call_func1(xlat_get_code,R_ECX);
    }
    load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    exit_block();
}
/**
 * Write the block trailer (exception handling block).
 *
 * Emits two shared stubs — one that raises an SH4 exception (used for
 * backpatch entries with exc_code >= 0, whose sites push the exception
 * code) and one for exceptions the callee already raised — then fixes
 * up every recorded backpatch site to jump to the appropriate stub.
 * Both stubs expect EDX to hold the instruction count at the faulting
 * point, loaded by the fixup code below.
 */
void sh4_translate_end_block( sh4addr_t pc ) {
    if( sh4_x86.branch_taken == FALSE ) {
        // Didn't exit unconditionally already, so write the termination here
        exit_block_rel( pc, pc );
    }
    if( sh4_x86.backpatch_posn != 0 ) {
        unsigned int i;
        // Raise exception
        uint8_t *end_ptr = xlat_output;
        MOV_r32_r32( R_EDX, R_ECX );
        ADD_r32_r32( R_EDX, R_ECX );   // ECX = 2*EDX: byte offset (SH4 insns are 2 bytes)
        ADD_r32_sh4r( R_ECX, R_PC );   // advance pc to the faulting instruction
        MOV_moff32_EAX( &sh4_cpu_period );
        MUL_r32( R_EDX );              // EAX = icount * cpu_period
        ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );

        POP_r32(R_EAX);                // exception code pushed at the backpatch site
        call_func1( sh4_raise_exception, R_EAX );
        load_spreg( R_EAX, R_PC );
        if( sh4_x86.tlb_on ) {
            call_func1(xlat_get_code_by_vma,R_EAX);
        } else {
            call_func1(xlat_get_code,R_EAX);
        }
        exit_block();

        // Exception already raised - just cleanup
        uint8_t *preexc_ptr = xlat_output;
        MOV_r32_r32( R_EDX, R_ECX );
        ADD_r32_r32( R_EDX, R_ECX );
        ADD_r32_sh4r( R_ECX, R_SPC );  // here the offset is charged to spc instead
        MOV_moff32_EAX( &sh4_cpu_period );
        MUL_r32( R_EDX );
        ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
        load_spreg( R_EAX, R_PC );
        if( sh4_x86.tlb_on ) {
            call_func1(xlat_get_code_by_vma,R_EAX);
        } else {
            call_func1(xlat_get_code,R_EAX);
        }
        exit_block();

        // Patch each recorded site to jump to one of the stubs above
        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
                if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
                    *fixup_addr = (uint32_t)xlat_output;  // absolute address fixup
                } else {
                    // relative fixup: displacement from end of the 4-byte field
                    *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                }
                load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );
                int rel = preexc_ptr - xlat_output;
                JMP_rel(rel);
            } else {
                *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                PUSH_imm32( sh4_x86.backpatch_list[i].exc_code );
                load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );
                int rel = end_ptr - xlat_output;
                JMP_rel(rel);
            }
        }
    }
}
374 /**
375 * The unwind methods only work if we compiled with DWARF2 frame information
376 * (ie -fexceptions), otherwise we have to use the direct frame scan.
377 */
378 #ifdef HAVE_EXCEPTIONS
379 #include <unwind.h>
/* Argument record for xlat_check_frame: the native address range of the
 * translated block being searched, and the result slot for the PC found
 * within it. */
struct UnwindInfo {
    uintptr_t block_start;  // first native address of the block
    uintptr_t block_end;    // one past the last native address
    void *pc;               // out: PC found within [block_start, block_end)
};
/**
 * _Unwind_Backtrace callback: stop the walk as soon as a frame's IP
 * falls inside the block range described by arg (a struct UnwindInfo),
 * recording that IP; otherwise keep unwinding.
 */
_Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
{
    struct UnwindInfo *info = arg;
    void *pc = (void *)_Unwind_GetIP(context);
    if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
        info->pc = pc;
        return _URC_NORMAL_STOP;
    }

    return _URC_NO_REASON;
}
399 void *xlat_get_native_pc( void *code, uint32_t code_size )
400 {
401 struct _Unwind_Exception exc;
402 struct UnwindInfo info;
404 info.pc = NULL;
405 info.block_start = (uintptr_t)code;
406 info.block_end = info.block_start + code_size;
407 void *result = NULL;
408 _Unwind_Backtrace( xlat_check_frame, &info );
409 return info.pc;
410 }
411 #else
412 void *xlat_get_native_pc( void *code, uint32_t code_size )
413 {
414 void *result = NULL;
415 asm(
416 "mov %%ebp, %%eax\n\t"
417 "mov $0x8, %%ecx\n\t"
418 "mov %1, %%edx\n"
419 "frame_loop: test %%eax, %%eax\n\t"
420 "je frame_not_found\n\t"
421 "cmp (%%eax), %%edx\n\t"
422 "je frame_found\n\t"
423 "sub $0x1, %%ecx\n\t"
424 "je frame_not_found\n\t"
425 "movl (%%eax), %%eax\n\t"
426 "jmp frame_loop\n"
427 "frame_found: movl 0x4(%%eax), %0\n"
428 "frame_not_found:"
429 : "=r" (result)
430 : "r" (((uint8_t *)&sh4r) + 128 )
431 : "eax", "ecx", "edx" );
432 return result;
433 }
434 #endif
436 #endif /* !lxdream_ia32mac.h */
.