filename | src/sh4/ia32mac.h |
changeset | 669:ab344e42bca9 |
prev | 605:6ecdb604306b |
next | 736:a02d1475ccfd |
author | nkeynes |
date | Mon May 12 10:00:13 2008 +0000 |
permissions | -rw-r--r-- |
last change | Cleanup most of the -Wall warnings (getting a bit sloppy...) Convert FP code to use fixed banks rather than indirect pointer (3-4% faster this way now) |

/**
 * $Id$
 *
 * Provides the implementation for the ia32 Mac OS X ABI variant (e.g.
 * prologue, epilogue, and calling conventions). The main difference from
 * the generic ia32 ABI is that the stack is kept 16-byte aligned across
 * calls.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

#ifndef __lxdream_ia32abi_H
#define __lxdream_ia32abi_H 1

#define load_ptr( reg, ptr ) load_imm32( reg, (uint32_t)ptr );

/**
 * Note: clobbers EAX to make the indirect call - this isn't usually
 * a problem since the callee will usually clobber it anyway.
 */
#define CALL_FUNC0_SIZE 13
static inline void call_func0( void *ptr )
{
    /* Pad ESP so the stack is 16-byte aligned at the CALL, as the Darwin
     * ia32 ABI requires; stack_posn tracks the bytes pushed since the
     * last 16-byte boundary */
    int adj = (-sh4_x86.stack_posn)&0x0F;
    SUB_imm8s_r32( adj, R_ESP );
    load_imm32(R_EAX, (uint32_t)ptr);
    CALL_r32(R_EAX);
    ADD_imm8s_r32( adj, R_ESP );
}
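
/* The alignment arithmetic above is easier to see in isolation. A minimal
 * sketch, assuming stack_posn counts bytes pushed since the last 16-byte
 * boundary (stack_align_adj is a hypothetical helper for exposition, not
 * used by the emitter): */
static inline int stack_align_adj( int stack_posn, int pushed )
{
    /* e.g. stack_posn=8, pushed=4: (-4-8)&0x0F == 4, so the emitted code
     * drops ESP by 4 (pad) + 4 (argument) = 8 bytes and lands back on a
     * 16-byte boundary at the CALL */
    return (-pushed - stack_posn) & 0x0F;
}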

#define CALL_FUNC1_SIZE 14
static inline void call_func1( void *ptr, int arg1 )
{
    int adj = (-4-sh4_x86.stack_posn)&0x0F;
    SUB_imm8s_r32( adj, R_ESP );
    PUSH_r32(arg1);
    load_imm32(R_EAX, (uint32_t)ptr);
    CALL_r32(R_EAX);
    ADD_imm8s_r32( adj+4, R_ESP );
    sh4_x86.stack_posn -= 4;
}

#define CALL_FUNC2_SIZE 15
static inline void call_func2( void *ptr, int arg1, int arg2 )
{
    int adj = (-8-sh4_x86.stack_posn)&0x0F;
    SUB_imm8s_r32( adj, R_ESP );
    PUSH_r32(arg2);
    PUSH_r32(arg1);
    load_imm32(R_EAX, (uint32_t)ptr);
    CALL_r32(R_EAX);
    ADD_imm8s_r32( adj+8, R_ESP );
    sh4_x86.stack_posn -= 8;
}

/**
 * Write a double (64-bit) value into memory, with the first word in arg2a
 * and the second in arg2b.
 * NB: 36 bytes
 */
#define MEM_WRITE_DOUBLE_SIZE 36
static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
{
    int adj = (-8-sh4_x86.stack_posn)&0x0F;
    SUB_imm8s_r32( adj, R_ESP );
    ADD_imm8s_r32( 4, addr );
    PUSH_r32(arg2b);             // frame for the second call: (addr+4, arg2b)
    PUSH_r32(addr);
    ADD_imm8s_r32( -4, addr );
    SUB_imm8s_r32( 8, R_ESP );   // pad so that both call frames are aligned
    PUSH_r32(arg2a);             // frame for the first call: (addr, arg2a)
    PUSH_r32(addr);
    load_imm32(R_EAX, (uint32_t)sh4_write_long);
    CALL_r32(R_EAX);
    ADD_imm8s_r32( 16, R_ESP );  // discard the first frame and its padding
    load_imm32(R_EAX, (uint32_t)sh4_write_long);
    CALL_r32(R_EAX);
    ADD_imm8s_r32( adj+8, R_ESP );
    sh4_x86.stack_posn -= 16;
}
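
/* Net effect of the emitted sequence above, as a C sketch (assuming the
 * sh4_write_long(addr, value) calling signature):
 *
 *     sh4_write_long( addr,     arg2a );   // first word
 *     sh4_write_long( addr + 4, arg2b );   // second word
 *
 * with both calls made on a 16-byte aligned stack. */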

/**
 * Read a double (64-bit) value from memory, writing the first word into arg2a
 * and the second into arg2b. The addr must not be in EAX.
 * NB: 36 bytes
 */
#define MEM_READ_DOUBLE_SIZE 36
static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
{
    int adj = (-4-sh4_x86.stack_posn)&0x0F;
    int adj2 = (-8-sh4_x86.stack_posn)&0x0F;
    SUB_imm8s_r32( adj, R_ESP );
    PUSH_r32(addr);
    load_imm32(R_EAX, (uint32_t)sh4_read_long);
    CALL_r32(R_EAX);
    POP_r32(R_ECX);              // recover addr
    SUB_imm8s_r32( adj2-adj, R_ESP );
    PUSH_r32(R_EAX);             // stash the first word across the second call
    ADD_imm8s_r32( 4, R_ECX );
    PUSH_r32(R_ECX);
    load_imm32(R_EAX, (uint32_t)sh4_read_long);
    CALL_r32(R_EAX);
    ADD_imm8s_r32( 4, R_ESP );
    MOV_r32_r32( R_EAX, arg2b );
    POP_r32(arg2a);
    ADD_imm8s_r32( adj2, R_ESP );
    sh4_x86.stack_posn -= 4;
}
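
/* Net effect, as a C sketch (assuming sh4_read_long(addr) returns the
 * loaded value):
 *
 *     arg2a = sh4_read_long( addr );       // first word
 *     arg2b = sh4_read_long( addr + 4 );   // second word
 *
 * The first result is stashed on the stack across the second call since
 * sh4_read_long returns in EAX and may clobber the other caller-save
 * registers. */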

/**
 * Emit the 'start of block' assembly. Sets up the stack frame and saves
 * SI/DI as required.
 */
void sh4_translate_begin_block( sh4addr_t pc )
{
    PUSH_r32(R_EBP);
    /* mov &sh4r + 128, %ebp - biased so that 8-bit displacements reach
     * more of the sh4r structure */
    load_ptr( R_EBP, ((uint8_t *)&sh4r) + 128 );

    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.priv_checked = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.tlb_on = IS_MMU_ENABLED();
    sh4_x86.stack_posn = 8;   // return address + saved EBP
}
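
/* For reference, the emitted prologue is just
 *
 *     push %ebp
 *     mov  $sh4r+128, %ebp
 *
 * which, assuming the block is entered by a CALL made on a 16-byte
 * boundary, leaves ESP 8 bytes past that boundary (4 for the return
 * address plus 4 for the saved EBP) - hence stack_posn starts at 8. */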

/**
 * Exit the block with sh4r.pc already written
 */
void exit_block_pcset( sh4addr_t pc )
{
    load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );    // 6
    load_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_EAX);
    } else {
        call_func1(xlat_get_code,R_EAX);
    }
    POP_r32(R_EBP);
    RET();
}
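
/* Note on the exit convention shared by the exit_block_* variants: the
 * block returns with EAX holding the native address of the next
 * translated block (NULL if the target hasn't been translated yet),
 * which the caller can then dispatch to. */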

/**
 * Exit the block with sh4r.new_pc written with the target pc
 */
void exit_block_newpcset( sh4addr_t pc )
{
    load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );    // 6
    load_spreg( R_EAX, R_NEW_PC );
    store_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_EAX);
    } else {
        call_func1(xlat_get_code,R_EAX);
    }
    POP_r32(R_EBP);
    RET();
}

#define EXIT_BLOCK_SIZE(pc) (24 + (IS_IN_ICACHE(pc)?5:CALL_FUNC1_SIZE))
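
/* The 24 fixed bytes are the sum of the per-instruction sizes annotated
 * in exit_block below: 5 + 3 + 3 + 5 + 6, plus 1 each for the POP and
 * RET. The variable part is the 5-byte absolute MOV when the target is
 * already in the icache, or a full call_func1 otherwise. */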

/**
 * Exit the block to an absolute PC
 */
void exit_block( sh4addr_t pc, sh4addr_t endpc )
{
    load_imm32( R_ECX, pc );                            // 5
    store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
    if( IS_IN_ICACHE(pc) ) {
        MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
    } else if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_ECX);
    } else {
        call_func1(xlat_get_code,R_ECX);
    }
    AND_imm8s_r32( 0xFC, R_EAX );                       // 3 - mask out the lut flag bits
    load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    POP_r32(R_EBP);
    RET();
}

#define EXIT_BLOCK_REL_SIZE(pc) (27 + (IS_IN_ICACHE(pc)?5:CALL_FUNC1_SIZE))
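
/* Identical accounting to EXIT_BLOCK_SIZE, plus the 3-byte ADD_sh4r_r32
 * that turns the immediate into a PC-relative target. */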

/**
 * Exit the block to a relative PC
 */
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
{
    load_imm32( R_ECX, pc - sh4_x86.block_start_pc );   // 5
    ADD_sh4r_r32( R_PC, R_ECX );                        // 3
    store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
    if( IS_IN_ICACHE(pc) ) {
        MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
    } else if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_ECX);
    } else {
        call_func1(xlat_get_code,R_ECX);
    }
    AND_imm8s_r32( 0xFC, R_EAX );                       // 3 - mask out the lut flag bits
    load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    POP_r32(R_EBP);
    RET();
}

/**
 * Write the block trailer (exception handling block)
 */
void sh4_translate_end_block( sh4addr_t pc ) {
    if( sh4_x86.branch_taken == FALSE ) {
        // Didn't exit unconditionally already, so write the termination here
        exit_block_rel( pc, pc );
    }
    if( sh4_x86.backpatch_posn != 0 ) {
        unsigned int i;
        // Raise exception: on entry EDX holds the icount of the faulting
        // instruction and the exception code is on top of the stack
        uint8_t *end_ptr = xlat_output;
        MOV_r32_r32( R_EDX, R_ECX );
        ADD_r32_r32( R_EDX, R_ECX );                    // ECX = icount * 2
        ADD_r32_sh4r( R_ECX, R_PC );                    // pc += icount * 2
        MOV_moff32_EAX( &sh4_cpu_period );
        MUL_r32( R_EDX );                               // EAX = icount * cpu_period
        ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );

        POP_r32(R_EDX);                                 // pop the exception code
        call_func1( sh4_raise_exception, R_EDX );
        load_spreg( R_EAX, R_PC );
        if( sh4_x86.tlb_on ) {
            call_func1(xlat_get_code_by_vma,R_EAX);
        } else {
            call_func1(xlat_get_code,R_EAX);
        }
        POP_r32(R_EBP);
        RET();

        // Exception already raised - just cleanup
        uint8_t *preexc_ptr = xlat_output;
        MOV_r32_r32( R_EDX, R_ECX );
        ADD_r32_r32( R_EDX, R_ECX );
        ADD_r32_sh4r( R_ECX, R_SPC );
        MOV_moff32_EAX( &sh4_cpu_period );
        MUL_r32( R_EDX );
        ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
        load_spreg( R_EAX, R_PC );
        if( sh4_x86.tlb_on ) {
            call_func1(xlat_get_code_by_vma,R_EAX);
        } else {
            call_func1(xlat_get_code,R_EAX);
        }
        POP_r32(R_EBP);
        RET();

        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
            *fixup_addr = xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
                // exc_code < 0 marks an exception already raised by the
                // callee; -1 - exc_code gives the stack adjustment to undo
                load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );
                int stack_adj = -1 - sh4_x86.backpatch_list[i].exc_code;
                if( stack_adj > 0 ) {
                    ADD_imm8s_r32( stack_adj, R_ESP );
                }
                int rel = preexc_ptr - xlat_output;
                JMP_rel(rel);
            } else {
                PUSH_imm32( sh4_x86.backpatch_list[i].exc_code );
                load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );
                int rel = end_ptr - xlat_output;
                JMP_rel(rel);
            }
        }
    }
}
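
/* The fixup write in the loop above stores an x86 rel32 displacement,
 * which is measured from the end of the 4-byte field - hence the
 * trailing -4. A minimal sketch (patch_rel32 is a hypothetical helper
 * for exposition, not part of the emitter): */
static inline void patch_rel32( uint8_t *field, uint8_t *target )
{
    /* after patching, a jump decoded at `field` lands on `target` */
    *(uint32_t *)field = (uint32_t)(target - field - 4);
}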

/**
 * Walk the frame chain (at most 8 frames) looking for the frame whose
 * saved EBP slot holds &sh4r+128 - that frame was called directly from
 * translated code, so its return address is the native PC within the
 * current block. Returns NULL if no such frame is found.
 */
void *xlat_get_native_pc()
{
    void *result = NULL;
    asm(
        "mov %%ebp, %%eax\n\t"
        "mov $0x8, %%ecx\n\t"
        "mov %1, %%edx\n"
        "frame_loop: test %%eax, %%eax\n\t"
        "je frame_not_found\n\t"
        "cmp (%%eax), %%edx\n\t"
        "je frame_found\n\t"
        "sub $0x1, %%ecx\n\t"
        "je frame_not_found\n\t"
        "movl (%%eax), %%eax\n\t"
        "jmp frame_loop\n"
        "frame_found: movl 0x4(%%eax), %0\n"
        "frame_not_found:"
        : "=r" (result)
        : "r" (((uint8_t *)&sh4r) + 128 )
        : "eax", "ecx", "edx" );
    return result;
}
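
/* C-equivalent sketch of the walk above, assuming standard
 * "push %ebp; mov %esp,%ebp" frame chains (xlat_get_native_pc_sketch is
 * illustrative only, not part of the header): */
static inline void *xlat_get_native_pc_sketch( void )
{
    void **frame = (void **)__builtin_frame_address(0);
    void *marker = ((uint8_t *)&sh4r) + 128;  // EBP value installed by the block prologue
    int i;
    for( i = 0; frame != NULL && i < 8; i++ ) {
        if( frame[0] == marker ) {
            return frame[1];  // return address into the translated block
        }
        frame = (void **)frame[0];
    }
    return NULL;
}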

#endif