filename | src/sh4/ia32mac.h |
changeset | 669:ab344e42bca9 |
prev | 605:6ecdb604306b |
next | 736:a02d1475ccfd |
author | nkeynes |
date | Mon May 12 10:00:13 2008 +0000 (15 years ago) |
permissions | -rw-r--r-- |
last change | Cleanup most of the -Wall warnings (getting a bit sloppy...) Convert FP code to use fixed banks rather than indirect pointer (3-4% faster this way now) |
file | annotate | diff | log | raw |
nkeynes@539 | 1 | /** |
nkeynes@586 | 2 | * $Id$ |
nkeynes@539 | 3 | * |
nkeynes@539 | 4 | * Provides the implementation for the ia32 ABI (eg prologue, epilogue, and |
nkeynes@539 | 5 | * calling conventions) |
nkeynes@539 | 6 | * |
nkeynes@539 | 7 | * Copyright (c) 2007 Nathan Keynes. |
nkeynes@539 | 8 | * |
nkeynes@539 | 9 | * This program is free software; you can redistribute it and/or modify |
nkeynes@539 | 10 | * it under the terms of the GNU General Public License as published by |
nkeynes@539 | 11 | * the Free Software Foundation; either version 2 of the License, or |
nkeynes@539 | 12 | * (at your option) any later version. |
nkeynes@539 | 13 | * |
nkeynes@539 | 14 | * This program is distributed in the hope that it will be useful, |
nkeynes@539 | 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
nkeynes@539 | 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
nkeynes@539 | 17 | * GNU General Public License for more details. |
nkeynes@539 | 18 | */ |
nkeynes@539 | 19 | |
nkeynes@539 | 20 | #ifndef __lxdream_ia32abi_H |
nkeynes@539 | 21 | #define __lxdream_ia32abi_H 1 |
nkeynes@539 | 22 | |
nkeynes@539 | 23 | #define load_ptr( reg, ptr ) load_imm32( reg, (uint32_t)(ptr) ); /* Load the 32-bit value of pointer expression ptr into reg; ptr is parenthesised so the cast applies to the whole argument, not just its first operand */ |
nkeynes@539 | 24 | |
nkeynes@539 | 25 | /** |
nkeynes@539 | 26 | * Emit an indirect call through EAX to the zero-argument function at ptr, with ESP lowered by adj bytes for the duration of the call. |
nkeynes@539 | 27 | * Note: clobbers EAX to make the indirect call - this isn't usually a problem since the callee will usually clobber it anyway. |
nkeynes@539 | 28 | */ |
nkeynes@539 | 29 | #define CALL_FUNC0_SIZE 13 /* NOTE(review): presumably the byte length of the sequence call_func0 emits - confirm against the opcode macros */ |
nkeynes@539 | 30 | static inline void call_func0( void *ptr ) |
nkeynes@539 | 31 | { |
nkeynes@539 | 32 | int adj = (-sh4_x86.stack_posn)&0x0F; /* bytes needed to round stack_posn up to the next multiple of 16 (0..15) */ |
nkeynes@539 | 33 | SUB_imm8s_r32( adj, R_ESP ); /* always emitted, even when adj is 0 */ |
nkeynes@539 | 34 | load_imm32(R_EAX, (uint32_t)ptr); |
nkeynes@539 | 35 | CALL_r32(R_EAX); |
nkeynes@539 | 36 | ADD_imm8s_r32( adj, R_ESP ); /* undo the padding applied before the call */ |
nkeynes@539 | 37 | } |
nkeynes@539 | 38 | |
nkeynes@539 | 39 | #define CALL_FUNC1_SIZE 14 /* NOTE(review): presumably the byte length of the sequence call_func1 emits - confirm */ |
nkeynes@539 | 40 | static inline void call_func1( void *ptr, int arg1 ) /* Emit a call to ptr with register arg1 pushed as its single stack argument; EAX is overwritten with the call target */ |
nkeynes@539 | 41 | { |
nkeynes@539 | 42 | int adj = (-4-sh4_x86.stack_posn)&0x0F; /* padding chosen so that stack_posn + adj + 4 (the pushed argument) is a multiple of 16 */ |
nkeynes@539 | 43 | SUB_imm8s_r32( adj, R_ESP ); |
nkeynes@539 | 44 | PUSH_r32(arg1); |
nkeynes@539 | 45 | load_imm32(R_EAX, (uint32_t)ptr); |
nkeynes@539 | 46 | CALL_r32(R_EAX); |
nkeynes@539 | 47 | ADD_imm8s_r32( adj+4, R_ESP ); /* release the padding and the 4-byte argument in one step */ |
nkeynes@539 | 48 | sh4_x86.stack_posn -= 4; /* NOTE(review): presumably cancels a +4 applied to stack_posn by PUSH_r32 - confirm against its definition */ |
nkeynes@539 | 49 | } |
nkeynes@539 | 50 | |
nkeynes@539 | 51 | #define CALL_FUNC2_SIZE 15 /* NOTE(review): presumably the byte length of the sequence call_func2 emits - confirm */ |
nkeynes@539 | 52 | static inline void call_func2( void *ptr, int arg1, int arg2 ) /* Emit a call to ptr with registers arg1 and arg2 pushed as stack arguments; EAX is overwritten with the call target */ |
nkeynes@539 | 53 | { |
nkeynes@539 | 54 | int adj = (-8-sh4_x86.stack_posn)&0x0F; /* padding chosen so that stack_posn + adj + 8 (two pushed arguments) is a multiple of 16 */ |
nkeynes@539 | 55 | SUB_imm8s_r32( adj, R_ESP ); |
nkeynes@539 | 56 | PUSH_r32(arg2); /* pushed first, so arg1 ends up on top of the stack */ |
nkeynes@539 | 57 | PUSH_r32(arg1); |
nkeynes@539 | 58 | load_imm32(R_EAX, (uint32_t)ptr); |
nkeynes@539 | 59 | CALL_r32(R_EAX); |
nkeynes@539 | 60 | ADD_imm8s_r32( adj+8, R_ESP ); /* release the padding and both 4-byte arguments */ |
nkeynes@539 | 61 | sh4_x86.stack_posn -= 8; /* NOTE(review): presumably cancels the +4 per PUSH_r32 applied to stack_posn - confirm */ |
nkeynes@539 | 62 | } |
nkeynes@539 | 63 | |
nkeynes@539 | 64 | /** |
nkeynes@539 | 65 | * Write a double (64-bit) value into memory, with the first word in arg2a, and |
nkeynes@539 | 66 | * the second in arg2b. Emits two calls to sh4_write_long: first with (addr, arg2a), then with (addr+4, arg2b). Clobbers EAX. |
nkeynes@539 | 67 | * NB: 36 bytes (see MEM_WRITE_DOUBLE_SIZE) |
nkeynes@539 | 68 | */ |
nkeynes@539 | 69 | #define MEM_WRITE_DOUBLE_SIZE 36 |
nkeynes@539 | 70 | static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b ) |
nkeynes@539 | 71 | { |
nkeynes@539 | 72 | int adj = (-8-sh4_x86.stack_posn)&0x0F; /* padding chosen so that stack_posn + adj + 8 is a multiple of 16 */ |
nkeynes@539 | 73 | SUB_imm8s_r32( adj, R_ESP ); |
nkeynes@539 | 74 | ADD_imm8s_r32( 4, addr ); /* temporarily advance addr to the second word */ |
nkeynes@539 | 75 | PUSH_r32(arg2b); /* arguments for the SECOND call are pushed first */ |
nkeynes@539 | 76 | PUSH_r32(addr); |
nkeynes@539 | 77 | ADD_imm8s_r32( -4, addr ); /* restore addr to the first word */ |
nkeynes@539 | 78 | SUB_imm8s_r32( 8, R_ESP ); /* 8 bytes of filler so the next two pushes make a 16-byte step */ |
nkeynes@539 | 79 | PUSH_r32(arg2a); /* arguments for the FIRST call */ |
nkeynes@539 | 80 | PUSH_r32(addr); |
nkeynes@539 | 81 | load_imm32(R_EAX, (uint32_t)sh4_write_long); |
nkeynes@539 | 82 | CALL_r32(R_EAX); |
nkeynes@539 | 83 | ADD_imm8s_r32( 16, R_ESP ); /* drop the first call's arguments and filler, exposing the (addr+4, arg2b) pair */ |
nkeynes@539 | 84 | load_imm32(R_EAX, (uint32_t)sh4_write_long); |
nkeynes@539 | 85 | CALL_r32(R_EAX); |
nkeynes@539 | 86 | ADD_imm8s_r32( adj+8, R_ESP ); /* release the remaining arguments and the initial padding */ |
nkeynes@539 | 87 | sh4_x86.stack_posn -= 16; /* four PUSH_r32 above; NOTE(review): presumably each added 4 to stack_posn - confirm */ |
nkeynes@539 | 88 | } |
nkeynes@539 | 89 | |
nkeynes@539 | 90 | /** |
nkeynes@539 | 91 | * Read a double (64-bit) value from memory, writing the first word into arg2a |
nkeynes@539 | 92 | * and the second into arg2b. The addr must not be in EAX. Emits two calls to sh4_read_long (addr, then addr+4); overwrites EAX and ECX. |
nkeynes@539 | 93 | * NB: 36 bytes (see MEM_READ_DOUBLE_SIZE) |
nkeynes@539 | 94 | */ |
nkeynes@539 | 95 | #define MEM_READ_DOUBLE_SIZE 36 |
nkeynes@539 | 96 | static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b ) |
nkeynes@539 | 97 | { |
nkeynes@539 | 98 | int adj = (-4-sh4_x86.stack_posn)&0x0F; /* padding for the first call: one 4-byte argument on the stack */ |
nkeynes@539 | 99 | int adj2 = (-8-sh4_x86.stack_posn)&0x0F; /* padding for the second call: saved first word plus one argument (8 bytes) */ |
nkeynes@539 | 100 | SUB_imm8s_r32( adj, R_ESP ); |
nkeynes@539 | 101 | PUSH_r32(addr); |
nkeynes@539 | 102 | load_imm32(R_EAX, (uint32_t)sh4_read_long); |
nkeynes@539 | 103 | CALL_r32(R_EAX); |
nkeynes@586 | 104 | POP_r32(R_ECX); /* recover addr from the argument slot into ECX */ |
nkeynes@539 | 105 | SUB_imm8s_r32( adj2-adj, R_ESP ); /* switch the padding from adj to adj2 (the difference may be negative) */ |
nkeynes@539 | 106 | PUSH_r32(R_EAX); /* keep EAX (used here as the first call's result) across the second call */ |
nkeynes@586 | 107 | ADD_imm8s_r32( 4, R_ECX ); /* address of the second word */ |
nkeynes@586 | 108 | PUSH_r32(R_ECX); |
nkeynes@539 | 109 | load_imm32(R_EAX, (uint32_t)sh4_read_long); |
nkeynes@539 | 110 | CALL_r32(R_EAX); |
nkeynes@539 | 111 | ADD_imm8s_r32( 4, R_ESP ); /* discard the addr+4 argument */ |
nkeynes@539 | 112 | MOV_r32_r32( R_EAX, arg2b ); /* second word -> arg2b */ |
nkeynes@539 | 113 | POP_r32(arg2a); /* saved first word -> arg2a */ |
nkeynes@539 | 114 | ADD_imm8s_r32( adj2, R_ESP ); |
nkeynes@539 | 115 | sh4_x86.stack_posn -= 4; /* NOTE(review): presumably balances three PUSH_r32 against two POP_r32 in stack_posn bookkeeping - confirm */ |
nkeynes@539 | 116 | } |
nkeynes@539 | 117 | |
nkeynes@539 | 118 | /** |
nkeynes@539 | 119 | * Emit the 'start of block' assembly: saves EBP and loads it with the address |
nkeynes@539 | 120 | * of sh4r + 128, then resets the per-block translation state held in sh4_x86. |
nkeynes@539 | 121 | */ |
nkeynes@539 | 122 | void sh4_translate_begin_block( sh4addr_t pc ) |
nkeynes@539 | 123 | { |
nkeynes@539 | 124 | PUSH_r32(R_EBP); |
nkeynes@539 | 125 | /* mov &sh4r + 128, ebp */ |
nkeynes@669 | 126 | load_ptr( R_EBP, ((uint8_t *)&sh4r) + 128 ); |
nkeynes@539 | 127 | |
nkeynes@539 | 128 | sh4_x86.in_delay_slot = FALSE; |
nkeynes@539 | 129 | sh4_x86.priv_checked = FALSE; |
nkeynes@539 | 130 | sh4_x86.fpuen_checked = FALSE; |
nkeynes@539 | 131 | sh4_x86.branch_taken = FALSE; |
nkeynes@539 | 132 | sh4_x86.backpatch_posn = 0; /* no exception fixups recorded yet for this block */ |
nkeynes@539 | 133 | sh4_x86.block_start_pc = pc; |
nkeynes@539 | 134 | sh4_x86.tstate = TSTATE_NONE; |
nkeynes@586 | 135 | sh4_x86.tlb_on = IS_MMU_ENABLED(); /* sampled once here; later selects xlat_get_code_by_vma vs xlat_get_code in the exit paths */ |
nkeynes@539 | 136 | sh4_x86.stack_posn = 8; /* NOTE(review): presumably the return address plus the EBP pushed above (2 x 4 bytes) - confirm */ |
nkeynes@539 | 137 | } |
nkeynes@539 | 138 | |
nkeynes@539 | 139 | /** |
nkeynes@590 | 140 | * Exit the block with sh4r.pc already written with the target pc; the pc argument is only used to account for elapsed cycles |
nkeynes@539 | 141 | */ |
nkeynes@586 | 142 | void exit_block_pcset( sh4addr_t pc ) |
nkeynes@539 | 143 | { |
nkeynes@539 | 144 | load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5 - ECX = (byte offset into block / 2) * sh4_cpu_period |
nkeynes@539 | 145 | ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6 - add it to slice_cycle |
nkeynes@590 | 146 | load_spreg( R_EAX, R_PC ); /* the run-time PC value is the lookup key */ |
nkeynes@586 | 147 | if( sh4_x86.tlb_on ) { |
nkeynes@586 | 148 | call_func1(xlat_get_code_by_vma,R_EAX); |
nkeynes@586 | 149 | } else { |
nkeynes@586 | 150 | call_func1(xlat_get_code,R_EAX); |
nkeynes@586 | 151 | } |
nkeynes@539 | 152 | POP_r32(R_EBP); /* restore the EBP saved by sh4_translate_begin_block */ |
nkeynes@539 | 153 | RET(); |
nkeynes@539 | 154 | } |
nkeynes@539 | 155 | |
nkeynes@590 | 156 | /** |
nkeynes@590 | 157 | * Exit the block with sh4r.new_pc written with the target pc: copies new_pc into pc, then looks up the code for it. The pc argument is only used to account for elapsed cycles |
nkeynes@590 | 158 | */ |
nkeynes@590 | 159 | void exit_block_newpcset( sh4addr_t pc ) |
nkeynes@590 | 160 | { |
nkeynes@590 | 161 | load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5 - ECX = (byte offset into block / 2) * sh4_cpu_period |
nkeynes@590 | 162 | ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6 - add it to slice_cycle |
nkeynes@590 | 163 | load_spreg( R_EAX, R_NEW_PC ); |
nkeynes@590 | 164 | store_spreg( R_EAX, R_PC ); /* pc = new_pc; EAX still holds the value for the lookup below */ |
nkeynes@590 | 165 | if( sh4_x86.tlb_on ) { |
nkeynes@590 | 166 | call_func1(xlat_get_code_by_vma,R_EAX); |
nkeynes@590 | 167 | } else { |
nkeynes@590 | 168 | call_func1(xlat_get_code,R_EAX); |
nkeynes@590 | 169 | } |
nkeynes@590 | 170 | POP_r32(R_EBP); /* restore the EBP saved by sh4_translate_begin_block */ |
nkeynes@590 | 171 | RET(); |
nkeynes@590 | 172 | } |
nkeynes@590 | 173 | |
nkeynes@590 | 174 | |
nkeynes@586 | 175 | #define EXIT_BLOCK_SIZE(pc) (24 + (IS_IN_ICACHE(pc)?5:CALL_FUNC1_SIZE)) /* NOTE(review): presumably the byte length of what exit_block emits: a fixed part plus either the 5-byte LUT load or a call_func1 sequence - confirm */ |
nkeynes@586 | 176 | |
nkeynes@586 | 177 | |
nkeynes@539 | 178 | /** |
nkeynes@539 | 179 | * Exit the block to an absolute PC: stores pc into sh4r.pc, fetches the target code pointer into EAX, and charges cycles up to endpc |
nkeynes@539 | 180 | */ |
nkeynes@539 | 181 | void exit_block( sh4addr_t pc, sh4addr_t endpc ) |
nkeynes@539 | 182 | { |
nkeynes@539 | 183 | load_imm32( R_ECX, pc ); // 5 |
nkeynes@539 | 184 | store_spreg( R_ECX, REG_OFFSET(pc) ); // 3 |
nkeynes@586 | 185 | if( IS_IN_ICACHE(pc) ) { |
nkeynes@586 | 186 | MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5 - load EAX straight from the lookup-table entry instead of calling out |
nkeynes@586 | 187 | } else if( sh4_x86.tlb_on ) { |
nkeynes@586 | 188 | call_func1(xlat_get_code_by_vma,R_ECX); |
nkeynes@586 | 189 | } else { |
nkeynes@586 | 190 | call_func1(xlat_get_code,R_ECX); |
nkeynes@586 | 191 | } |
nkeynes@586 | 192 | AND_imm8s_r32( 0xFC, R_EAX ); // 3 - NOTE(review): presumably sign-extended, i.e. clears the low two bits of EAX - confirm |
nkeynes@586 | 193 | load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5 - cycles are charged up to endpc, not pc |
nkeynes@586 | 194 | ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6 |
nkeynes@586 | 195 | POP_r32(R_EBP); /* restore the EBP saved by sh4_translate_begin_block */ |
nkeynes@586 | 196 | RET(); |
nkeynes@586 | 197 | } |
nkeynes@586 | 198 | |
nkeynes@586 | 199 | #define EXIT_BLOCK_REL_SIZE(pc) (27 + (IS_IN_ICACHE(pc)?5:CALL_FUNC1_SIZE)) /* NOTE(review): presumably the byte length of what exit_block_rel emits: a fixed part plus either the 5-byte LUT load or a call_func1 sequence - confirm */ |
nkeynes@586 | 200 | |
nkeynes@586 | 201 | /** |
nkeynes@586 | 202 | * Exit the block to a relative PC: the new sh4r.pc is computed at run time as the current sh4r.pc plus (pc - block_start_pc); cycles are charged up to endpc |
nkeynes@586 | 203 | */ |
nkeynes@586 | 204 | void exit_block_rel( sh4addr_t pc, sh4addr_t endpc ) |
nkeynes@586 | 205 | { |
nkeynes@586 | 206 | load_imm32( R_ECX, pc - sh4_x86.block_start_pc ); // 5 - byte offset of the target from the block start |
nkeynes@586 | 207 | ADD_sh4r_r32( R_PC, R_ECX ); /* ECX += run-time sh4r.pc */ |
nkeynes@586 | 208 | store_spreg( R_ECX, REG_OFFSET(pc) ); // 3 |
nkeynes@586 | 209 | if( IS_IN_ICACHE(pc) ) { |
nkeynes@586 | 210 | MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5 - load EAX straight from the lookup-table entry instead of calling out |
nkeynes@586 | 211 | } else if( sh4_x86.tlb_on ) { |
nkeynes@586 | 212 | call_func1(xlat_get_code_by_vma,R_ECX); |
nkeynes@586 | 213 | } else { |
nkeynes@586 | 214 | call_func1(xlat_get_code,R_ECX); |
nkeynes@586 | 215 | } |
nkeynes@539 | 216 | AND_imm8s_r32( 0xFC, R_EAX ); // 3 - NOTE(review): presumably sign-extended, i.e. clears the low two bits of EAX - confirm |
nkeynes@539 | 217 | load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5 |
nkeynes@539 | 218 | ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6 |
nkeynes@539 | 219 | POP_r32(R_EBP); /* restore the EBP saved by sh4_translate_begin_block */ |
nkeynes@539 | 220 | RET(); |
nkeynes@539 | 221 | } |
nkeynes@539 | 222 | |
nkeynes@539 | 223 | /** |
nkeynes@539 | 224 | * Write the block trailer: the fall-through exit (when no unconditional exit was emitted), then two shared exception handlers and one stub per recorded backpatch entry |
nkeynes@539 | 225 | */ |
nkeynes@539 | 226 | void sh4_translate_end_block( sh4addr_t pc ) { |
nkeynes@539 | 227 | if( sh4_x86.branch_taken == FALSE ) { |
nkeynes@539 | 228 | // Didn't exit unconditionally already, so write the termination here |
nkeynes@586 | 229 | exit_block_rel( pc, pc ); |
nkeynes@539 | 230 | } |
nkeynes@539 | 231 | if( sh4_x86.backpatch_posn != 0 ) { /* handlers are only emitted when at least one fixup was recorded */ |
nkeynes@586 | 232 | unsigned int i; |
nkeynes@586 | 233 | // Raise exception - shared handler, entered from the stubs below with the exception code on the stack and fixup_icount in EDX |
nkeynes@539 | 234 | uint8_t *end_ptr = xlat_output; /* jump target for stubs with exc_code >= 0 */ |
nkeynes@586 | 235 | MOV_r32_r32( R_EDX, R_ECX ); |
nkeynes@539 | 236 | ADD_r32_r32( R_EDX, R_ECX ); /* ECX = 2 * EDX */ |
nkeynes@586 | 237 | ADD_r32_sh4r( R_ECX, R_PC ); /* advance sh4r.pc by that amount */ |
nkeynes@539 | 238 | MOV_moff32_EAX( &sh4_cpu_period ); |
nkeynes@539 | 239 | MUL_r32( R_EDX ); /* EAX = sh4_cpu_period * EDX */ |
nkeynes@539 | 240 | ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) ); |
nkeynes@539 | 241 | |
nkeynes@586 | 242 | POP_r32(R_EDX); /* exception code pushed by the stub (PUSH_imm32 below) */ |
nkeynes@586 | 243 | call_func1( sh4_raise_exception, R_EDX ); |
nkeynes@586 | 244 | load_spreg( R_EAX, R_PC ); /* look up code for whatever PC is current after the exception was raised */ |
nkeynes@586 | 245 | if( sh4_x86.tlb_on ) { |
nkeynes@586 | 246 | call_func1(xlat_get_code_by_vma,R_EAX); |
nkeynes@586 | 247 | } else { |
nkeynes@586 | 248 | call_func1(xlat_get_code,R_EAX); |
nkeynes@586 | 249 | } |
nkeynes@539 | 250 | POP_r32(R_EBP); |
nkeynes@539 | 251 | RET(); |
nkeynes@539 | 252 | |
nkeynes@586 | 253 | // Exception already raised - just cleanup (same accounting as above, but applied to SPC and without calling sh4_raise_exception) |
nkeynes@586 | 254 | uint8_t *preexc_ptr = xlat_output; /* jump target for stubs with exc_code < 0 */ |
nkeynes@586 | 255 | MOV_r32_r32( R_EDX, R_ECX ); |
nkeynes@586 | 256 | ADD_r32_r32( R_EDX, R_ECX ); /* ECX = 2 * EDX */ |
nkeynes@586 | 257 | ADD_r32_sh4r( R_ECX, R_SPC ); /* adjusts SPC here, where the handler above adjusts PC */ |
nkeynes@586 | 258 | MOV_moff32_EAX( &sh4_cpu_period ); |
nkeynes@586 | 259 | MUL_r32( R_EDX ); /* EAX = sh4_cpu_period * EDX */ |
nkeynes@586 | 260 | ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) ); |
nkeynes@586 | 261 | load_spreg( R_EAX, R_PC ); |
nkeynes@586 | 262 | if( sh4_x86.tlb_on ) { |
nkeynes@586 | 263 | call_func1(xlat_get_code_by_vma,R_EAX); |
nkeynes@586 | 264 | } else { |
nkeynes@586 | 265 | call_func1(xlat_get_code,R_EAX); |
nkeynes@586 | 266 | } |
nkeynes@586 | 267 | POP_r32(R_EBP); |
nkeynes@586 | 268 | RET(); |
nkeynes@586 | 269 | |
nkeynes@586 | 270 | for( i=0; i< sh4_x86.backpatch_posn; i++ ) { /* one stub per recorded fixup */ |
nkeynes@604 | 271 | uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset]; /* 32-bit slot inside the already-emitted block code */ |
nkeynes@604 | 272 | *fixup_addr = xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4; /* distance from the end of that slot to the stub emitted next */ |
nkeynes@596 | 273 | if( sh4_x86.backpatch_list[i].exc_code < 0 ) { |
nkeynes@586 | 274 | load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount ); |
nkeynes@596 | 275 | int stack_adj = -1 - sh4_x86.backpatch_list[i].exc_code; /* a negative exc_code encodes a stack adjustment: -1 -> 0, -2 -> 1, ... */ |
nkeynes@596 | 276 | if( stack_adj > 0 ) { |
nkeynes@596 | 277 | ADD_imm8s_r32( stack_adj, R_ESP ); |
nkeynes@596 | 278 | } |
nkeynes@586 | 279 | int rel = preexc_ptr - xlat_output; /* backwards offset to the cleanup-only handler */ |
nkeynes@586 | 280 | JMP_rel(rel); |
nkeynes@586 | 281 | } else { |
nkeynes@586 | 282 | PUSH_imm32( sh4_x86.backpatch_list[i].exc_code ); /* popped into EDX by the raise-exception handler */ |
nkeynes@586 | 283 | load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount ); |
nkeynes@586 | 284 | int rel = end_ptr - xlat_output; /* backwards offset to the raise-exception handler */ |
nkeynes@586 | 285 | JMP_rel(rel); |
nkeynes@586 | 286 | } |
nkeynes@586 | 287 | } |
nkeynes@539 | 288 | } |
nkeynes@539 | 289 | } |
nkeynes@539 | 290 | |
nkeynes@586 | 291 | void *xlat_get_native_pc() |
nkeynes@586 | 292 | { |
nkeynes@586 | 293 | void *result = NULL; |
nkeynes@586 | 294 | asm( |
nkeynes@586 | 295 | "mov %%ebp, %%eax\n\t" |
nkeynes@586 | 296 | "mov $0x8, %%ecx\n\t" |
nkeynes@586 | 297 | "mov %1, %%edx\n" |
nkeynes@586 | 298 | "frame_loop: test %%eax, %%eax\n\t" |
nkeynes@586 | 299 | "je frame_not_found\n\t" |
nkeynes@586 | 300 | "cmp (%%eax), %%edx\n\t" |
nkeynes@586 | 301 | "je frame_found\n\t" |
nkeynes@586 | 302 | "sub $0x1, %%ecx\n\t" |
nkeynes@586 | 303 | "je frame_not_found\n\t" |
nkeynes@586 | 304 | "movl (%%eax), %%eax\n\t" |
nkeynes@586 | 305 | "jmp frame_loop\n" |
nkeynes@586 | 306 | "frame_found: movl 0x4(%%eax), %0\n" |
nkeynes@586 | 307 | "frame_not_found:" |
nkeynes@586 | 308 | : "=r" (result) |
nkeynes@669 | 309 | : "r" (((uint8_t *)&sh4r) + 128 ) |
nkeynes@586 | 310 | : "eax", "ecx", "edx" ); |
nkeynes@586 | 311 | return result; |
nkeynes@586 | 312 | } |
nkeynes@586 | 313 | |
nkeynes@586 | 314 | |
nkeynes@539 | 315 | #endif |
nkeynes@539 | 316 | |
nkeynes@539 | 317 |
.