/**
 * $Id$
 *
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif

#include "lxdream.h"
#include "sh4/sh4core.h"
#include "sh4/sh4dasm.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4stat.h"
#include "sh4/sh4mmio.h"
#include "sh4/mmu.h"
#include "xlat/xltcache.h"
#include "xlat/x86/x86op.h"
#include "x86dasm/x86dasm.h"
#include "clock.h"

#define DEFAULT_BACKPATCH_SIZE 4096

/* Offset of a reg relative to the sh4r structure */
#define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)

#define R_T       REG_OFFSET(t)
#define R_Q       REG_OFFSET(q)
#define R_S       REG_OFFSET(s)
#define R_M       REG_OFFSET(m)
#define R_SR      REG_OFFSET(sr)
#define R_GBR     REG_OFFSET(gbr)
#define R_SSR     REG_OFFSET(ssr)
#define R_SPC     REG_OFFSET(spc)
#define R_VBR     REG_OFFSET(vbr)
#define R_MACH    (REG_OFFSET(mac)+4)
#define R_MACL    REG_OFFSET(mac)
#define R_PC      REG_OFFSET(pc)
#define R_NEW_PC  REG_OFFSET(new_pc)
#define R_PR      REG_OFFSET(pr)
#define R_SGR     REG_OFFSET(sgr)
#define R_FPUL    REG_OFFSET(fpul)
#define R_FPSCR   REG_OFFSET(fpscr)
#define R_DBR     REG_OFFSET(dbr)
#define R_R(rn)   REG_OFFSET(r[rn])
#define R_FR(f)   REG_OFFSET(fr[0][(f)^1])
#define R_XF(f)   REG_OFFSET(fr[1][(f)^1])
#define R_DR(f)   REG_OFFSET(fr[(f)&1][(f)&0x0E])
#define R_DRL(f)  REG_OFFSET(fr[(f)&1][(f)|0x01])
#define R_DRH(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])

#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2

#define SH4_MODE_UNKNOWN -1

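/* The -128 bias above keeps the frequently-used fields of sh4r reachable with
 * a signed 8-bit displacement: the translator keeps %ebp/%rbp pointed at
 * &sh4r + 128 (see the "sh4r+128" symbol table entry below). A sketch of the
 * resulting addressing, using the load_reg/store_reg helpers defined later in
 * this file (register numbers are illustrative):
 *
 *     load_reg( REG_EAX, 2 );   // MOVL_rbpdisp_r32( REG_OFFSET(r[2]), REG_EAX )
 *                               //   => mov disp8(%rbp), %eax
 *     store_reg( REG_EAX, 3 );  // MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r[3]) )
 *                               //   => mov %eax, disp8(%rbp)
 */
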
struct backpatch_record {
    uint32_t fixup_offset;
    uint32_t fixup_icount;
    int32_t exc_code;
};

/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;
    uint8_t *code;
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc;
    uint32_t stack_posn; /* Trace stack height for alignment purposes */
    uint32_t sh4_mode;   /* Mirror of sh4r.xlat_sh4_mode */
    int tstate;

    /* mode settings */
    gboolean tlb_on; /* True if tlb translation is active */
    struct mem_region_fn **priv_address_space;
    struct mem_region_fn **user_address_space;

    /* Instrumentation */
    xlat_block_begin_callback_t begin_callback;
    xlat_block_end_callback_t end_callback;
    gboolean fastmem;
    gboolean profile_blocks;

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};

static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */

static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc );

static struct x86_symbol x86_symbol_table[] = {
    { "sh4r+128", ((char *)&sh4r)+128 },
    { "sh4_cpu_period", &sh4_cpu_period },
    { "sh4_address_space", NULL },
    { "sh4_user_address_space", NULL },
    { "sh4_translate_breakpoint_hit", sh4_translate_breakpoint_hit },
    { "sh4_translate_get_code_and_backpatch", sh4_translate_get_code_and_backpatch },
    { "sh4_write_fpscr", sh4_write_fpscr },
    { "sh4_write_sr", sh4_write_sr },
    { "sh4_read_sr", sh4_read_sr },
    { "sh4_raise_exception", sh4_raise_exception },
    { "sh4_sleep", sh4_sleep },
    { "sh4_fsca", sh4_fsca },
    { "sh4_ftrv", sh4_ftrv },
    { "sh4_switch_fr_banks", sh4_switch_fr_banks },
    { "sh4_execute_instruction", sh4_execute_instruction },
    { "signsat48", signsat48 },
    { "xlat_get_code_by_vma", xlat_get_code_by_vma },
    { "xlat_get_code", xlat_get_code }
};


gboolean is_sse3_supported()
{
    uint32_t features;

    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE;
}

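/* CPUID leaf 1 reports SSE3 support in bit 0 of ECX, which is what the inline
 * assembly above tests. For reference, an equivalent check written with GCC's
 * <cpuid.h> helper (an illustrative alternative, not used by the translator):
 *
 *     #include <cpuid.h>
 *
 *     static gboolean is_sse3_supported_alt(void)
 *     {
 *         unsigned int eax, ebx, ecx, edx;
 *         if( !__get_cpuid( 1, &eax, &ebx, &ecx, &edx ) )
 *             return FALSE;  // CPUID leaf 1 unavailable
 *         return (ecx & 1) ? TRUE : FALSE;
 *     }
 */
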
void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
{
    sh4_x86.priv_address_space = priv;
    sh4_x86.user_address_space = user;
    x86_symbol_table[2].ptr = priv;
    x86_symbol_table[3].ptr = user;
}

void sh4_translate_init(void)
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
    sh4_x86.begin_callback = NULL;
    sh4_x86.end_callback = NULL;
    sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
    sh4_x86.fastmem = TRUE;
    sh4_x86.profile_blocks = FALSE;
    sh4_x86.sse3_enabled = is_sse3_supported();
    x86_disasm_init();
    x86_set_symtab( x86_symbol_table, sizeof(x86_symbol_table)/sizeof(struct x86_symbol) );
}

void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
{
    sh4_x86.begin_callback = begin;
    sh4_x86.end_callback = end;
}

void sh4_translate_set_fastmem( gboolean flag )
{
    sh4_x86.fastmem = flag;
}

void sh4_translate_set_profile_blocks( gboolean flag )
{
    sh4_x86.profile_blocks = flag;
}

gboolean sh4_translate_get_profile_blocks()
{
    return sh4_x86.profile_blocks;
}

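/* Typical configuration from emulator startup code (an illustrative call
 * sequence - the surrounding init logic is hypothetical):
 *
 *     sh4_translate_init();
 *     sh4_translate_set_fastmem( FALSE );        // route all accesses through mem_region_fn
 *     sh4_translate_set_profile_blocks( TRUE );  // count executions in xlat_cache_block.active
 */
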
/**
 * Disassemble the given translated code block, and its source SH4 code block
 * side-by-side. The current native pc will be marked if non-null.
 */
void sh4_translate_disasm_block( FILE *out, void *code, sh4addr_t source_start, void *native_pc )
{
    char buf[256];
    char op[256];

    uintptr_t target_start = (uintptr_t)code, target_pc;
    uintptr_t target_end = target_start + xlat_get_code_size(code);
    uint32_t source_pc = source_start;
    uint32_t source_end = source_pc;
    xlat_recovery_record_t source_recov_table = XLAT_RECOVERY_TABLE(code);
    xlat_recovery_record_t source_recov_end = source_recov_table + XLAT_BLOCK_FOR_CODE(code)->recover_table_size - 1;

    for( target_pc = target_start; target_pc < target_end; ) {
        uintptr_t pc2 = x86_disasm_instruction( target_pc, buf, sizeof(buf), op );
#if SIZEOF_VOID_P == 8
        fprintf( out, "%c%016lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                 target_pc, op, buf );
#else
        fprintf( out, "%c%08lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                 target_pc, op, buf );
#endif
        if( source_recov_table < source_recov_end &&
            target_pc >= (target_start + source_recov_table->xlat_offset) ) {
            source_recov_table++;
            if( source_end < (source_start + (source_recov_table->sh4_icount)*2) )
                source_end = source_start + (source_recov_table->sh4_icount)*2;
        }

        if( source_pc < source_end ) {
            uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
            fprintf( out, " %08X: %s %s\n", source_pc, op, buf );
            source_pc = source_pc2;
        } else {
            fprintf( out, "\n" );
        }

        target_pc = pc2;
    }

    while( source_pc < source_end ) {
        uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
        fprintf( out, "%*c %08X: %s %s\n", 72,' ', source_pc, op, buf );
        source_pc = source_pc2;
    }
}

static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    int reloc_size = 4;

    if( exc_code == -2 ) {
        reloc_size = sizeof(void *);
    }

    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                                          sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }

    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
        (((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}

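/* Backpatch usage sketch: an emitter such as the check_ralign32 macro below
 * produces
 *
 *     TESTL_imms_r32( 0x00000003, x86reg );
 *     JCC_cc_rel32( X86_COND_NE, 0 );       // rel32 left as 0 for now
 *     sh4_x86_add_backpatch( xlat_output, pc, EXC_DATA_ADDR_READ );
 *
 * and sh4_translate_end_block() later rewrites the rel32 so the branch lands
 * on a stub in the exception trailer, which raises the recorded exc_code with
 * the recorded instruction count. exc_code -2 is special: the fixup is a full
 * pointer-sized slot that instead receives the address of its recovery stub.
 */
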
#define TSTATE_NONE -1
#define TSTATE_O    X86_COND_O
#define TSTATE_C    X86_COND_C
#define TSTATE_E    X86_COND_E
#define TSTATE_NE   X86_COND_NE
#define TSTATE_G    X86_COND_G
#define TSTATE_GE   X86_COND_GE
#define TSTATE_A    X86_COND_A
#define TSTATE_AE   X86_COND_AE

#define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
#define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)

/* Convenience instructions */
#define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
#define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
#define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
#define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
#define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
#define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
#define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
#define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
#define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
#define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
#define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
#define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
#define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
#define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
#define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
#define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
#define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
#define JP_label(label)  JCC_cc_rel8(X86_COND_P,-1); MARK_JMP8(label)
#define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
#define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
#define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)

#define LOAD_t() if( sh4_x86.tstate == TSTATE_NONE ) { \
    CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; }

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_label(label) LOAD_t() \
    JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_label(label) LOAD_t() \
    JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)

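/* Lazy T-bit sketch: a comparison records its condition in sh4_x86.tstate so
 * that an immediately following BT/BF can branch on the live host flags. For
 * CMP/EQ Rm,Rn the translator emits (schematically):
 *
 *     CMPL_r32_r32( REG_EAX, REG_ECX );
 *     SETE_t();                         // sh4r.t = (Rm == Rn)
 *     sh4_x86.tstate = TSTATE_E;
 *
 * A later JT_label()/JF_label() then compiles to a single jcc; LOAD_t() only
 * re-reads sh4r.t from memory when tstate has been invalidated (TSTATE_NONE).
 */
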
#define load_reg(x86reg,sh4reg)  MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
#define store_reg(x86reg,sh4reg) MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
#define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
#define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register
 */
#define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
#define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )


#define push_fpul()   FLDF_rbpdisp(R_FPUL)
#define pop_fpul()    FSTPF_rbpdisp(R_FPUL)
#define push_fr(frm)  FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)   FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm)  FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)   FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm)  FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)   FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )

#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif


/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2), 4 ); \
        } else { \
            exit_block_exc(EXC_ILLEGAL, pc, 2); \
        } \
        sh4_x86.branch_taken = TRUE; \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        MOVL_rbpdisp_r32( R_SR, REG_EAX );\
        ANDL_imms_r32( SR_FD, REG_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
        sh4_x86.tstate = TSTATE_NONE; \
    }

#define check_ralign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)

#define UNDEF(ir)

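/* Guard sketch: check_priv() is resolved at translation time rather than at
 * run time - sh4_x86.sh4_mode is fixed for the whole block, because blocks
 * are keyed by xlat_sh4_mode (see jump_next_block() below). A privileged
 * instruction translated in user mode therefore compiles directly into an
 * illegal-instruction exit:
 *
 *     check_priv();   // expands to exit_block_exc(EXC_ILLEGAL, pc, 2) + return
 *     ...             // never emitted for a user-mode block
 */
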
/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
 */
#ifdef HAVE_FRAME_ADDRESS
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
        CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    } else {
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
        MOVP_immptr_rptr( 0, REG_ARG2 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
    }
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
        CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
    } else {
        if( value_reg != REG_ARG2 ) {
            MOVL_r32_r32( value_reg, REG_ARG2 );
        }
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
#if MAX_REG_ARG > 2
        MOVP_immptr_rptr( 0, REG_ARG3 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
#else
        MOVL_imm32_rspdisp( 0, 0 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
#endif
    }
}
#else
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
}
#endif

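/* Memory dispatch sketch: MEM_READ_LONG( REG_EAX, REG_EAX ) (defined below)
 * expands via call_read_func to roughly
 *
 *     decode_address( address_space(), REG_EAX ); // REG_ECX = page's mem_region_fn
 *     CALL1_r32disp_r32( REG_ECX, offsetof(struct mem_region_fn, read_long), REG_EAX );
 *
 * i.e. an indirect call through the per-page function table selected from the
 * privileged or user address space. When the TLB is on (or the block runs in
 * user mode) the exception return pointer is also passed, registered with the
 * -2 backpatch code above.
 */
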
#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
#define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
#define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc)
#define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
#define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
#define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)

#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4); sh4_x86.in_delay_slot = DELAY_NONE; return 2;

/** Offset of xlat_sh4_mode field relative to the code pointer */
#define XLAT_SH4_MODE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
#define XLAT_CHAIN_CODE_OFFSET    (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
#define XLAT_ACTIVE_CODE_OFFSET   (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )

void sh4_translate_begin_block( sh4addr_t pc )
{
    sh4_x86.code = xlat_output;
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_TLB_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
    sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
    emit_prologue();
    if( sh4_x86.begin_callback ) {
        CALL_ptr( sh4_x86.begin_callback );
    }
    if( sh4_x86.profile_blocks ) {
        MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
        ADDL_imms_r32disp( 1, REG_EAX, 0 );
    }
}


uint32_t sh4_translate_end_block_size()
{
    uint32_t epilogue_size = EPILOGUE_SIZE;
    if( sh4_x86.end_callback ) {
        epilogue_size += (CALL1_PTR_MIN_SIZE - 1);
    }
    if( sh4_x86.backpatch_posn <= 3 ) {
        epilogue_size += (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
    } else {
        epilogue_size += (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
    }
    return epilogue_size;
}


/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    MOVL_imm32_r32( pc, REG_EAX );
    CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
}


#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)

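/* Generated block layout sketch:
 *
 *     prologue (fixed PROLOGUE_SIZE bytes, from emit_prologue())
 *     [ optional begin_callback call / profile counter increment ]
 *     translated body
 *     epilogue + exception trailer (bounded by sh4_translate_end_block_size())
 *
 * The fixed-size prologue is what allows jump_next_block() below to enter a
 * target block at code + PROLOGUE_SIZE, bypassing its entry sequence.
 */
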
/**
 * Test if the loaded target code pointer in %eax is valid, and if so jump
 * directly into it, bypassing the normal exit.
 */
static void jump_next_block()
{
    uint8_t *ptr = xlat_output;
    TESTP_rptr_rptr(REG_EAX, REG_EAX);
    JE_label(nocode);
    if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
        /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
        MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
        CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    } else {
        CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    }
    JNE_label(wrongmode);
    LEAP_rptrdisp_rptr(REG_EAX, PROLOGUE_SIZE,REG_EAX);
    if( sh4_x86.end_callback ) {
        /* Note this does leave the stack out of alignment, but doesn't matter
         * for what we're currently using it for.
         */
        PUSH_r32(REG_EAX);
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        JMP_rptr(REG_EAX);
    }
    JMP_TARGET(wrongmode);
    MOVP_rptrdisp_rptr( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
    int rel = ptr - xlat_output;
    JMP_prerel(rel);
    JMP_TARGET(nocode);
}

/**
 * Translate the block at pc if necessary (ie if there is no existing
 * translation for the current SH4 mode), then rewrite the calling call-site
 * into a direct branch to the target block and thread the site onto the
 * block's use-list so it can be unlinked later.
 */
static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc )
{
    uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
    while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
        target = XLAT_BLOCK_CHAIN(target);
    }
    if( target == NULL ) {
        target = sh4_translate_basic_block( pc );
    }
    uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
    *backpatch = 0xE9;
    *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)+PROLOGUE_SIZE-5;
    *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
    XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch;

    uint8_t * volatile *retptr = ((uint8_t * volatile *)__builtin_frame_address(0))+1;
    assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
    *retptr = backpatch;
}

static void emit_translate_and_backpatch()
{
    /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
    CALL1_ptr_r32(sh4_translate_get_code_and_backpatch, REG_ARG1);

    /* When patched, the jmp instruction will be 5 bytes (either platform) -
     * we need to reserve sizeof(void*) bytes for the use-list
     * pointer
     */
    if( sizeof(void*) == 8 ) {
        NOP();
    } else {
        NOP2();
    }
}

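/* Patch-site layout sketch (see sh4_translate_get_code_and_backpatch above):
 *
 *   before:  call sh4_translate_get_code_and_backpatch   + NOP padding
 *   after:   jmp  target+PROLOGUE_SIZE   (0xE9 rel32, 5 bytes)
 *            <sizeof(void*) bytes>       next entry in the target's use-list
 *
 * The patcher also rewrites its own return address on the stack, so the very
 * first execution falls through the freshly written jmp rather than returning
 * past the call site. sh4_translate_unlink_block() below walks the use-list
 * and re-emits the original call at each site when the target is invalidated.
 */
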
/**
 * If we're jumping to a fixed address (or at least fixed relative to the
 * current PC), then we can do a direct branch. REG_ARG1 should contain
 * the PC at this point.
 */
static void jump_next_block_fixed_pc( sh4addr_t pc )
{
    if( IS_IN_ICACHE(pc) ) {
        if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN && sh4_x86.end_callback == NULL ) {
            /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
             * fetch-and-backpatch routine, which will replace the call with a branch */
            emit_translate_and_backpatch();
            return;
        } else {
            MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
            ANDP_imms_rptr( -4, REG_EAX );
        }
    } else if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code, REG_ARG1);
    }
    jump_next_block();
}

void sh4_translate_unlink_block( void *use_list )
{
    uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */
    void *next = use_list;
    while( next != NULL ) {
        xlat_output = (uint8_t *)next;
        next = *(void **)(xlat_output+5);
        emit_translate_and_backpatch();
    }
    xlat_output = tmp;
}



static void exit_block()
{
    emit_epilogue();
    if( sh4_x86.end_callback ) {
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        RET();
    }
}

/**
 * Exit the block with sh4r.pc already written
 */
void exit_block_pcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}

/**
 * Exit the block with sh4r.new_pc written with the target pc
 */
void exit_block_newpcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}

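/* Worked example of the cycle accounting above: SH4 instructions are 2 bytes,
 * so a block entered at 0x8C0100A0 that exits at pc = 0x8C0100AC has executed
 * (0xAC - 0xA0) >> 1 = 6 instructions, and the exit paths add
 * 6 * sh4_cpu_period to slice_cycle before comparing against event_pending.
 * Only while no event is due does the code chain directly into the next
 * translated block; otherwise it falls back to exit_block().
 */
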
/**
 * Exit the block to an absolute PC
 */
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    MOVL_imm32_r32( pc, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    jump_next_block_fixed_pc(pc);
    JMP_TARGET(exitloop);
    exit_block();
}

/**
 * Exit the block to a relative PC
 */
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
        /* Special case for tight loops - the PC doesn't change, and
         * we already know the target address. Just check events pending before
         * looping.
         */
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output)) + PROLOGUE_SIZE;
        JCC_cc_prerel(X86_COND_A, backdisp);
    } else {
        MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
        ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
        MOVL_r32_rbpdisp( REG_ARG1, R_PC );
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        JBE_label(exitloop2);

        jump_next_block_fixed_pc(pc);
        JMP_TARGET(exitloop2);
    }
    exit_block();
}

/**
 * Exit unconditionally with a general exception
 */
void exit_block_exc( int code, sh4addr_t pc, int inst_adjust )
{
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc + inst_adjust)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_imm32_r32( code, REG_ARG1 );
    CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
    exit_block();
}

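/* Tight-loop sketch (see the special case in exit_block_rel above): for a
 * self-branching block such as
 *
 *     loop:  ...
 *            BF loop        ; branch back to its own start
 *
 * the translated code compares slice_cycle against event_pending and, while
 * no event is due, branches straight back to its own body at
 * code + PROLOGUE_SIZE, avoiding an xlat_get_code lookup on every iteration.
 */
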
/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
    ADDL_r32_rbpdisp( REG_ECX, R_PC );

    MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
    MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );

    CALL_ptr( sh4_execute_instruction );
    exit_block();
}

/**
 * Write the block trailer (exception handling block)
 */
void sh4_translate_end_block( sh4addr_t pc ) {
    if( sh4_x86.branch_taken == FALSE ) {
        // Didn't exit unconditionally already, so write the termination here
        exit_block_rel( pc, pc );
    }
    if( sh4_x86.backpatch_posn != 0 ) {
        unsigned int i;
        // Exception raised - cleanup and exit
        uint8_t *end_ptr = xlat_output;
        MOVL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_rbpdisp( REG_ECX, R_SPC );
        MOVL_moffptr_eax( &sh4_cpu_period );
        INC_r32( REG_EDX );  /* Add 1 for the aborting instruction itself */
        MULL_r32( REG_EDX );
        ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
        exit_block();

        for( i=0; i < sh4_x86.backpatch_posn; i++ ) {
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
                if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
                    *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output;
                } else {
                    *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                }
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            } else {
                *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
                CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            }
        }
    }
}

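/* Exception trailer protocol sketch: each backpatched branch lands on a stub
 * that loads the faulting instruction's index into EDX and jumps to end_ptr,
 * where the shared cleanup runs (schematically):
 *
 *     sh4r.spc         += EDX * 2;                    // byte offset from block start
 *     sh4r.slice_cycle += (EDX + 1) * sh4_cpu_period; // include the aborting instruction
 *
 * before the normal epilogue. For exc_code >= 0 the stub additionally calls
 * sh4_raise_exception with the recorded code before jumping to the trailer.
 */
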
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both branch and delayed instruction as a single unit (as
 * the delay slot instruction is not independently reachable).
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or an exception-raising instruction).
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    if( !sh4_x86.in_delay_slot ) {
        sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
    }

    /* check for breakpoints at this pc */
    for( int i=0; i<sh4_breakpoint_count; i++ ) {
        if( sh4_breakpoints[i].address == pc ) {
            sh4_translate_emit_breakpoint(pc);
            break;
        }
    }

    /* ... per-opcode translation cases elided ... */

    sh4_x86.in_delay_slot = DELAY_NONE;
    return 0;
}

/* The unwind method only works if we compiled with DWARF2 frame information
 * (ie -fexceptions); otherwise fall back to the direct ia32 frame scan below.
 */
#ifdef HAVE_EXCEPTIONS
#include <unwind.h>

struct UnwindInfo {
    uintptr_t block_start;
    uintptr_t block_end;
    void *pc;
};

static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
{
    struct UnwindInfo *info = arg;
    void *pc = (void *)_Unwind_GetIP(context);
    if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
        info->pc = pc;
        return _URC_NORMAL_STOP;
    }
    return _URC_NO_REASON;
}

/**
 * Walk the stack via _Unwind_Backtrace, returning the first return address
 * that falls within the given translated code block (or NULL if none does).
 */
void *xlat_get_native_pc( void *code, uint32_t code_size )
{
    struct UnwindInfo info;

    info.pc = NULL;
    info.block_start = (uintptr_t)code;
    info.block_end = info.block_start + code_size;
    _Unwind_Backtrace( xlat_check_frame, &info );
    return info.pc;
}
#else
/* Assume this is an ia32 build - amd64 should always have dwarf information */
void *xlat_get_native_pc( void *code, uint32_t code_size )
{
    void *result = NULL;
    /* Scan up to 8 stack frames for one whose saved %ebp slot holds
     * &sh4r+128 - the value translated code keeps in %ebp - and return that
     * frame's return address, which points into the translated block.
     */
    __asm__(
        "mov %%ebp, %%eax\n\t"
        "mov $0x8, %%ecx\n\t"
        "mov %1, %%edx\n"
        "frame_loop: test %%eax, %%eax\n\t"
        "je frame_not_found\n\t"
        "cmp (%%eax), %%edx\n\t"
        "je frame_found\n\t"
        "sub $0x1, %%ecx\n\t"
        "je frame_not_found\n\t"
        "movl (%%eax), %%eax\n\t"
        "jmp frame_loop\n"
        "frame_found: movl 0x4(%%eax), %0\n"
        "frame_not_found:"
        : "=r" (result)
        : "r" (((uint8_t *)&sh4r) + 128 )
        : "eax", "ecx", "edx" );
    return result;
}
#endif