/**
 * $Id$
 *
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif

#include "lxdream.h"
#include "sh4/sh4core.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4stat.h"
#include "sh4/sh4mmio.h"
#include "sh4/mmu.h"
#include "xlat/xltcache.h"
#include "xlat/x86/x86op.h"
#include "clock.h"

#define DEFAULT_BACKPATCH_SIZE 4096

/* Offset of a reg relative to the sh4r structure */
#define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)

#define R_T       REG_OFFSET(t)
#define R_Q       REG_OFFSET(q)
#define R_S       REG_OFFSET(s)
#define R_M       REG_OFFSET(m)
#define R_SR      REG_OFFSET(sr)
#define R_GBR     REG_OFFSET(gbr)
#define R_SSR     REG_OFFSET(ssr)
#define R_SPC     REG_OFFSET(spc)
#define R_VBR     REG_OFFSET(vbr)
#define R_MACH    (REG_OFFSET(mac)+4)
#define R_MACL    REG_OFFSET(mac)
#define R_PC      REG_OFFSET(pc)
#define R_NEW_PC  REG_OFFSET(new_pc)
#define R_PR      REG_OFFSET(pr)
#define R_SGR     REG_OFFSET(sgr)
#define R_FPUL    REG_OFFSET(fpul)
#define R_FPSCR   REG_OFFSET(fpscr)
#define R_DBR     REG_OFFSET(dbr)
#define R_R(rn)   REG_OFFSET(r[rn])
#define R_FR(f)   REG_OFFSET(fr[0][(f)^1])
#define R_XF(f)   REG_OFFSET(fr[1][(f)^1])
#define R_DR(f)   REG_OFFSET(fr[(f)&1][(f)&0x0E])
#define R_DRL(f)  REG_OFFSET(fr[(f)&1][(f)|0x01])
#define R_DRH(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])

#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2
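/* Illustration (assumes r[] is the first field of sh4r): the -128 bias in
 * REG_OFFSET() pairs with sh4_translate_begin_block(), which points REG_EBP
 * at ((uint8_t *)&sh4r) + 128. Centring the addressable window this way keeps
 * every field in the first 256 bytes of sh4r within a signed 8-bit
 * displacement, so the emitted rbpdisp addressing forms stay short:
 *
 *     R_R(0)  == 0*4  - 128 == -128   (fits in disp8)
 *     R_R(15) == 15*4 - 128 ==  -68   (fits in disp8)
 */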
struct backpatch_record {
    uint32_t fixup_offset;
    uint32_t fixup_icount;
    int32_t exc_code;
};

/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken;  /* true if we branched unconditionally */
    gboolean double_prec;   /* true if FPU is in double-precision mode */
    gboolean double_size;   /* true if FPU is in double-size mode */
    gboolean sse3_enabled;  /* true if host supports SSE3 instructions */
    uint32_t block_start_pc;
    uint32_t stack_posn;    /* Trace stack height for alignment purposes */
    int tstate;

    /* mode flags */
    gboolean tlb_on;        /* True if tlb translation is active */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};

static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw;            /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F;  /* fcw value for truncation mode */

gboolean is_sse3_supported()
{
    uint32_t features;

    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE;
}

void sh4_translate_init(void)
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
    sh4_x86.sse3_enabled = is_sse3_supported();
}


static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    int reloc_size = 4;

    if( exc_code == -2 ) {
        reloc_size = sizeof(void *);
    }

    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                                          sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }

    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
        (((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}
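/* Lifecycle sketch: JNE_exc() (below) emits a rel32 conditional jump with a
 * zero displacement and records it here; sh4_translate_end_block() later
 * rewrites that displacement to reach a stub that raises the recorded
 * exc_code. The special value -2 instead marks a pointer-sized immediate
 * (planted by MOVP_immptr_rptr) to be filled with the stub's address. e.g.
 *
 *     TESTL_imms_r32( 0x03, x86reg );   // from check_ralign32
 *     JNE_exc(EXC_DATA_ADDR_READ);      // jcc rel32 0; fixup recorded
 */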
#define TSTATE_NONE -1
#define TSTATE_O    X86_COND_O
#define TSTATE_C    X86_COND_C
#define TSTATE_E    X86_COND_E
#define TSTATE_NE   X86_COND_NE
#define TSTATE_G    X86_COND_G
#define TSTATE_GE   X86_COND_GE
#define TSTATE_A    X86_COND_A
#define TSTATE_AE   X86_COND_AE

#define MARK_JMP8(x)  uint8_t *_mark_jmp_##x = (xlat_output-1)
#define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)

/* Convenience instructions */
#define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
#define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
#define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
#define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
#define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
#define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
#define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
#define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
#define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
#define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
#define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
#define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
#define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
#define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
#define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
#define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
#define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
#define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
#define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
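/* Usage sketch (hypothetical emitter fragment): sh4_x86.tstate caches the
 * x86 condition code that currently mirrors sh4r.t, so a compare emitted by
 * the previous instruction can feed JT/JF directly without re-reading R_T.
 * Forward jumps are emitted with a -1 rel8 placeholder and patched once the
 * target is known:
 *
 *     JF_label(skip);        // branch over the T-set path; CMPs R_T if stale
 *     ...code for the T-set case...
 *     JMP_TARGET(skip);      // adds the distance travelled to the rel8 byte
 */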
#define load_reg(x86reg,sh4reg)  MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
#define store_reg(x86reg,sh4reg) MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
#define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
#define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register
 */
#define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
#define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )


#define push_fpul()   FLDF_rbpdisp(R_FPUL)
#define pop_fpul()    FSTPF_rbpdisp(R_FPUL)
#define push_fr(frm)  FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)   FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm)  FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)   FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm)  FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)   FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )

#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif


/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( (sh4r.xlat_sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2) ); \
        } else { \
            exit_block_exc(EXC_ILLEGAL, pc); \
        } \
        sh4_x86.branch_taken = TRUE; \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        MOVL_rbpdisp_r32( R_SR, REG_EAX );\
        ANDL_imms_r32( SR_FD, REG_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
        sh4_x86.tstate = TSTATE_NONE; \
    }

#define check_ralign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);
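/* Expansion example: a longword store such as MOV.L Rm, @Rn guards its
 * address (here assumed to already be in EAX) with
 *
 *     check_walign32( REG_EAX );
 *
 * which emits TESTL $3, %eax / JNE <rel32 0>, the jump being recorded by
 * JNE_exc(EXC_DATA_ADDR_WRITE) for backpatching to the exception trailer.
 */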
#define address_space() ((sh4r.xlat_sh4_mode&SR_MD) ? (uintptr_t)sh4_address_space : (uintptr_t)sh4_user_address_space)

#define UNDEF(ir)
/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
 * don't waste the cycles expecting them. Otherwise we need to save the
 * exception pointer.
 */
#ifdef HAVE_FRAME_ADDRESS
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) {
        CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    } else {
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
        MOVP_immptr_rptr( 0, REG_ARG2 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
    }
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) {
        CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
    } else {
        if( value_reg != REG_ARG2 ) {
            MOVL_r32_r32( value_reg, REG_ARG2 );
        }
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
#if MAX_REG_ARG > 2
        MOVP_immptr_rptr( 0, REG_ARG3 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
#else
        MOVL_imm32_rspdisp( 0, 0 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
#endif
    }
}
#else
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
}
#endif

#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
#define MEM_READ_BYTE( addr_reg, value_reg )           call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
#define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc)
#define MEM_READ_WORD( addr_reg, value_reg )           call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
#define MEM_READ_LONG( addr_reg, value_reg )           call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
#define MEM_WRITE_BYTE( addr_reg, value_reg )          call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
#define MEM_WRITE_WORD( addr_reg, value_reg )          call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
#define MEM_WRITE_LONG( addr_reg, value_reg )          call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
#define MEM_PREFETCH( addr_reg )                       call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)
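/* Dispatch sketch: decode_address() is expected to leave a pointer to the
 * address's struct mem_region_fn table in REG_ECX (which is how the
 * CALL*_r32disp_* calls above reach it). A read such as
 *
 *     MEM_READ_LONG( REG_EAX, REG_EAX );
 *
 * therefore compiles to one indirect call through that table at
 * offsetof(struct mem_region_fn, read_long); in the TLB/user-mode path the
 * extra argument is a host return address, planted as a 0 immediate and
 * backpatched (exc_code -2) to point at the block's exception stub.
 */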
#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;

void sh4_translate_begin_block( sh4addr_t pc )
{
    enter_block();
    MOVP_immptr_rptr( ((uint8_t *)&sh4r) + 128, REG_EBP );
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_TLB_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
}


uint32_t sh4_translate_end_block_size()
{
    if( sh4_x86.backpatch_posn <= 3 ) {
        return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*24);
    } else {
        return EPILOGUE_SIZE + 72 + (sh4_x86.backpatch_posn-3)*27;
    }
}


/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    MOVL_imm32_r32( pc, REG_EAX );
    CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
}


#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)

/**
 * Exit the block with sh4r.pc already written
 */
void exit_block_pcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }
    exit_block();
}

/**
 * Exit the block with sh4r.new_pc written with the target pc
 */
void exit_block_newpcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }
    exit_block();
}
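/* Cycle accounting, worked through (assuming sh4_cpu_period is the per-cycle
 * time and each instruction is costed at one cycle here): SH4 instructions
 * are 2 bytes, so (pc - block_start_pc) >> 1 counts the instructions executed
 * when leaving at pc, and the first two instructions emitted above implement
 *
 *     sh4r.slice_cycle += ((pc - block_start_pc) >> 1) * sh4_cpu_period;
 *
 * i.e. the block's whole cost is charged once on exit rather than per
 * instruction.
 */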
/**
 * Exit the block to an absolute PC
 */
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( pc, REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, R_PC );
    if( IS_IN_ICACHE(pc) ) {
        MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
        ANDP_imms_rptr( -4, REG_EAX );
    } else if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ECX);
    } else {
        CALL1_ptr_r32(xlat_get_code, REG_ECX);
    }
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    exit_block();
}

/**
 * Exit the block to a relative PC
 */
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
    ADDL_rbpdisp_r32( R_PC, REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, R_PC );
    if( IS_IN_ICACHE(pc) ) {
        MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
        ANDP_imms_rptr( -4, REG_EAX );
    } else if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ECX);
    } else {
        CALL1_ptr_r32(xlat_get_code, REG_ECX);
    }
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    exit_block();
}

/**
 * Exit unconditionally with a general exception
 */
void exit_block_exc( int code, sh4addr_t pc )
{
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_imm32_r32( code, REG_ARG1 );
    CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    exit_block();
}
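/* For example, check_priv() above compiles a privileged instruction met in
 * user mode down to a single unconditional exit:
 *
 *     exit_block_exc(EXC_ILLEGAL, pc);   // raise, re-lookup PC, leave block
 *
 * after which the translator stops the block (branch_taken is set).
 */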
/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
    ADDL_r32_rbpdisp( REG_ECX, R_PC );

    MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
    MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );

    CALL_ptr( sh4_execute_instruction );
    MOVL_rbpdisp_r32( R_PC, REG_EAX );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_EAX);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_EAX);
    }
    exit_block();
}

/**
 * Write the block trailer (exception handling block)
 */
void sh4_translate_end_block( sh4addr_t pc ) {
    if( sh4_x86.branch_taken == FALSE ) {
        // Didn't exit unconditionally already, so write the termination here
        exit_block_rel( pc, pc );
    }
    if( sh4_x86.backpatch_posn != 0 ) {
        unsigned int i;
        // Exception raised - cleanup and exit
        uint8_t *end_ptr = xlat_output;
        MOVL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_rbpdisp( REG_ECX, R_SPC );
        MOVL_moffptr_eax( &sh4_cpu_period );
        MULL_r32( REG_EDX );
        ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
        MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
        if( sh4_x86.tlb_on ) {
            CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
        } else {
            CALL1_ptr_r32(xlat_get_code, REG_ARG1);
        }
        exit_block();

        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
                if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
                    *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output;
                } else {
                    *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                }
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            } else {
                *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
                CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            }
        }
    }
}
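/* Fixup arithmetic, spelled out: fixup_offset points at the 4-byte
 * displacement of a rel32 jump that still holds 0, and x86 interprets that
 * displacement relative to the end of the jump instruction (the fixup site
 * plus 4). Hence
 *
 *     *fixup_addr += xlat_output - fixup_site - 4;
 *
 * (with fixup_site standing for the displacement's address within the code
 * block, as written above) retargets the jump at the stub being emitted at
 * xlat_output.
 */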
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both branch and delayed instruction as a single unit (as
 * they must always execute together).
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or the instruction following a delayed branch).
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    if( !sh4_x86.in_delay_slot ) {
        sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
    }

    /* check for breakpoints at this pc */
    for( int i=0; i<sh4_breakpoint_count; i++ ) {
        if( sh4_breakpoints[i].address == pc ) {
            sh4_translate_emit_breakpoint(pc);
            break;
        }
    }


#ifdef HAVE_EXCEPTIONS
#include <unwind.h>

struct UnwindInfo {
    uintptr_t block_start;
    uintptr_t block_end;
    void *pc;
};

static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
{
    struct UnwindInfo *info = arg;
    void *pc = (void *)_Unwind_GetIP(context);
    if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
        info->pc = pc;
        return _URC_NORMAL_STOP;
    }
    return _URC_NO_REASON;
}

void *xlat_get_native_pc( void *code, uint32_t code_size )
{
    struct _Unwind_Exception exc;
    struct UnwindInfo info;

    info.pc = NULL;
    info.block_start = (uintptr_t)code;
    info.block_end = info.block_start + code_size;
    void *result = NULL;
    _Unwind_Backtrace( xlat_check_frame, &info );
    return info.pc;
}
#else
/* Assume this is an ia32 build - amd64 should always have dwarf information */
void *xlat_get_native_pc( void *code, uint32_t code_size )
{
    void *result = NULL;
    asm(
        "mov %%ebp, %%eax\n\t"
        "mov $0x8, %%ecx\n\t"
        "mov %1, %%edx\n"
        "frame_loop: test %%eax, %%eax\n\t"
        "je frame_not_found\n\t"
        "cmp (%%eax), %%edx\n\t"
        "je frame_found\n\t"
        "sub $0x1, %%ecx\n\t"
        "je frame_not_found\n\t"
        "movl (%%eax), %%eax\n\t"
        "jmp frame_loop\n"
        "frame_found: movl 0x4(%%eax), %0\n"
        "frame_not_found:"
        : "=r" (result)
        : "r" (((uint8_t *)&sh4r) + 128 )
        : "eax", "ecx", "edx" );
    return result;
}
#endif
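/* Recovery sketch: xlat_get_native_pc() locates the host PC of a fault inside
 * a translated block - via _Unwind_Backtrace() where DWARF frame info exists,
 * otherwise by the ia32 scan above, which walks up to 8 saved frame pointers
 * looking for the sentinel EBP value (&sh4r + 128) that translated code runs
 * with; the word just above that saved EBP is the return address, i.e. the
 * native PC within the block's code.
 */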