nkeynes@1006: /** nkeynes@1006: * $Id: xir.h 931 2008-10-31 02:57:59Z nkeynes $ nkeynes@1006: * nkeynes@1006: * x86/x86-64 final code generation nkeynes@1006: * nkeynes@1006: * Copyright (c) 2009 Nathan Keynes. nkeynes@1006: * nkeynes@1006: * This program is free software; you can redistribute it and/or modify nkeynes@1006: * it under the terms of the GNU General Public License as published by nkeynes@1006: * the Free Software Foundation; either version 2 of the License, or nkeynes@1006: * (at your option) any later version. nkeynes@1006: * nkeynes@1006: * This program is distributed in the hope that it will be useful, nkeynes@1006: * but WITHOUT ANY WARRANTY; without even the implied warranty of nkeynes@1006: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the nkeynes@1006: * GNU General Public License for more details. nkeynes@1006: */ nkeynes@1006: #include nkeynes@1006: nkeynes@1006: #include "xlat/xir.h" nkeynes@1006: #include "xlat/xlat.h" nkeynes@1006: #include "xlat/x86/x86op.h" nkeynes@1006: nkeynes@1006: typedef enum { nkeynes@1006: SSE_NONE = 0, nkeynes@1006: SSE_1, nkeynes@1006: SSE_2, nkeynes@1006: SSE_3, nkeynes@1006: SSE_3_1, /* AKA SSSE3 */ nkeynes@1006: SSE_4_1, nkeynes@1006: SSE_4_2 nkeynes@1006: } sse_version_t; nkeynes@1006: nkeynes@1006: /* 32-bit register groups: nkeynes@1006: * General regs 0..7 nkeynes@1006: * - EAX, EDX - arguments, volatile nkeynes@1006: * - ECX - volatile nkeynes@1006: * - EBX, ESI, EDI - non-volatile nkeynes@1006: * - ESP, EBP - blocked out for system use. nkeynes@1006: * XMM regs 16..23 nkeynes@1006: * - Floating or integer, all volatile nkeynes@1006: * MMX regs 32..39 nkeynes@1006: * - integer, all volatile nkeynes@1006: * OR (if SSE is unsupported) nkeynes@1006: * x87 regs 32..39 nkeynes@1006: * - floating point, all volatile, stack allocator nkeynes@1006: */ nkeynes@1006: nkeynes@1006: /* nkeynes@1006: * 64-bit register groups: nkeynes@1006: * General regs 0..15 nkeynes@1006: * - EDI, ESI - arguments, volatile nkeynes@1006: * - EAX, ECX, EDX, ... - volatile nkeynes@1006: * - EBX, ... non-volatile nkeynes@1006: * - ESP, EBP - blocked for system use (r13?) nkeynes@1006: * XMM regs 16..31 nkeynes@1006: * - Floating or integer, all volatile nkeynes@1006: * MMX regs 32..39 nkeynes@1006: * - integer, all volatile nkeynes@1006: * OR nkeynes@1006: * x87 regs 32..39 nkeynes@1006: * - floating point, all volatile, stack allocator nkeynes@1006: */ nkeynes@1006: nkeynes@1006: nkeynes@1006: nkeynes@1006: nkeynes@1006: struct x86_target_info_struct { nkeynes@1006: sse_version_t sse_version; nkeynes@1006: } x86_target_info; nkeynes@1006: nkeynes@1006: nkeynes@1006: /** nkeynes@1006: * Initialize x86_target_info - detect supported features from cpuid nkeynes@1006: */ nkeynes@1006: void x86_target_init() nkeynes@1006: { nkeynes@1006: uint32_t feature1, feature2; nkeynes@1006: nkeynes@1006: __asm__ __volatile__( nkeynes@1006: "mov $0x01, %%eax\n\t" nkeynes@1006: "cpuid\n\t" : "=c" (feature1), "=d" (feature2) : : "eax", "ebx"); nkeynes@1006: nkeynes@1006: /* Walk through from oldest to newest - while it's normally the case nkeynes@1006: * that all older extensions are supported, you're not supposed to nkeynes@1006: * depend on that assumption. So instead we stop as soon as we find nkeynes@1006: * a missing feature bit. */ nkeynes@1006: if( (feature2 & 0x02000000) == 0 ) { nkeynes@1006: x86_target_info.sse_version = SSE_NONE; nkeynes@1006: } else if( (feature2 & 0x04000000) == 0 ) { nkeynes@1006: x86_target_info.sse_version = SSE_1; nkeynes@1006: } else if( (feature1 & 0x00000001) == 0 ) { /* SSE3 bit */ nkeynes@1006: x86_target_info.sse_version = SSE_2; nkeynes@1006: } else if( (feature1 & 0x00000100) == 0 ) { /* SSSE3 bit */ nkeynes@1006: x86_target_info.sse_version = SSE_3; nkeynes@1006: } else if( (feature1 & 0x00080000) == 0 ) { /* SSE4.1 bit */ nkeynes@1006: x86_target_info.sse_version = SSE_3_1; nkeynes@1006: } else if( (feature1 & 0x00100000) == 0 ) { /* SSE4.2 bit */ nkeynes@1006: x86_target_info.sse_version = SSE_4_1; nkeynes@1006: } else { nkeynes@1006: x86_target_info.sse_version = SSE_4_2; nkeynes@1006: } nkeynes@1006: } nkeynes@1006: nkeynes@1006: #define IS_X86_64() (sizeof(void *)==8) nkeynes@1006: #define IS_XMM_REG(op,n) (XOP_REG(op,n) >= MIN_XMM_REGISTER && XOP_REG(op,n) <= MAX_AMD64_XMM_REGISTER) nkeynes@1006: nkeynes@1011: #define NONE NO_OPERAND nkeynes@1011: #define SRC SOURCE_OPERAND nkeynes@1011: #define DST DEST_OPERAND nkeynes@1011: #define TMP TEMP_OPERAND nkeynes@1011: #define IMM IMMEDIATE_OPERAND nkeynes@1006: nkeynes@1011: #define MAX_X86_GENERAL_REGISTER 7 nkeynes@1011: #define MAX_AMD64_GENERAL_REGISTER 15 nkeynes@1011: #define MIN_XMM_REGISTER 16 nkeynes@1011: #define MAX_X86_XMM_REGISTER 23 nkeynes@1011: #define MAX_AMD64_XMM_REGISTER 31 nkeynes@1006: nkeynes@1011: #define SRCADDR(op,n) (XOP_REG(op,n) - 128) nkeynes@1011: #define TMPADDR(op,n) (XOP_REG(op,n)) /* FIXME */ nkeynes@1006: nkeynes@1011: #define ILLOP(op) FATAL("Illegal x86 opcode %s %d %d\n", XIR_OPCODE_TABLE[op->opcode], op->operand[0].form, op->operand[1].form) nkeynes@1006: nkeynes@1006: // Convenience macros nkeynes@1006: #define X86L_IMMS_REG(opname, op) \ nkeynes@1011: if( XOP_IS_FORM(op,IMM,DST) ) { opname##_imms_r32(XOP_INT(op,0),XOP_REG(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,IMM,SRC) ) { opname##_imms_r32disp(XOP_INT(op,0),REG_RBP,SRCADDR(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,IMM,TMP) ) { opname##_imms_r32disp(XOP_INT(op,0),REG_RSP,TMPADDR(op,1)); } \ nkeynes@1006: else { ILLOP(op); } nkeynes@1006: nkeynes@1011: #define X86L_REG_DST(opname,op) \ nkeynes@1011: if( XOP_IS_FORM(op,DST,DST) ) { opname##_r32_r32(XOP_REG(op,0),XOP_REG(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,SRC,DST) ) { opname##_r32disp_r32(REG_RBP, SRCADDR(op,0),XOP_REG(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,TMP,DST) ) { opname##_r32disp_r32(REG_RSP, TMPADDR(op,0),XOP_REG(op,1)); } \ nkeynes@1006: else { ILLOP(op); } nkeynes@1006: nkeynes@1011: #define X86F_REG_DST(opname,op ) \ nkeynes@1011: if( XOP_IS_FORM(op,DST,DST) ) { opname##_xmm_xmm(XOP_REG(op,0),XOP_REG(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,SRC,DST) ) { opname##_r32disp_xmm(REG_RBP, SRCADDR(op,0),XOP_REG(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,TMP,DST) ) { opname##_r32disp_xmm(REG_RSP, TMPADDR(op,0),XOP_REG(op,1)); } \ nkeynes@1006: else { ILLOP(op); } nkeynes@1006: nkeynes@1006: #define X86L_REG_REG(opname,op) \ nkeynes@1011: if( XOP_IS_FORM(op,DST,DST) ) { opname##_r32_r32(XOP_REG(op,0),XOP_REG(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,SRC,DST) ) { opname##_r32disp_r32(REG_RBP, SRCADDR(op,0),XOP_REG(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,DST,SRC) ) { opname##_r32_r32disp(XOP_REG(op,0),REG_RBP, SRCADDR(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,TMP,DST) ) { opname##_r32disp_r32(REG_RSP, TMPADDR(op,0),XOP_REG(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,DST,TMP) ) { opname##_r32_r32disp(XOP_REG(op,0),REG_RSP, TMPADDR(op,1)); } \ nkeynes@1006: else { ILLOP(op); } nkeynes@1006: nkeynes@1006: #define X86L_REG(opname,op) \ nkeynes@1011: if( XOP_IS_DST(op,0) ) { opname##_r32(XOP_REG(op,0)); } \ nkeynes@1011: else if( XOP_IS_SRC(op,0) ) { opname##_r32disp(REG_RBP,SRCADDR(op,0)); } \ nkeynes@1011: else if( XOP_IS_TMP(op,0) ) { opname##_r32disp(REG_RSP,TMPADDR(op,0)); } \ nkeynes@1006: else { ILLOP(op); } nkeynes@1006: nkeynes@1006: #define X86L_CL_REG(opname,op) \ nkeynes@1011: if( XOP_IS_FORM(op,DST,DST) && XOP_REG(op,0) == REG_CL ) { opname##_cl_r32(XOP_REG(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,DST,SRC) && XOP_REG(op,0) == REG_CL ) { opname##_cl_r32disp(REG_RBP, SRCADDR(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,DST,TMP) && XOP_REG(op,0) == REG_CL ) { opname##_cl_r32disp(REG_RSP, TMPADDR(op,1)); } \ nkeynes@1006: else { ILLOP(op); } nkeynes@1006: nkeynes@1006: #define X86L_IMMCL_REG(opname,op) \ nkeynes@1011: if( XOP_IS_FORM(op,IMM,DST) ) { opname##_imm_r32(XOP_INT(op,0),XOP_REG(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,IMM,SRC) ) { opname##_imm_r32disp(XOP_INT(op,0),REG_RBP, SRCADDR(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,IMM,TMP) ) { opname##_imm_r32disp(XOP_INT(op,0),REG_RSP, TMPADDR(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,DST,DST) && XOP_REG(op,0) == REG_CL ) { opname##_cl_r32(XOP_REG(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,DST,SRC) && XOP_REG(op,0) == REG_CL ) { opname##_cl_r32disp(REG_RBP, SRCADDR(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,DST,TMP) && XOP_REG(op,0) == REG_CL ) { opname##_cl_r32disp(REG_RSP, TMPADDR(op,1)); } \ nkeynes@1006: else { ILLOP(op); } nkeynes@1006: nkeynes@1006: // Standard ALU forms - imms,reg or reg,reg nkeynes@1006: #define X86L_ALU_REG(opname,op) \ nkeynes@1011: if( XOP_IS_FORM(op,IMM,DST) ) { opname##_imms_r32(XOP_INT(op,0),XOP_REG(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,IMM,SRC) ) { opname##_imms_r32disp(XOP_INT(op,0),REG_RBP, SRCADDR(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,IMM,TMP) ) { opname##_imms_r32disp(XOP_INT(op,0),REG_RSP, TMPADDR(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,DST,DST) ) { opname##_r32_r32(XOP_REG(op,0),XOP_REG(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,SRC,DST) ) { opname##_r32disp_r32(REG_RBP, SRCADDR(op,0),XOP_REG(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,DST,SRC) ) { opname##_r32_r32disp(XOP_REG(op,0),REG_RBP, SRCADDR(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,TMP,DST) ) { opname##_r32disp_r32(REG_RSP, TMPADDR(op,0),XOP_REG(op,1)); } \ nkeynes@1011: else if( XOP_IS_FORM(op,DST,TMP) ) { opname##_r32_r32disp(XOP_REG(op,0),REG_RSP, TMPADDR(op,1)); } \ nkeynes@1006: else { ILLOP(op); } nkeynes@1006: nkeynes@1006: uint32_t x86_target_get_code_size( xir_op_t begin, xir_op_t end ) nkeynes@1006: { nkeynes@1006: return -1; nkeynes@1006: } nkeynes@1006: nkeynes@1006: nkeynes@1006: /** nkeynes@1006: * Note: Assumes that the IR is x86-legal (ie doesn't contain any unencodeable instructions). nkeynes@1006: */ nkeynes@1006: uint32_t x86_target_codegen( target_data_t td, xir_op_t begin, xir_op_t end ) nkeynes@1006: { nkeynes@1006: int ss; nkeynes@1006: xir_op_t it; nkeynes@1006: nkeynes@1006: /* Prologue */ nkeynes@1006: nkeynes@1006: for( it=begin; it != NULL; it = it->next ) { nkeynes@1006: switch( it->opcode ) { nkeynes@1006: case OP_ENTER: nkeynes@1006: case OP_BARRIER: nkeynes@1006: case OP_NOP: nkeynes@1006: /* No code to generate */ nkeynes@1006: break; nkeynes@1006: case OP_MOV: nkeynes@1011: if( XOP_IS_FORM(it, IMM, DST) ) { nkeynes@1006: MOVL_imm32_r32( XOP_INT(it,0), XOP_REG2(it) ); nkeynes@1011: } else if( XOP_IS_FORM(it, IMM, SRC) ) { nkeynes@1011: MOVL_imm32_r32disp( XOP_INT(it,0), REG_RBP, SRCADDR(it,1) ); nkeynes@1011: } else if( XOP_IS_FORM(it, IMM, TMP) ) { nkeynes@1011: MOVL_imm32_r32disp( XOP_INT(it,0), REG_RSP, TMPADDR(it,1) ); nkeynes@1011: } else if( XOP_IS_FORM(it, DST, SRC) ) { nkeynes@1006: if( IS_XMM_REG(it,0) ) { nkeynes@1011: MOVSS_xmm_r32disp( XOP_REG1(it), REG_RBP, SRCADDR(it,1) ); nkeynes@1006: } else { nkeynes@1011: MOVL_r32_r32disp( XOP_REG1(it), REG_RBP, SRCADDR(it,1) ); nkeynes@1006: } nkeynes@1011: } else if( XOP_IS_FORM(it, DST, DST) ) { nkeynes@1006: if( IS_XMM_REG(it,0) ) { nkeynes@1006: if( IS_XMM_REG(it,1) ) { nkeynes@1006: MOVSS_xmm_xmm( XOP_REG1(it), XOP_REG2(it) ); nkeynes@1006: } else { nkeynes@1006: MOVL_xmm_r32( XOP_REG1(it), XOP_REG2(it) ); nkeynes@1006: } nkeynes@1006: } else if( IS_XMM_REG(it,1) ) { nkeynes@1006: MOVL_r32_xmm( XOP_REG1(it), XOP_REG2(it) ); nkeynes@1006: } else { nkeynes@1006: MOVL_r32_r32( XOP_REG1(it), XOP_REG2(it) ); nkeynes@1006: } nkeynes@1011: } else if( XOP_IS_FORM(it, SRC, DST) ) { nkeynes@1006: if( IS_XMM_REG(it,1) ) { nkeynes@1011: MOVSS_r32disp_xmm( REG_RBP, SRCADDR(it,0), XOP_REG2(it) ); nkeynes@1006: } else { nkeynes@1011: MOVL_r32disp_r32( REG_RBP, SRCADDR(it,0), XOP_REG2(it) ); nkeynes@1006: } nkeynes@1006: } else { nkeynes@1006: ILLOP(it); nkeynes@1006: } nkeynes@1006: break; nkeynes@1006: case OP_MOVQ: nkeynes@1006: if( XOP_IS_FORM(it, IMM, SRC) ) { nkeynes@1006: ILLOP(it); nkeynes@1011: } else if( XOP_IS_FORM(it, IMM, DST) ) { nkeynes@1006: if( IS_XMM_REG(it,0) ) { nkeynes@1006: if( XOP_INT(it,0) == 0 ) { nkeynes@1006: XORPD_xmm_xmm( XOP_REG2(it), XOP_REG2(it) ); nkeynes@1006: } nkeynes@1006: } else { nkeynes@1006: MOVQ_imm64_r64( XOP_INT(it,0), XOP_REG2(it) ); nkeynes@1006: } nkeynes@1011: } else if( XOP_IS_FORM(it, DST, SRC) ) { nkeynes@1006: if( IS_XMM_REG(it,0) ) { nkeynes@1011: MOVSD_xmm_r32disp( XOP_REG1(it), REG_RBP, SRCADDR(it,1) ); nkeynes@1006: } else { nkeynes@1011: MOVQ_r64_r64disp( XOP_REG1(it), REG_RBP, SRCADDR(it,1) ); nkeynes@1006: } nkeynes@1011: } else if( XOP_IS_FORM(it, DST, DST) ) { nkeynes@1006: if( IS_XMM_REG(it,0) ) { nkeynes@1006: if( IS_XMM_REG(it,1) ) { nkeynes@1006: MOVSD_xmm_xmm( XOP_REG1(it), XOP_REG2(it) ); nkeynes@1006: } else { nkeynes@1006: MOVQ_xmm_r64( XOP_REG1(it), XOP_REG2(it) ); nkeynes@1006: } nkeynes@1006: } else if( IS_XMM_REG(it,1) ) { nkeynes@1006: MOVQ_r64_xmm( XOP_REG1(it), XOP_REG2(it) ); nkeynes@1006: } else { nkeynes@1006: MOVQ_r64_r64( XOP_REG1(it), XOP_REG2(it) ); nkeynes@1006: } nkeynes@1011: } else if( XOP_IS_FORM(it, SRC, DST) ) { nkeynes@1006: if( IS_XMM_REG(it,1) ) { nkeynes@1011: MOVSD_r32disp_xmm( REG_RBP, SRCADDR(it,0), XOP_REG2(it) ); nkeynes@1006: } else { nkeynes@1011: MOVQ_r64disp_r64( REG_RBP, SRCADDR(it,0), XOP_REG2(it) ); nkeynes@1006: } nkeynes@1006: } else { nkeynes@1006: ILLOP(it); nkeynes@1006: } nkeynes@1006: break; nkeynes@1006: case OP_MOVSX8: nkeynes@1011: if( XOP_IS_FORM(it, DST, DST) ) { nkeynes@1006: MOVSXL_r8_r32( XOP_REG1(it), XOP_REG2(it) ); nkeynes@1011: } else if( XOP_IS_FORM(it, SRC, DST) ) { nkeynes@1011: MOVSXL_r32disp8_r32( REG_RBP, SRCADDR(it,0), XOP_REG2(it) ); nkeynes@1006: } else { nkeynes@1006: ILLOP(it); nkeynes@1006: } nkeynes@1006: break; nkeynes@1006: case OP_MOVSX16: nkeynes@1011: if( XOP_IS_FORM(it, DST, DST) ) { nkeynes@1006: MOVSXL_r16_r32( XOP_REG1(it), XOP_REG2(it) ); nkeynes@1011: } else if( XOP_IS_FORM(it, SRC, DST) ) { nkeynes@1011: MOVSXL_r32disp16_r32( REG_RBP, SRCADDR(it,0), XOP_REG2(it) ); nkeynes@1006: } else { nkeynes@1006: ILLOP(it); nkeynes@1006: } nkeynes@1006: break; nkeynes@1006: case OP_MOVZX8: nkeynes@1011: if( XOP_IS_FORM(it, DST, DST) ) { nkeynes@1006: MOVZXL_r8_r32( XOP_REG1(it), XOP_REG2(it) ); nkeynes@1011: } else if( XOP_IS_FORM(it, SRC, DST) ) { nkeynes@1011: MOVZXL_r32disp8_r32( REG_RBP, SRCADDR(it,0), XOP_REG2(it) ); nkeynes@1006: } else { nkeynes@1006: ILLOP(it); nkeynes@1006: } nkeynes@1006: break; nkeynes@1006: case OP_MOVZX16: nkeynes@1011: if( XOP_IS_FORM(it, DST, DST) ) { nkeynes@1006: MOVZXL_r16_r32( XOP_REG1(it), XOP_REG2(it) ); nkeynes@1011: } else if( XOP_IS_FORM(it, SRC, DST) ) { nkeynes@1011: MOVZXL_r32disp16_r32( REG_RBP, SRCADDR(it,0), XOP_REG2(it) ); nkeynes@1006: } else { nkeynes@1006: ILLOP(it); nkeynes@1006: } nkeynes@1006: break; nkeynes@1006: case OP_ADD: nkeynes@1006: case OP_ADDS: X86L_ALU_REG(ADDL,it); break; nkeynes@1006: case OP_ADDCS: X86L_ALU_REG(ADCL,it); break; nkeynes@1006: case OP_AND: X86L_ALU_REG(ANDL,it); break; nkeynes@1006: case OP_CMP: nkeynes@1006: X86L_ALU_REG(CMPL,it); break; nkeynes@1006: case OP_DEC: nkeynes@1011: if( XOP_IS_FORM(it,DST,NONE) ) { nkeynes@1006: DECL_r32(XOP_REG(it,0)); nkeynes@1006: } else if( XOP_IS_FORM(it,SRC,NONE) ) { nkeynes@1011: DECL_r32disp( REG_RBP, SRCADDR(it,0) ); nkeynes@1006: } else { nkeynes@1006: ILLOP(it); nkeynes@1006: } nkeynes@1006: break; nkeynes@1006: case OP_MUL: nkeynes@1011: X86L_REG_DST(IMULL,it); nkeynes@1006: break; nkeynes@1006: case OP_NEG: X86L_REG(NEGL,it); break; nkeynes@1006: case OP_NOT: X86L_REG(NOTL,it); break; nkeynes@1006: case OP_OR: X86L_ALU_REG(ORL,it); break; nkeynes@1006: case OP_RCL: X86L_IMMCL_REG(RCLL,it); break; nkeynes@1006: case OP_RCR: X86L_IMMCL_REG(RCRL,it); break; nkeynes@1006: case OP_ROL: X86L_IMMCL_REG(ROLL,it); break; nkeynes@1006: case OP_ROR: X86L_IMMCL_REG(RORL,it); break; nkeynes@1006: case OP_SAR: nkeynes@1006: case OP_SARS: X86L_IMMCL_REG(SARL,it); break; nkeynes@1006: case OP_SUBBS: X86L_ALU_REG(SBBL,it); break; nkeynes@1006: case OP_SLL: nkeynes@1006: case OP_SLLS: X86L_IMMCL_REG(SHLL,it); break; nkeynes@1006: case OP_SLR: nkeynes@1006: case OP_SLRS: X86L_IMMCL_REG(SHRL,it); break; nkeynes@1006: case OP_SUB: nkeynes@1006: case OP_SUBS: X86L_ALU_REG(SUBL,it); break; nkeynes@1006: case OP_SHUFFLE: nkeynes@1011: if( XOP_IS_FORM(it,IMM,DST) ) { nkeynes@1006: if( XOP_INT(it,0) == 0x4321 ) { nkeynes@1006: BSWAPL_r32( XOP_REG(it,1) ); nkeynes@1006: } else if( it->operand[1].value.i == 0x1243 ) { nkeynes@1006: XCHGB_r8_r8( REG_AL, REG_AH ); nkeynes@1006: /* XCHG al, ah */ nkeynes@1006: } nkeynes@1006: } nkeynes@1006: break; nkeynes@1006: case OP_TST: X86L_ALU_REG(TESTL,it); break; nkeynes@1006: case OP_XOR: X86L_ALU_REG(XORL,it); break; nkeynes@1006: nkeynes@1006: // Float nkeynes@1006: case OP_ABSF: nkeynes@1006: case OP_ABSD: nkeynes@1006: // Why is there no SSE FP ABS instruction? nkeynes@1006: break; nkeynes@1011: case OP_ADDF: X86F_REG_DST(ADDSS,it); break; nkeynes@1011: case OP_ADDD: X86F_REG_DST(ADDSD,it); break; nkeynes@1006: case OP_CMPF: nkeynes@1006: break; nkeynes@1006: case OP_CMPD: // UCOMISD nkeynes@1006: break; nkeynes@1011: case OP_DIVF: X86F_REG_DST(DIVSS,it); break; nkeynes@1011: case OP_DIVD: X86F_REG_DST(DIVSD,it); break; nkeynes@1011: case OP_MULF: X86F_REG_DST(MULSS,it); break; nkeynes@1011: case OP_MULD: X86F_REG_DST(MULSD,it); break; nkeynes@1011: case OP_RSQRTF:X86F_REG_DST(RSQRTSS,it); break; nkeynes@1011: case OP_SQRTF: X86F_REG_DST(SQRTSS,it); break; nkeynes@1011: case OP_SQRTD: X86F_REG_DST(SQRTSD,it); break; nkeynes@1011: case OP_SUBF: X86F_REG_DST(SUBSS,it); break; nkeynes@1011: case OP_SUBD: X86F_REG_DST(SUBSD,it); break; nkeynes@1006: nkeynes@1006: case OP_DOTPRODV: nkeynes@1011: MULPS_r32disp_xmm( REG_RBP, SRCADDR(it,0), 4 ); nkeynes@1006: HADDPS_xmm_xmm( 4, 4 ); nkeynes@1006: HADDPS_xmm_xmm( 4, 4 ); nkeynes@1011: MOVSS_xmm_r32disp( 4, REG_RBP, SRCADDR(it,0) ); nkeynes@1006: break; nkeynes@1006: case OP_SINCOSF: nkeynes@1006: case OP_MATMULV: nkeynes@1006: break; nkeynes@1006: case OP_FTOD: nkeynes@1011: if( XOP_IS_FORM(it,DST,DST) ) { nkeynes@1006: CVTSS2SD_xmm_xmm( XOP_REG(it,0), XOP_REG(it,1) ); nkeynes@1011: } else if( XOP_IS_FORM(it,SRC,DST) ) { nkeynes@1011: CVTSS2SD_r32disp_xmm( REG_RBP, SRCADDR(it,0), XOP_REG(it,1) ); nkeynes@1006: } else { nkeynes@1006: ILLOP(it); nkeynes@1006: } nkeynes@1006: break; nkeynes@1006: case OP_DTOF: nkeynes@1011: if( XOP_IS_FORM(it,DST,DST) ) { nkeynes@1006: CVTSS2SD_xmm_xmm( XOP_REG(it,0), XOP_REG(it,1) ); nkeynes@1011: } else if( XOP_IS_FORM(it, SRC,DST) ) { nkeynes@1011: CVTSS2SD_r32disp_xmm( REG_RBP, SRCADDR(it,0), XOP_REG(it,1) ); nkeynes@1006: } else { nkeynes@1006: ILLOP(it); nkeynes@1006: } nkeynes@1006: break; nkeynes@1006: case OP_ITOD: nkeynes@1011: if( XOP_IS_FORM(it,DST,DST) ) { nkeynes@1006: CVTSI2SDL_r32_xmm( XOP_REG(it,0), XOP_REG(it,1) ); nkeynes@1011: } else if( XOP_IS_FORM(it,SRC,DST) ) { nkeynes@1011: CVTSI2SDL_r32disp_xmm( REG_RBP, SRCADDR(it,0), XOP_REG(it,1) ); nkeynes@1006: } else { nkeynes@1006: ILLOP(it); nkeynes@1006: } nkeynes@1006: break; nkeynes@1006: case OP_DTOI: nkeynes@1011: if( XOP_IS_FORM(it,DST,DST) ) { nkeynes@1006: CVTSD2SIL_xmm_r32( XOP_REG(it,0), XOP_REG(it,1) ); nkeynes@1011: } else if( XOP_IS_FORM(it,SRC,DST) ) { nkeynes@1011: CVTSD2SIL_r32disp_r32( REG_RBP, SRCADDR(it,0), XOP_REG(it,1) ); nkeynes@1006: } else { nkeynes@1006: ILLOP(it); nkeynes@1006: } nkeynes@1006: break; nkeynes@1006: case OP_ITOF: nkeynes@1006: case OP_FTOI: nkeynes@1006: nkeynes@1006: case OP_BRCOND: nkeynes@1006: case OP_BRREL: nkeynes@1006: case OP_BR: nkeynes@1006: case OP_BRCONDDEL: nkeynes@1006: nkeynes@1006: case OP_CALL0: nkeynes@1011: if( XOP_IS_IMM(it,0) ) { nkeynes@1006: CALL_imm32( XOP_INT(it,0) ); nkeynes@1011: } else if( XOP_IS_SRC(it,0) ) { nkeynes@1006: CALL_r32( XOP_INT(it,0) ); nkeynes@1006: } else { nkeynes@1006: ILLOP(it); nkeynes@1006: } nkeynes@1006: break; nkeynes@1006: case OP_XLAT: nkeynes@1006: if( IS_X86_64() ) { nkeynes@1006: ss = 3; nkeynes@1006: } else { nkeynes@1006: ss = 2; nkeynes@1006: } nkeynes@1011: if( XOP_IS_FORM(it,IMM,DST) ) { nkeynes@1006: MOVP_sib_rptr(ss, XOP_REG(it,1), -1, XOP_INT(it,0), XOP_REG(it,1)); nkeynes@1011: } else if( XOP_IS_FORM(it,DST,DST) ) { nkeynes@1006: MOVP_sib_rptr(ss, XOP_REG(it,1), XOP_REG(it,0), 0, XOP_REG(it,1)); nkeynes@1006: } else { nkeynes@1006: ILLOP(it); nkeynes@1006: } nkeynes@1006: break; nkeynes@1006: case OP_CALLLUT: nkeynes@1011: if( XOP_IS_FORM(it,DST,IMM) ) { nkeynes@1006: CALL_r32disp(XOP_REG(it,0),XOP_INT(it,1)); nkeynes@1011: } else if( XOP_IS_FORM(it,DST,DST) ) { nkeynes@1006: CALL_sib(0,XOP_REG(it,0),XOP_REG(it,1),0); nkeynes@1011: } else if( XOP_IS_FORM(it,IMM,DST) ) { nkeynes@1006: CALL_r32disp(XOP_REG(it,1),XOP_INT(it,0)); nkeynes@1006: } else { nkeynes@1006: ILLOP(it); nkeynes@1006: } nkeynes@1006: break; nkeynes@1006: nkeynes@1006: // SH4-specific macro operations nkeynes@1006: case OP_RAISEME: nkeynes@1006: nkeynes@1006: case OP_RAISEMNE: nkeynes@1006: nkeynes@1006: case OP_CMPSTR: nkeynes@1006: break; nkeynes@1006: case OP_DIV1: nkeynes@1006: break; nkeynes@1006: case OP_SHAD: nkeynes@1011: assert( it->operand[0].form == DST && XOP_REG(it,0) == REG_ECX ); nkeynes@1006: CMPL_imms_r32(0,REG_ECX); nkeynes@1006: JNGE_label(shad_shr); nkeynes@1006: X86L_CL_REG(SHLL,it); nkeynes@1006: JMP_label(shad_end); nkeynes@1006: nkeynes@1006: JMP_TARGET(shad_shr); nkeynes@1011: if( IS_X86_64() && it->operand[1].form == DST ) { nkeynes@1006: /* We can do this a little more simply with a 64-bit shift */ nkeynes@1006: ORL_imms_r32(0xFFFFFFE0,REG_ECX); nkeynes@1006: NEGL_r32(REG_ECX); nkeynes@1006: MOVSXQ_r32_r64(XOP_REG(it,1), XOP_REG(it,1)); // sign-extend nkeynes@1006: SARQ_cl_r64(XOP_REG(it,1)); nkeynes@1006: } else { nkeynes@1006: NEGL_r32(REG_ECX); nkeynes@1006: ANDB_imms_r8( 0x1F, REG_ECX ); nkeynes@1006: JE_label(emptyshr ); nkeynes@1006: X86L_CL_REG(SARL,it); nkeynes@1006: JMP_label(shad_end2); nkeynes@1006: nkeynes@1006: JMP_TARGET(emptyshr); nkeynes@1011: if( it->operand[1].form == DST ) { nkeynes@1006: SARL_imm_r32( 31, XOP_REG(it,1) ); nkeynes@1011: } else if( it->operand[1].form == SRC ) { nkeynes@1011: SARL_imm_r32disp( 32, REG_RBP, SRCADDR(it,1) ); nkeynes@1006: } else { nkeynes@1011: SARL_imm_r32disp( 32, REG_RSP, TMPADDR(it,1) ); nkeynes@1006: } nkeynes@1006: JMP_TARGET(shad_end2); nkeynes@1006: } nkeynes@1006: JMP_TARGET(shad_end); nkeynes@1006: break; nkeynes@1006: nkeynes@1006: case OP_SHLD: nkeynes@1011: assert( it->operand[0].form == DST && XOP_REG(it,0) == REG_ECX ); nkeynes@1006: CMPL_imms_r32(0,REG_ECX); nkeynes@1006: JNGE_label(shld_shr); nkeynes@1006: X86L_CL_REG(SHLL,it); nkeynes@1006: JMP_label(shld_end); nkeynes@1006: nkeynes@1006: JMP_TARGET(shld_shr); nkeynes@1011: if( IS_X86_64() && it->operand[1].form == DST ) { nkeynes@1006: /* We can do this a little more simply with a 64-bit shift */ nkeynes@1006: ORL_imms_r32(0xFFFFFFE0,REG_ECX); nkeynes@1006: NEGL_r32(REG_ECX); nkeynes@1006: MOVL_r32_r32(XOP_REG(it,1), XOP_REG(it,1)); // Ensure high bits are 0 nkeynes@1006: SHRQ_cl_r64(XOP_REG(it,1)); nkeynes@1006: } else { nkeynes@1006: NEGL_r32(REG_ECX); nkeynes@1006: ANDB_imms_r8( 0x1F, REG_ECX ); nkeynes@1006: JE_label(emptyshr ); nkeynes@1006: X86L_CL_REG(SHRL,it); nkeynes@1006: JMP_label(shld_end2); nkeynes@1006: nkeynes@1006: JMP_TARGET(emptyshr); nkeynes@1006: XORL_r32_r32( REG_EAX, REG_EAX ); nkeynes@1006: JMP_TARGET(shld_end2); nkeynes@1006: } nkeynes@1006: JMP_TARGET(shld_end); nkeynes@1006: break; nkeynes@1006: nkeynes@1006: case OP_MULQ: nkeynes@1006: case OP_ADDQSAT32: nkeynes@1006: case OP_ADDQSAT48: nkeynes@1006: nkeynes@1006: // Should not occur (should be have been lowered in target_lower) nkeynes@1006: case OP_NEGF: nkeynes@1006: case OP_NEGD: nkeynes@1006: case OP_LOADB: nkeynes@1006: case OP_LOADBFW: nkeynes@1006: case OP_LOADW: nkeynes@1006: case OP_LOADL: nkeynes@1006: case OP_LOADQ: nkeynes@1006: case OP_STOREB: nkeynes@1006: case OP_STOREW: nkeynes@1006: case OP_STOREL: nkeynes@1006: case OP_STOREQ: nkeynes@1006: case OP_STORELCA: nkeynes@1006: case OP_OCBI: nkeynes@1006: case OP_OCBP: nkeynes@1006: case OP_OCBWB: nkeynes@1006: case OP_PREF: nkeynes@1006: default: nkeynes@1006: ILLOP(it); nkeynes@1006: } nkeynes@1006: if( it == end ) nkeynes@1006: break; nkeynes@1006: /* Epilogue */ nkeynes@1006: } nkeynes@1006: }