Search
lxdream.org :: lxdream/src/sh4/sh4x86.in
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 991:60c7fab9c880
prev 975:007bf7eb944f
next 992:7c15f8a71995
author nkeynes
date Wed Mar 04 23:12:21 2009 +0000 (15 years ago)
permissions -rw-r--r--
last change Move xltcache to xlat/ src directory
Commit new and improved x86 opcode file - cleaned up and added support for amd64 extended registers
view annotate diff log raw
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "lxdream.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4trans.h"
    31 #include "sh4/sh4stat.h"
    32 #include "sh4/sh4mmio.h"
    33 #include "sh4/mmu.h"
    34 #include "xlat/xltcache.h"
    35 #include "xlat/x86/x86op.h"
    36 #include "clock.h"
    38 #define DEFAULT_BACKPATCH_SIZE 4096
    40 /* Offset of a reg relative to the sh4r structure */
    41 #define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)
    43 #define R_T   REG_OFFSET(t)
    44 #define R_Q   REG_OFFSET(q)
    45 #define R_S   REG_OFFSET(s)
    46 #define R_M   REG_OFFSET(m)
    47 #define R_SR  REG_OFFSET(sr)
    48 #define R_GBR REG_OFFSET(gbr)
    49 #define R_SSR REG_OFFSET(ssr)
    50 #define R_SPC REG_OFFSET(spc)
    51 #define R_VBR REG_OFFSET(vbr)
    52 #define R_MACH REG_OFFSET(mac)+4
    53 #define R_MACL REG_OFFSET(mac)
    54 #define R_PC REG_OFFSET(pc)
    55 #define R_NEW_PC REG_OFFSET(new_pc)
    56 #define R_PR REG_OFFSET(pr)
    57 #define R_SGR REG_OFFSET(sgr)
    58 #define R_FPUL REG_OFFSET(fpul)
    59 #define R_FPSCR REG_OFFSET(fpscr)
    60 #define R_DBR REG_OFFSET(dbr)
/* One pending fixup in the generated code: where in the output buffer a
 * relocation must be patched, and which SH4 exception (if any) the patched
 * code raises. Records are accumulated per-block via sh4_x86_add_backpatch(). */
struct backpatch_record {
    uint32_t fixup_offset;  /* byte offset of the patch site relative to the block's code start */
    uint32_t fixup_icount;  /* 16-bit instruction count from the block start PC */
    int32_t exc_code;       /* exception code, or negative sentinel (-2 => pointer-sized reloc) */
};
    68 #define DELAY_NONE 0
    69 #define DELAY_PC 1
    70 #define DELAY_PC_PR 2
/** 
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;      /* DELAY_NONE / DELAY_PC / DELAY_PC_PR */
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc; /* SH4 PC of the first instruction in the block */
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    int tstate;            /* x86 condition code currently mirroring SH4 T, or TSTATE_NONE */

    /* mode flags */
    gboolean tlb_on; /* True if tlb translation is active */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn; /* number of entries currently in backpatch_list */
    uint32_t backpatch_size; /* capacity of backpatch_list, in records */
};
    97 static struct sh4_x86_state sh4_x86;
    99 static uint32_t max_int = 0x7FFFFFFF;
   100 static uint32_t min_int = 0x80000000;
   101 static uint32_t save_fcw; /* save value for fpu control word */
   102 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
/**
 * Probe the host CPU for SSE3 support via CPUID leaf 1.
 * @return TRUE if ECX bit 0 (the SSE3 feature flag) is set, otherwise FALSE.
 */
gboolean is_sse3_supported()
{
    uint32_t features;

    /* CPUID function 1: the ECX result is captured in 'features'; EAX, EBX
     * and EDX are also written by CPUID and so are declared as clobbers. */
    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE;
}
   114 void sh4_translate_init(void)
   115 {
   116     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   117     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   118     sh4_x86.sse3_enabled = is_sse3_supported();
   119 }
   122 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   123 {
   124     int reloc_size = 4;
   126     if( exc_code == -2 ) {
   127         reloc_size = sizeof(void *);
   128     }
   130     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   131 	sh4_x86.backpatch_size <<= 1;
   132 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   133 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   134 	assert( sh4_x86.backpatch_list != NULL );
   135     }
   136     if( sh4_x86.in_delay_slot ) {
   137 	fixup_pc -= 2;
   138     }
   140     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   141 	(((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
   142     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   143     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   144     sh4_x86.backpatch_posn++;
   145 }
   147 #define TSTATE_NONE -1
   148 #define TSTATE_O    0
   149 #define TSTATE_C    2
   150 #define TSTATE_E    4
   151 #define TSTATE_NE   5
   152 #define TSTATE_G    0xF
   153 #define TSTATE_GE   0xD
   154 #define TSTATE_A    7
   155 #define TSTATE_AE   3
   157 #define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
   158 #define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
   160 /* Convenience instructions */
   161 #define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
   162 #define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
   163 #define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
   164 #define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
   165 #define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
   166 #define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
   167 #define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
   168 #define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
   169 #define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
   170 #define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
   171 #define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
   172 #define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
   173 #define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
   174 #define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
   175 #define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
   176 #define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
   177 #define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
   178 #define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
   179 #define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)
   181 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
   182 #define JT_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
   183 	CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
   184     JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)
   186 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
   187 #define JF_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
   188 	CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
   189     JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
   191 #define load_reg16s(x86reg,sh4reg)  MOVSXL_rbpdisp16_r32( REG_OFFSET(r[sh4reg]), x86reg )
   192 #define load_reg16u(x86reg,sh4reg)  MOVZXL_rbpdisp16_r32( REG_OFFSET(r[sh4reg]), x86reg )
   193 #define load_imm32(x86reg,value)    MOVL_imm32_r32(value,x86reg)
   194 #define load_imm64(x86reg,value)    MOVQ_imm64_r64(value,x86reg)
   195 #define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
   196 #define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )
   197 #define load_spreg(x86reg, regoff)  MOVL_rbpdisp_r32( regoff, x86reg )
   198 #define store_spreg(x86reg, regoff) MOVL_r32_rbpdisp( x86reg, regoff )
   200 /**
   201  * Load an FR register (single-precision floating point) into an integer x86
   202  * register (eg for register-to-register moves)
   203  */
   204 #define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
   205 #define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )
   207 /**
   208  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   209  */
   210 #define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
   211 #define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )
   213 /**
   214  * Store an FR register (single-precision floating point) from an integer x86+
   215  * register (eg for register-to-register moves)
   216  */
   217 #define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
   218 #define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )
   220 #define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   221 #define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   224 #define push_fpul()  FLDF_rbpdisp(R_FPUL)
   225 #define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
   226 #define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
   227 #define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
   228 #define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
   229 #define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
   230 #define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
   231 #define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
   232 #define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
   233 #define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
   235 #ifdef ENABLE_SH4STATS
   236 #define COUNT_INST(id) load_imm32(REG_EAX,id); call_func1(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
   237 #else
   238 #define COUNT_INST(id)
   239 #endif
   242 /* Exception checks - Note that all exception checks will clobber EAX */
   244 #define check_priv( ) \
   245     if( (sh4r.xlat_sh4_mode & SR_MD) == 0 ) { \
   246         if( sh4_x86.in_delay_slot ) { \
   247             exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2) ); \
   248         } else { \
   249             exit_block_exc(EXC_ILLEGAL, pc); \
   250         } \
   251         sh4_x86.branch_taken = TRUE; \
   252         sh4_x86.in_delay_slot = DELAY_NONE; \
   253         return 2; \
   254     }
   256 #define check_fpuen( ) \
   257     if( !sh4_x86.fpuen_checked ) {\
   258 	sh4_x86.fpuen_checked = TRUE;\
   259 	load_spreg( REG_EAX, R_SR );\
   260 	ANDL_imms_r32( SR_FD, REG_EAX );\
   261 	if( sh4_x86.in_delay_slot ) {\
   262 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   263 	} else {\
   264 	    JNE_exc(EXC_FPU_DISABLED);\
   265 	}\
   266 	sh4_x86.tstate = TSTATE_NONE; \
   267     }
   269 #define check_ralign16( x86reg ) \
   270     TESTL_imms_r32( 0x00000001, x86reg ); \
   271     JNE_exc(EXC_DATA_ADDR_READ)
   273 #define check_walign16( x86reg ) \
   274     TESTL_imms_r32( 0x00000001, x86reg ); \
   275     JNE_exc(EXC_DATA_ADDR_WRITE);
   277 #define check_ralign32( x86reg ) \
   278     TESTL_imms_r32( 0x00000003, x86reg ); \
   279     JNE_exc(EXC_DATA_ADDR_READ)
   281 #define check_walign32( x86reg ) \
   282     TESTL_imms_r32( 0x00000003, x86reg ); \
   283     JNE_exc(EXC_DATA_ADDR_WRITE);
   285 #define check_ralign64( x86reg ) \
   286     TESTL_imms_r32( 0x00000007, x86reg ); \
   287     JNE_exc(EXC_DATA_ADDR_READ)
   289 #define check_walign64( x86reg ) \
   290     TESTL_imms_r32( 0x00000007, x86reg ); \
   291     JNE_exc(EXC_DATA_ADDR_WRITE);
   293 #define UNDEF(ir)
   294 #define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
   295 #define MEM_RESULT(value_reg) if(value_reg != REG_EAX) { MOVL_r32_r32(REG_EAX,value_reg); }
   296 /* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so 
   297  * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
   298  */
   300 #ifdef HAVE_FRAME_ADDRESS
   301 #define _CALL_READ(addr_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
   302         call_func1_r32disp8(REG_ECX, MEM_REGION_PTR(fn), addr_reg); } else { \
   303         call_func1_r32disp8_exc(REG_ECX, MEM_REGION_PTR(fn), addr_reg, pc); } 
   304 #define _CALL_WRITE(addr_reg, val_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
   305         call_func2_r32disp8(REG_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg); } else { \
   306         call_func2_r32disp8_exc(REG_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg, pc); }
   307 #else 
   308 #define _CALL_READ(addr_reg, fn) call_func1_r32disp8(REG_ECX, MEM_REGION_PTR(fn), addr_reg)
   309 #define _CALL_WRITE(addr_reg, val_reg, fn) call_func2_r32disp8(REG_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg)
   310 #endif
   312 #define MEM_READ_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_byte); MEM_RESULT(value_reg)
   313 #define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_byte_for_write); MEM_RESULT(value_reg)
   314 #define MEM_READ_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_word); MEM_RESULT(value_reg)
   315 #define MEM_READ_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_long); MEM_RESULT(value_reg)
   316 #define MEM_WRITE_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_byte)
   317 #define MEM_WRITE_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_word)
   318 #define MEM_WRITE_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_long)
   319 #define MEM_PREFETCH( addr_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, prefetch)
   321 #define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
   323 /****** Import appropriate calling conventions ******/
   324 #if SIZEOF_VOID_P == 8
   325 #include "sh4/ia64abi.h"
   326 #else /* 32-bit system */
   327 #include "sh4/ia32abi.h"
   328 #endif
   330 void sh4_translate_begin_block( sh4addr_t pc ) 
   331 {
   332     enter_block();
   333     sh4_x86.in_delay_slot = FALSE;
   334     sh4_x86.fpuen_checked = FALSE;
   335     sh4_x86.branch_taken = FALSE;
   336     sh4_x86.backpatch_posn = 0;
   337     sh4_x86.block_start_pc = pc;
   338     sh4_x86.tlb_on = IS_TLB_ENABLED();
   339     sh4_x86.tstate = TSTATE_NONE;
   340     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   341     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
   342 }
/**
 * Return the worst-case size (in bytes) of the code the block epilogue will
 * emit, including the back-patched exception stubs.
 * NOTE(review): the constants 12, 48 and 15 encode the per-record stub sizes
 * emitted by the ABI epilogue (sh4/ia32abi.h / ia64abi.h) — they must stay
 * in sync with that emitter; confirm against it before changing.
 */
uint32_t sh4_translate_end_block_size()
{
    if( sh4_x86.backpatch_posn <= 3 ) {
        return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
    } else {
        return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
    }
}
/**
 * Embed a breakpoint into the generated code: emits a call to
 * sh4_translate_breakpoint_hit(pc) at the current output position.
 * The call clobbers EAX, so any cached T-state is invalidated.
 * @param pc  SH4 address to report to the breakpoint handler.
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    load_imm32( REG_EAX, pc );
    call_func1( sh4_translate_breakpoint_hit, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
}
   366 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    load_imm32( REG_ECX, endpc - sh4_x86.block_start_pc );   // 5
    ADDL_r32_rbpdisp( REG_ECX, R_PC );

    /* Charge cycles for every instruction up to and including endpc */
    load_imm32( REG_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
    load_imm32( REG_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
    store_spreg( REG_ECX, REG_OFFSET(in_delay_slot) );

    call_func0( sh4_execute_instruction );    
    load_spreg( REG_EAX, R_PC );
    /* Chain to the next block for the (possibly changed) PC, using the
     * VMA lookup when the TLB is active */
    if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,REG_EAX);
    } else {
        call_func1(xlat_get_code,REG_EAX);
    }
    exit_block();
} 
   400 /**
   401  * Translate a single instruction. Delayed branches are handled specially
   402  * by translating both branch and delayed instruction as a single unit (as
   403  * 
   404  * The instruction MUST be in the icache (assert check)
   405  *
   406  * @return true if the instruction marks the end of a basic block
   407  * (eg a branch or 
   408  */
   409 uint32_t sh4_translate_instruction( sh4vma_t pc )
   410 {
   411     uint32_t ir;
   412     /* Read instruction from icache */
   413     assert( IS_IN_ICACHE(pc) );
   414     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   416     if( !sh4_x86.in_delay_slot ) {
   417 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   418     }
   419 %%
   420 /* ALU operations */
   421 ADD Rm, Rn {:
   422     COUNT_INST(I_ADD);
   423     load_reg( REG_EAX, Rm );
   424     load_reg( REG_ECX, Rn );
   425     ADDL_r32_r32( REG_EAX, REG_ECX );
   426     store_reg( REG_ECX, Rn );
   427     sh4_x86.tstate = TSTATE_NONE;
   428 :}
   429 ADD #imm, Rn {:  
   430     COUNT_INST(I_ADDI);
   431     ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
   432     sh4_x86.tstate = TSTATE_NONE;
   433 :}
   434 ADDC Rm, Rn {:
   435     COUNT_INST(I_ADDC);
   436     if( sh4_x86.tstate != TSTATE_C ) {
   437         LDC_t();
   438     }
   439     load_reg( REG_EAX, Rm );
   440     load_reg( REG_ECX, Rn );
   441     ADCL_r32_r32( REG_EAX, REG_ECX );
   442     store_reg( REG_ECX, Rn );
   443     SETC_t();
   444     sh4_x86.tstate = TSTATE_C;
   445 :}
   446 ADDV Rm, Rn {:
   447     COUNT_INST(I_ADDV);
   448     load_reg( REG_EAX, Rm );
   449     load_reg( REG_ECX, Rn );
   450     ADDL_r32_r32( REG_EAX, REG_ECX );
   451     store_reg( REG_ECX, Rn );
   452     SETO_t();
   453     sh4_x86.tstate = TSTATE_O;
   454 :}
   455 AND Rm, Rn {:
   456     COUNT_INST(I_AND);
   457     load_reg( REG_EAX, Rm );
   458     load_reg( REG_ECX, Rn );
   459     ANDL_r32_r32( REG_EAX, REG_ECX );
   460     store_reg( REG_ECX, Rn );
   461     sh4_x86.tstate = TSTATE_NONE;
   462 :}
   463 AND #imm, R0 {:  
   464     COUNT_INST(I_ANDI);
   465     load_reg( REG_EAX, 0 );
   466     ANDL_imms_r32(imm, REG_EAX); 
   467     store_reg( REG_EAX, 0 );
   468     sh4_x86.tstate = TSTATE_NONE;
   469 :}
   470 AND.B #imm, @(R0, GBR) {: 
   471     COUNT_INST(I_ANDB);
   472     load_reg( REG_EAX, 0 );
   473     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
   474     MOVL_r32_rspdisp(REG_EAX, 0);
   475     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
   476     MOVL_rspdisp_r32(0, REG_EAX);
   477     ANDL_imms_r32(imm, REG_EDX );
   478     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
   479     sh4_x86.tstate = TSTATE_NONE;
   480 :}
   481 CMP/EQ Rm, Rn {:  
   482     COUNT_INST(I_CMPEQ);
   483     load_reg( REG_EAX, Rm );
   484     load_reg( REG_ECX, Rn );
   485     CMPL_r32_r32( REG_EAX, REG_ECX );
   486     SETE_t();
   487     sh4_x86.tstate = TSTATE_E;
   488 :}
   489 CMP/EQ #imm, R0 {:  
   490     COUNT_INST(I_CMPEQI);
   491     load_reg( REG_EAX, 0 );
   492     CMPL_imms_r32(imm, REG_EAX);
   493     SETE_t();
   494     sh4_x86.tstate = TSTATE_E;
   495 :}
   496 CMP/GE Rm, Rn {:  
   497     COUNT_INST(I_CMPGE);
   498     load_reg( REG_EAX, Rm );
   499     load_reg( REG_ECX, Rn );
   500     CMPL_r32_r32( REG_EAX, REG_ECX );
   501     SETGE_t();
   502     sh4_x86.tstate = TSTATE_GE;
   503 :}
   504 CMP/GT Rm, Rn {: 
   505     COUNT_INST(I_CMPGT);
   506     load_reg( REG_EAX, Rm );
   507     load_reg( REG_ECX, Rn );
   508     CMPL_r32_r32( REG_EAX, REG_ECX );
   509     SETG_t();
   510     sh4_x86.tstate = TSTATE_G;
   511 :}
   512 CMP/HI Rm, Rn {:  
   513     COUNT_INST(I_CMPHI);
   514     load_reg( REG_EAX, Rm );
   515     load_reg( REG_ECX, Rn );
   516     CMPL_r32_r32( REG_EAX, REG_ECX );
   517     SETA_t();
   518     sh4_x86.tstate = TSTATE_A;
   519 :}
   520 CMP/HS Rm, Rn {: 
   521     COUNT_INST(I_CMPHS);
   522     load_reg( REG_EAX, Rm );
   523     load_reg( REG_ECX, Rn );
   524     CMPL_r32_r32( REG_EAX, REG_ECX );
   525     SETAE_t();
   526     sh4_x86.tstate = TSTATE_AE;
   527  :}
   528 CMP/PL Rn {: 
   529     COUNT_INST(I_CMPPL);
   530     load_reg( REG_EAX, Rn );
   531     CMPL_imms_r32( 0, REG_EAX );
   532     SETG_t();
   533     sh4_x86.tstate = TSTATE_G;
   534 :}
   535 CMP/PZ Rn {:  
   536     COUNT_INST(I_CMPPZ);
   537     load_reg( REG_EAX, Rn );
   538     CMPL_imms_r32( 0, REG_EAX );
   539     SETGE_t();
   540     sh4_x86.tstate = TSTATE_GE;
   541 :}
CMP/STR Rm, Rn {:  
    COUNT_INST(I_CMPSTR);
    /* T = 1 if any byte of Rm equals the corresponding byte of Rn:
     * XOR zeroes matching bytes, then each of the four bytes is tested
     * in turn, short-circuiting to SETE on the first zero byte. */
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_ECX, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target1);
    TESTB_r8_r8( REG_AH, REG_AH );
    JE_label(target2);
    SHRL_imm_r32( 16, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target3);
    TESTB_r8_r8( REG_AH, REG_AH );
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
   561 DIV0S Rm, Rn {:
   562     COUNT_INST(I_DIV0S);
   563     load_reg( REG_EAX, Rm );
   564     load_reg( REG_ECX, Rn );
   565     SHRL_imm_r32( 31, REG_EAX );
   566     SHRL_imm_r32( 31, REG_ECX );
   567     store_spreg( REG_EAX, R_M );
   568     store_spreg( REG_ECX, R_Q );
   569     CMPL_r32_r32( REG_EAX, REG_ECX );
   570     SETNE_t();
   571     sh4_x86.tstate = TSTATE_NE;
   572 :}
   573 DIV0U {:  
   574     COUNT_INST(I_DIV0U);
   575     XORL_r32_r32( REG_EAX, REG_EAX );
   576     store_spreg( REG_EAX, R_Q );
   577     store_spreg( REG_EAX, R_M );
   578     store_spreg( REG_EAX, R_T );
   579     sh4_x86.tstate = TSTATE_C; // works for DIV1
   580 :}
DIV1 Rm, Rn {:
    COUNT_INST(I_DIV1);
    /* One step of the SH4 1-bit division: shift T into Rn, then subtract
     * Rm when M == Q (else add), and recompute Q and T per the SH4 DIV1
     * definition. */
    load_spreg( REG_ECX, R_M );
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
	LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );   /* Rn = (Rn << 1) | T; CF = old MSB */
    SETC_r8( REG_DL ); // Q'
    CMPL_rbpdisp_r32( R_Q, REG_ECX );
    JE_label(mqequal);
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_label(end);
    JMP_TARGET(mqequal);
    SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_TARGET(end);
    store_reg( REG_EAX, Rn ); // Done with Rn now
    SETC_r8(REG_AL); // tmp1
    XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
    XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
    store_spreg( REG_ECX, R_Q );
    XORL_imms_r32( 1, REG_AL );   // T = !Q'
    MOVZXL_r8_r32( REG_AL, REG_EAX );
    store_spreg( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
   607 DMULS.L Rm, Rn {:  
   608     COUNT_INST(I_DMULS);
   609     load_reg( REG_EAX, Rm );
   610     load_reg( REG_ECX, Rn );
   611     IMULL_r32(REG_ECX);
   612     store_spreg( REG_EDX, R_MACH );
   613     store_spreg( REG_EAX, R_MACL );
   614     sh4_x86.tstate = TSTATE_NONE;
   615 :}
   616 DMULU.L Rm, Rn {:  
   617     COUNT_INST(I_DMULU);
   618     load_reg( REG_EAX, Rm );
   619     load_reg( REG_ECX, Rn );
   620     MULL_r32(REG_ECX);
   621     store_spreg( REG_EDX, R_MACH );
   622     store_spreg( REG_EAX, R_MACL );    
   623     sh4_x86.tstate = TSTATE_NONE;
   624 :}
   625 DT Rn {:  
   626     COUNT_INST(I_DT);
   627     load_reg( REG_EAX, Rn );
   628     ADDL_imms_r32( -1, REG_EAX );
   629     store_reg( REG_EAX, Rn );
   630     SETE_t();
   631     sh4_x86.tstate = TSTATE_E;
   632 :}
   633 EXTS.B Rm, Rn {:  
   634     COUNT_INST(I_EXTSB);
   635     load_reg( REG_EAX, Rm );
   636     MOVSXL_r8_r32( REG_EAX, REG_EAX );
   637     store_reg( REG_EAX, Rn );
   638 :}
   639 EXTS.W Rm, Rn {:  
   640     COUNT_INST(I_EXTSW);
   641     load_reg( REG_EAX, Rm );
   642     MOVSXL_r16_r32( REG_EAX, REG_EAX );
   643     store_reg( REG_EAX, Rn );
   644 :}
   645 EXTU.B Rm, Rn {:  
   646     COUNT_INST(I_EXTUB);
   647     load_reg( REG_EAX, Rm );
   648     MOVZXL_r8_r32( REG_EAX, REG_EAX );
   649     store_reg( REG_EAX, Rn );
   650 :}
   651 EXTU.W Rm, Rn {:  
   652     COUNT_INST(I_EXTUW);
   653     load_reg( REG_EAX, Rm );
   654     MOVZXL_r16_r32( REG_EAX, REG_EAX );
   655     store_reg( REG_EAX, Rn );
   656 :}
   657 MAC.L @Rm+, @Rn+ {:
   658     COUNT_INST(I_MACL);
   659     if( Rm == Rn ) {
   660 	load_reg( REG_EAX, Rm );
   661 	check_ralign32( REG_EAX );
   662 	MEM_READ_LONG( REG_EAX, REG_EAX );
   663 	MOVL_r32_rspdisp(REG_EAX, 0);
   664 	load_reg( REG_EAX, Rm );
   665 	LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
   666 	MEM_READ_LONG( REG_EAX, REG_EAX );
   667         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
   668     } else {
   669 	load_reg( REG_EAX, Rm );
   670 	check_ralign32( REG_EAX );
   671 	MEM_READ_LONG( REG_EAX, REG_EAX );
   672 	MOVL_r32_rspdisp( REG_EAX, 0 );
   673 	load_reg( REG_EAX, Rn );
   674 	check_ralign32( REG_EAX );
   675 	MEM_READ_LONG( REG_EAX, REG_EAX );
   676 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
   677 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
   678     }
   680     IMULL_rspdisp( 0 );
   681     ADDL_r32_rbpdisp( REG_EAX, R_MACL );
   682     ADCL_r32_rbpdisp( REG_EDX, R_MACH );
   684     load_spreg( REG_ECX, R_S );
   685     TESTL_r32_r32(REG_ECX, REG_ECX);
   686     JE_label( nosat );
   687     call_func0( signsat48 );
   688     JMP_TARGET( nosat );
   689     sh4_x86.tstate = TSTATE_NONE;
   690 :}
   691 MAC.W @Rm+, @Rn+ {:  
   692     COUNT_INST(I_MACW);
   693     if( Rm == Rn ) {
   694 	load_reg( REG_EAX, Rm );
   695 	check_ralign16( REG_EAX );
   696 	MEM_READ_WORD( REG_EAX, REG_EAX );
   697         MOVL_r32_rspdisp( REG_EAX, 0 );
   698 	load_reg( REG_EAX, Rm );
   699 	LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
   700 	MEM_READ_WORD( REG_EAX, REG_EAX );
   701 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
   702 	// Note translate twice in case of page boundaries. Maybe worth
   703 	// adding a page-boundary check to skip the second translation
   704     } else {
   705 	load_reg( REG_EAX, Rm );
   706 	check_ralign16( REG_EAX );
   707 	MEM_READ_WORD( REG_EAX, REG_EAX );
   708         MOVL_r32_rspdisp( REG_EAX, 0 );
   709 	load_reg( REG_EAX, Rn );
   710 	check_ralign16( REG_EAX );
   711 	MEM_READ_WORD( REG_EAX, REG_EAX );
   712 	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
   713 	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
   714     }
   715     IMULL_rspdisp( 0 );
   716     load_spreg( REG_ECX, R_S );
   717     TESTL_r32_r32( REG_ECX, REG_ECX );
   718     JE_label( nosat );
   720     ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
   721     JNO_label( end );            // 2
   722     load_imm32( REG_EDX, 1 );         // 5
   723     store_spreg( REG_EDX, R_MACH );   // 6
   724     JS_label( positive );        // 2
   725     load_imm32( REG_EAX, 0x80000000 );// 5
   726     store_spreg( REG_EAX, R_MACL );   // 6
   727     JMP_label(end2);           // 2
   729     JMP_TARGET(positive);
   730     load_imm32( REG_EAX, 0x7FFFFFFF );// 5
   731     store_spreg( REG_EAX, R_MACL );   // 6
   732     JMP_label(end3);            // 2
   734     JMP_TARGET(nosat);
   735     ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
   736     ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
   737     JMP_TARGET(end);
   738     JMP_TARGET(end2);
   739     JMP_TARGET(end3);
   740     sh4_x86.tstate = TSTATE_NONE;
   741 :}
   742 MOVT Rn {:  
   743     COUNT_INST(I_MOVT);
   744     load_spreg( REG_EAX, R_T );
   745     store_reg( REG_EAX, Rn );
   746 :}
   747 MUL.L Rm, Rn {:  
   748     COUNT_INST(I_MULL);
   749     load_reg( REG_EAX, Rm );
   750     load_reg( REG_ECX, Rn );
   751     MULL_r32( REG_ECX );
   752     store_spreg( REG_EAX, R_MACL );
   753     sh4_x86.tstate = TSTATE_NONE;
   754 :}
   755 MULS.W Rm, Rn {:
   756     COUNT_INST(I_MULSW);
   757     load_reg16s( REG_EAX, Rm );
   758     load_reg16s( REG_ECX, Rn );
   759     MULL_r32( REG_ECX );
   760     store_spreg( REG_EAX, R_MACL );
   761     sh4_x86.tstate = TSTATE_NONE;
   762 :}
   763 MULU.W Rm, Rn {:  
   764     COUNT_INST(I_MULUW);
   765     load_reg16u( REG_EAX, Rm );
   766     load_reg16u( REG_ECX, Rn );
   767     MULL_r32( REG_ECX );
   768     store_spreg( REG_EAX, R_MACL );
   769     sh4_x86.tstate = TSTATE_NONE;
   770 :}
   771 NEG Rm, Rn {:
   772     COUNT_INST(I_NEG);
   773     load_reg( REG_EAX, Rm );
   774     NEGL_r32( REG_EAX );
   775     store_reg( REG_EAX, Rn );
   776     sh4_x86.tstate = TSTATE_NONE;
   777 :}
   778 NEGC Rm, Rn {:  
   779     COUNT_INST(I_NEGC);
   780     load_reg( REG_EAX, Rm );
   781     XORL_r32_r32( REG_ECX, REG_ECX );
   782     LDC_t();
   783     SBBL_r32_r32( REG_EAX, REG_ECX );
   784     store_reg( REG_ECX, Rn );
   785     SETC_t();
   786     sh4_x86.tstate = TSTATE_C;
   787 :}
   788 NOT Rm, Rn {:  
   789     COUNT_INST(I_NOT);
   790     load_reg( REG_EAX, Rm );
   791     NOTL_r32( REG_EAX );
   792     store_reg( REG_EAX, Rn );
   793     sh4_x86.tstate = TSTATE_NONE;
   794 :}
   795 OR Rm, Rn {:  
   796     COUNT_INST(I_OR);
   797     load_reg( REG_EAX, Rm );
   798     load_reg( REG_ECX, Rn );
   799     ORL_r32_r32( REG_EAX, REG_ECX );
   800     store_reg( REG_ECX, Rn );
   801     sh4_x86.tstate = TSTATE_NONE;
   802 :}
   803 OR #imm, R0 {:
   804     COUNT_INST(I_ORI);
   805     load_reg( REG_EAX, 0 );
   806     ORL_imms_r32(imm, REG_EAX);
   807     store_reg( REG_EAX, 0 );
   808     sh4_x86.tstate = TSTATE_NONE;
   809 :}
   810 OR.B #imm, @(R0, GBR) {:  
   811     COUNT_INST(I_ORB);
   812     load_reg( REG_EAX, 0 );
   813     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
   814     MOVL_r32_rspdisp( REG_EAX, 0 );
   815     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
   816     MOVL_rspdisp_r32( 0, REG_EAX );
   817     ORL_imms_r32(imm, REG_EDX );
   818     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
   819     sh4_x86.tstate = TSTATE_NONE;
   820 :}
   821 ROTCL Rn {:
       // Rotate left through T. If the x86 carry flag already mirrors T
       // (tstate == TSTATE_C) the reload via LDC_t() is skipped.
   822     COUNT_INST(I_ROTCL);
   823     load_reg( REG_EAX, Rn );
   824     if( sh4_x86.tstate != TSTATE_C ) {
   825 	LDC_t();
   826     }
   827     RCLL_imm_r32( 1, REG_EAX );
   828     store_reg( REG_EAX, Rn );
   829     SETC_t();
   830     sh4_x86.tstate = TSTATE_C;
   831 :}
   832 ROTCR Rn {:  
       // Rotate right through T; same carry-cache optimisation as ROTCL.
   833     COUNT_INST(I_ROTCR);
   834     load_reg( REG_EAX, Rn );
   835     if( sh4_x86.tstate != TSTATE_C ) {
   836 	LDC_t();
   837     }
   838     RCRL_imm_r32( 1, REG_EAX );
   839     store_reg( REG_EAX, Rn );
   840     SETC_t();
   841     sh4_x86.tstate = TSTATE_C;
   842 :}
   843 ROTL Rn {:  
       // Plain rotate left; bit rotated out lands in T.
   844     COUNT_INST(I_ROTL);
   845     load_reg( REG_EAX, Rn );
   846     ROLL_imm_r32( 1, REG_EAX );
   847     store_reg( REG_EAX, Rn );
   848     SETC_t();
   849     sh4_x86.tstate = TSTATE_C;
   850 :}
   851 ROTR Rn {:  
       // Plain rotate right; bit rotated out lands in T.
   852     COUNT_INST(I_ROTR);
   853     load_reg( REG_EAX, Rn );
   854     RORL_imm_r32( 1, REG_EAX );
   855     store_reg( REG_EAX, Rn );
   856     SETC_t();
   857     sh4_x86.tstate = TSTATE_C;
   858 :}
   859 SHAD Rm, Rn {:
   860     COUNT_INST(I_SHAD);
   861     /* Annoyingly enough, not directly convertible */
       // Dynamic arithmetic shift: Rm >= 0 shifts left, Rm < 0 shifts right by
       // (-Rm & 0x1F). A right-shift count of 0 (i.e. Rm == -32 mod 32, or the
       // masked negation is zero) must fill with the sign bit, hence the
       // SARL_imm_r32(31) branch rather than a no-op.
   862     load_reg( REG_EAX, Rn );
   863     load_reg( REG_ECX, Rm );
   864     CMPL_imms_r32( 0, REG_ECX );
   865     JGE_label(doshl);
   867     NEGL_r32( REG_ECX );      // 2
   868     ANDB_imms_r8( 0x1F, REG_CL ); // 3
   869     JE_label(emptysar);     // 2
   870     SARL_cl_r32( REG_EAX );       // 2
   871     JMP_label(end);          // 2
   873     JMP_TARGET(emptysar);
   874     SARL_imm_r32(31, REG_EAX );  // 3
   875     JMP_label(end2);
   877     JMP_TARGET(doshl);
   878     ANDB_imms_r8( 0x1F, REG_CL ); // 3
   879     SHLL_cl_r32( REG_EAX );       // 2
   880     JMP_TARGET(end);
   881     JMP_TARGET(end2);
   882     store_reg( REG_EAX, Rn );
   883     sh4_x86.tstate = TSTATE_NONE;
   884 :}
   885 SHLD Rm, Rn {:  
       // Dynamic logical shift: mirror of SHAD, except a right-shift count of 0
       // zeroes the register (XOR) instead of sign-filling.
   886     COUNT_INST(I_SHLD);
   887     load_reg( REG_EAX, Rn );
   888     load_reg( REG_ECX, Rm );
   889     CMPL_imms_r32( 0, REG_ECX );
   890     JGE_label(doshl);
   892     NEGL_r32( REG_ECX );      // 2
   893     ANDB_imms_r8( 0x1F, REG_CL ); // 3
   894     JE_label(emptyshr );
   895     SHRL_cl_r32( REG_EAX );       // 2
   896     JMP_label(end);          // 2
   898     JMP_TARGET(emptyshr);
   899     XORL_r32_r32( REG_EAX, REG_EAX );
   900     JMP_label(end2);
   902     JMP_TARGET(doshl);
   903     ANDB_imms_r8( 0x1F, REG_CL ); // 3
   904     SHLL_cl_r32( REG_EAX );       // 2
   905     JMP_TARGET(end);
   906     JMP_TARGET(end2);
   907     store_reg( REG_EAX, Rn );
   908     sh4_x86.tstate = TSTATE_NONE;
   909 :}
   910 SHAL Rn {: 
       // Arithmetic shift left by 1; the shifted-out MSB goes to T.
   911     COUNT_INST(I_SHAL);
   912     load_reg( REG_EAX, Rn );
   913     SHLL_imm_r32( 1, REG_EAX );
   914     SETC_t();
   915     store_reg( REG_EAX, Rn );
   916     sh4_x86.tstate = TSTATE_C;
   917 :}
   918 SHAR Rn {:  
       // Arithmetic shift right by 1; the shifted-out LSB goes to T.
   919     COUNT_INST(I_SHAR);
   920     load_reg( REG_EAX, Rn );
   921     SARL_imm_r32( 1, REG_EAX );
   922     SETC_t();
   923     store_reg( REG_EAX, Rn );
   924     sh4_x86.tstate = TSTATE_C;
   925 :}
   926 SHLL Rn {:  
       // Logical shift left by 1; shifted-out bit goes to T.
   927     COUNT_INST(I_SHLL);
   928     load_reg( REG_EAX, Rn );
   929     SHLL_imm_r32( 1, REG_EAX );
   930     SETC_t();
   931     store_reg( REG_EAX, Rn );
   932     sh4_x86.tstate = TSTATE_C;
   933 :}
   934 SHLL2 Rn {:
       // Fixed shifts by 2/8/16 don't touch T on SH4, so no SETC here.
   935     COUNT_INST(I_SHLL);
   936     load_reg( REG_EAX, Rn );
   937     SHLL_imm_r32( 2, REG_EAX );
   938     store_reg( REG_EAX, Rn );
   939     sh4_x86.tstate = TSTATE_NONE;
   940 :}
   941 SHLL8 Rn {:  
   942     COUNT_INST(I_SHLL);
   943     load_reg( REG_EAX, Rn );
   944     SHLL_imm_r32( 8, REG_EAX );
   945     store_reg( REG_EAX, Rn );
   946     sh4_x86.tstate = TSTATE_NONE;
   947 :}
   948 SHLL16 Rn {:  
   949     COUNT_INST(I_SHLL);
   950     load_reg( REG_EAX, Rn );
   951     SHLL_imm_r32( 16, REG_EAX );
   952     store_reg( REG_EAX, Rn );
   953     sh4_x86.tstate = TSTATE_NONE;
   954 :}
   955 SHLR Rn {:  
       // Logical shift right by 1; shifted-out bit goes to T.
   956     COUNT_INST(I_SHLR);
   957     load_reg( REG_EAX, Rn );
   958     SHRL_imm_r32( 1, REG_EAX );
   959     SETC_t();
   960     store_reg( REG_EAX, Rn );
   961     sh4_x86.tstate = TSTATE_C;
   962 :}
   963 SHLR2 Rn {:  
   964     COUNT_INST(I_SHLR);
   965     load_reg( REG_EAX, Rn );
   966     SHRL_imm_r32( 2, REG_EAX );
   967     store_reg( REG_EAX, Rn );
   968     sh4_x86.tstate = TSTATE_NONE;
   969 :}
   970 SHLR8 Rn {:  
   971     COUNT_INST(I_SHLR);
   972     load_reg( REG_EAX, Rn );
   973     SHRL_imm_r32( 8, REG_EAX );
   974     store_reg( REG_EAX, Rn );
   975     sh4_x86.tstate = TSTATE_NONE;
   976 :}
   977 SHLR16 Rn {:  
   978     COUNT_INST(I_SHLR);
   979     load_reg( REG_EAX, Rn );
   980     SHRL_imm_r32( 16, REG_EAX );
   981     store_reg( REG_EAX, Rn );
   982     sh4_x86.tstate = TSTATE_NONE;
   983 :}
   984 SUB Rm, Rn {:  
       // Rn := Rn - Rm.
   985     COUNT_INST(I_SUB);
   986     load_reg( REG_EAX, Rm );
   987     load_reg( REG_ECX, Rn );
   988     SUBL_r32_r32( REG_EAX, REG_ECX );
   989     store_reg( REG_ECX, Rn );
   990     sh4_x86.tstate = TSTATE_NONE;
   991 :}
   992 SUBC Rm, Rn {:  
       // Rn := Rn - Rm - T; borrow out becomes the new T. The LDC_t() reload is
       // skipped when the x86 carry flag already holds T.
   993     COUNT_INST(I_SUBC);
   994     load_reg( REG_EAX, Rm );
   995     load_reg( REG_ECX, Rn );
   996     if( sh4_x86.tstate != TSTATE_C ) {
   997 	LDC_t();
   998     }
   999     SBBL_r32_r32( REG_EAX, REG_ECX );
  1000     store_reg( REG_ECX, Rn );
  1001     SETC_t();
  1002     sh4_x86.tstate = TSTATE_C;
  1003 :}
  1004 SUBV Rm, Rn {:  
       // Rn := Rn - Rm with T set on signed overflow (x86 OF via SETO_t).
  1005     COUNT_INST(I_SUBV);
  1006     load_reg( REG_EAX, Rm );
  1007     load_reg( REG_ECX, Rn );
  1008     SUBL_r32_r32( REG_EAX, REG_ECX );
  1009     store_reg( REG_ECX, Rn );
  1010     SETO_t();
  1011     sh4_x86.tstate = TSTATE_O;
  1012 :}
  1013 SWAP.B Rm, Rn {:  
       // Swap the two low bytes of Rm into Rn; XCHG leaves EFLAGS alone, so the
       // cached tstate remains valid (no TSTATE_NONE here).
  1014     COUNT_INST(I_SWAPB);
  1015     load_reg( REG_EAX, Rm );
  1016     XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
  1017     store_reg( REG_EAX, Rn );
  1018 :}
  1019 SWAP.W Rm, Rn {:  
       // Swap the upper and lower 16-bit halves of Rm into Rn.
       // NOTE(review): this counts as I_SWAPB, same as SWAP.B above — looks like
       // a copy/paste slip in the stats counter; confirm an I_SWAPW enumerator
       // exists in sh4stat before changing.
  1020     COUNT_INST(I_SWAPB);
  1021     load_reg( REG_EAX, Rm );
  1022     MOVL_r32_r32( REG_EAX, REG_ECX );
  1023     SHLL_imm_r32( 16, REG_ECX );
  1024     SHRL_imm_r32( 16, REG_EAX );
  1025     ORL_r32_r32( REG_EAX, REG_ECX );
  1026     store_reg( REG_ECX, Rn );
  1027     sh4_x86.tstate = TSTATE_NONE;
  1028 :}
  1029 TAS.B @Rn {:  
       // Test-and-set: T := (byte @Rn == 0), then write the byte back with bit 7
       // set. Address is preserved in stack slot 0 across the read call.
  1030     COUNT_INST(I_TASB);
  1031     load_reg( REG_EAX, Rn );
  1032     MOVL_r32_rspdisp( REG_EAX, 0 );
  1033     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
  1034     TESTB_r8_r8( REG_DL, REG_DL );
  1035     SETE_t();
  1036     ORB_imms_r8( 0x80, REG_DL );
  1037     MOVL_rspdisp_r32( 0, REG_EAX );
  1038     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1039     sh4_x86.tstate = TSTATE_NONE;
  1040 :}
  1041 TST Rm, Rn {:  
       // T := ((Rm & Rn) == 0); registers are not modified.
  1042     COUNT_INST(I_TST);
  1043     load_reg( REG_EAX, Rm );
  1044     load_reg( REG_ECX, Rn );
  1045     TESTL_r32_r32( REG_EAX, REG_ECX );
  1046     SETE_t();
  1047     sh4_x86.tstate = TSTATE_E;
  1048 :}
  1049 TST #imm, R0 {:  
       // T := ((R0 & imm) == 0).
  1050     COUNT_INST(I_TSTI);
  1051     load_reg( REG_EAX, 0 );
  1052     TESTL_imms_r32( imm, REG_EAX );
  1053     SETE_t();
  1054     sh4_x86.tstate = TSTATE_E;
  1055 :}
  1056 TST.B #imm, @(R0, GBR) {:  
       // T := ((byte @(R0+GBR) & imm) == 0); memory is only read, not written.
  1057     COUNT_INST(I_TSTB);
  1058     load_reg( REG_EAX, 0);
  1059     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
  1060     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1061     TESTB_imms_r8( imm, REG_AL );
  1062     SETE_t();
  1063     sh4_x86.tstate = TSTATE_E;
  1064 :}
  1065 XOR Rm, Rn {:  
       // Rn := Rn ^ Rm.
  1066     COUNT_INST(I_XOR);
  1067     load_reg( REG_EAX, Rm );
  1068     load_reg( REG_ECX, Rn );
  1069     XORL_r32_r32( REG_EAX, REG_ECX );
  1070     store_reg( REG_ECX, Rn );
  1071     sh4_x86.tstate = TSTATE_NONE;
  1072 :}
  1073 XOR #imm, R0 {:  
       // R0 := R0 ^ imm.
  1074     COUNT_INST(I_XORI);
  1075     load_reg( REG_EAX, 0 );
  1076     XORL_imms_r32( imm, REG_EAX );
  1077     store_reg( REG_EAX, 0 );
  1078     sh4_x86.tstate = TSTATE_NONE;
  1079 :}
  1080 XOR.B #imm, @(R0, GBR) {:  
       // Read-modify-write byte at R0+GBR, same stack-slot pattern as OR.B.
  1081     COUNT_INST(I_XORB);
  1082     load_reg( REG_EAX, 0 );
  1083     ADDL_rbpdisp_r32( R_GBR, REG_EAX ); 
  1084     MOVL_r32_rspdisp( REG_EAX, 0 );
  1085     MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
  1086     MOVL_rspdisp_r32( 0, REG_EAX );
  1087     XORL_imms_r32( imm, REG_EDX );
  1088     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1089     sh4_x86.tstate = TSTATE_NONE;
  1090 :}
  1091 XTRCT Rm, Rn {:
       // Rn := (Rm low half << 16) | (Rn high half >> 16) — extract the middle
       // 32 bits of the 64-bit concatenation Rm:Rn.
  1092     COUNT_INST(I_XTRCT);
  1093     load_reg( REG_EAX, Rm );
  1094     load_reg( REG_ECX, Rn );
  1095     SHLL_imm_r32( 16, REG_EAX );
  1096     SHRL_imm_r32( 16, REG_ECX );
  1097     ORL_r32_r32( REG_EAX, REG_ECX );
  1098     store_reg( REG_ECX, Rn );
  1099     sh4_x86.tstate = TSTATE_NONE;
  1100 :}
  1102 /* Data move instructions */
  1103 MOV Rm, Rn {:  
       // Plain register copy; no flags touched, tstate stays valid.
  1104     COUNT_INST(I_MOV);
  1105     load_reg( REG_EAX, Rm );
  1106     store_reg( REG_EAX, Rn );
  1107 :}
  1108 MOV #imm, Rn {:  
       // Rn := sign-extended 8-bit immediate.
  1109     COUNT_INST(I_MOVI);
  1110     load_imm32( REG_EAX, imm );
  1111     store_reg( REG_EAX, Rn );
  1112 :}
  1113 MOV.B Rm, @Rn {:  
       // Store low byte of Rm to @Rn.
  1114     COUNT_INST(I_MOVB);
  1115     load_reg( REG_EAX, Rn );
  1116     load_reg( REG_EDX, Rm );
  1117     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1118     sh4_x86.tstate = TSTATE_NONE;
  1119 :}
  1120 MOV.B Rm, @-Rn {:  
       // Pre-decrement store: the register decrement is committed only after
       // the write succeeds, so a memory exception leaves Rn unchanged.
  1121     COUNT_INST(I_MOVB);
  1122     load_reg( REG_EAX, Rn );
  1123     LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
  1124     load_reg( REG_EDX, Rm );
  1125     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1126     ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
  1127     sh4_x86.tstate = TSTATE_NONE;
  1128 :}
  1129 MOV.B Rm, @(R0, Rn) {:  
       // Indexed store: byte of Rm to @(R0+Rn).
  1130     COUNT_INST(I_MOVB);
  1131     load_reg( REG_EAX, 0 );
  1132     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1133     load_reg( REG_EDX, Rm );
  1134     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1135     sh4_x86.tstate = TSTATE_NONE;
  1136 :}
  1137 MOV.B R0, @(disp, GBR) {:  
       // GBR-relative byte store of R0.
  1138     COUNT_INST(I_MOVB);
  1139     load_spreg( REG_EAX, R_GBR );
  1140     ADDL_imms_r32( disp, REG_EAX );
  1141     load_reg( REG_EDX, 0 );
  1142     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1143     sh4_x86.tstate = TSTATE_NONE;
  1144 :}
  1145 MOV.B R0, @(disp, Rn) {:  
       // Displacement byte store of R0.
  1146     COUNT_INST(I_MOVB);
  1147     load_reg( REG_EAX, Rn );
  1148     ADDL_imms_r32( disp, REG_EAX );
  1149     load_reg( REG_EDX, 0 );
  1150     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1151     sh4_x86.tstate = TSTATE_NONE;
  1152 :}
  1153 MOV.B @Rm, Rn {:  
       // Byte load with sign extension (MEM_READ_BYTE sign-extends into EAX).
  1154     COUNT_INST(I_MOVB);
  1155     load_reg( REG_EAX, Rm );
  1156     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1157     store_reg( REG_EAX, Rn );
  1158     sh4_x86.tstate = TSTATE_NONE;
  1159 :}
  1160 MOV.B @Rm+, Rn {:  
  1161     COUNT_INST(I_MOVB);
  1162     load_reg( REG_EAX, Rm );
  1163     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1164     if( Rm != Rn ) {
  1165     	ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
  1167     store_reg( REG_EAX, Rn );
  1168     sh4_x86.tstate = TSTATE_NONE;
  1169 :}
  1170 MOV.B @(R0, Rm), Rn {:  
       // Indexed byte load from @(R0+Rm).
  1171     COUNT_INST(I_MOVB);
  1172     load_reg( REG_EAX, 0 );
  1173     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1174     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1175     store_reg( REG_EAX, Rn );
  1176     sh4_x86.tstate = TSTATE_NONE;
  1177 :}
  1178 MOV.B @(disp, GBR), R0 {:  
       // GBR-relative byte load into R0.
  1179     COUNT_INST(I_MOVB);
  1180     load_spreg( REG_EAX, R_GBR );
  1181     ADDL_imms_r32( disp, REG_EAX );
  1182     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1183     store_reg( REG_EAX, 0 );
  1184     sh4_x86.tstate = TSTATE_NONE;
  1185 :}
  1186 MOV.B @(disp, Rm), R0 {:  
       // Displacement byte load into R0.
  1187     COUNT_INST(I_MOVB);
  1188     load_reg( REG_EAX, Rm );
  1189     ADDL_imms_r32( disp, REG_EAX );
  1190     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1191     store_reg( REG_EAX, 0 );
  1192     sh4_x86.tstate = TSTATE_NONE;
  1193 :}
  1194 MOV.L Rm, @Rn {:
       // Long store with an inlined store-queue fast path: addresses in
       // 0xE0000000-0xE3FFFFFF (detected via the 0xFC000000 mask) write directly
       // into sh4r.store_queue instead of going through the memory subsystem.
  1195     COUNT_INST(I_MOVL);
  1196     load_reg( REG_EAX, Rn );
  1197     check_walign32(REG_EAX);
  1198     MOVL_r32_r32( REG_EAX, REG_ECX );
  1199     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1200     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1201     JNE_label( notsq );
       // Store-queue hit: index the 64-byte SQ area by the low address bits.
  1202     ANDL_imms_r32( 0x3C, REG_EAX );
  1203     load_reg( REG_EDX, Rm );
  1204     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1205     JMP_label(end);
  1206     JMP_TARGET(notsq);
  1207     load_reg( REG_EDX, Rm );
  1208     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1209     JMP_TARGET(end);
  1210     sh4_x86.tstate = TSTATE_NONE;
  1211 :}
  1212 MOV.L Rm, @-Rn {:  
       // Pre-decrement long store; Rn committed only after the write succeeds.
  1213     COUNT_INST(I_MOVL);
  1214     load_reg( REG_EAX, Rn );
  1215     ADDL_imms_r32( -4, REG_EAX );
  1216     check_walign32( REG_EAX );
  1217     load_reg( REG_EDX, Rm );
  1218     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1219     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  1220     sh4_x86.tstate = TSTATE_NONE;
  1221 :}
  1222 MOV.L Rm, @(R0, Rn) {:  
       // Indexed long store to @(R0+Rn).
  1223     COUNT_INST(I_MOVL);
  1224     load_reg( REG_EAX, 0 );
  1225     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1226     check_walign32( REG_EAX );
  1227     load_reg( REG_EDX, Rm );
  1228     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1229     sh4_x86.tstate = TSTATE_NONE;
  1230 :}
  1231 MOV.L R0, @(disp, GBR) {:  
       // GBR-relative long store of R0.
  1232     COUNT_INST(I_MOVL);
  1233     load_spreg( REG_EAX, R_GBR );
  1234     ADDL_imms_r32( disp, REG_EAX );
  1235     check_walign32( REG_EAX );
  1236     load_reg( REG_EDX, 0 );
  1237     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1238     sh4_x86.tstate = TSTATE_NONE;
  1239 :}
  1240 MOV.L Rm, @(disp, Rn) {:  
       // Displacement long store, with the same store-queue fast path as
       // MOV.L Rm, @Rn above (mask 0xFC000000, compare 0xE0000000).
  1241     COUNT_INST(I_MOVL);
  1242     load_reg( REG_EAX, Rn );
  1243     ADDL_imms_r32( disp, REG_EAX );
  1244     check_walign32( REG_EAX );
  1245     MOVL_r32_r32( REG_EAX, REG_ECX );
  1246     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1247     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1248     JNE_label( notsq );
  1249     ANDL_imms_r32( 0x3C, REG_EAX );
  1250     load_reg( REG_EDX, Rm );
  1251     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1252     JMP_label(end);
  1253     JMP_TARGET(notsq);
  1254     load_reg( REG_EDX, Rm );
  1255     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1256     JMP_TARGET(end);
  1257     sh4_x86.tstate = TSTATE_NONE;
  1258 :}
  1259 MOV.L @Rm, Rn {:  
       // Long load; check_ralign32 raises an address error on misalignment.
  1260     COUNT_INST(I_MOVL);
  1261     load_reg( REG_EAX, Rm );
  1262     check_ralign32( REG_EAX );
  1263     MEM_READ_LONG( REG_EAX, REG_EAX );
  1264     store_reg( REG_EAX, Rn );
  1265     sh4_x86.tstate = TSTATE_NONE;
  1266 :}
  1267 MOV.L @Rm+, Rn {:  
  1268     COUNT_INST(I_MOVL);
  1269     load_reg( REG_EAX, Rm );
  1270     check_ralign32( REG_EAX );
  1271     MEM_READ_LONG( REG_EAX, REG_EAX );
  1272     if( Rm != Rn ) {
  1273     	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  1275     store_reg( REG_EAX, Rn );
  1276     sh4_x86.tstate = TSTATE_NONE;
  1277 :}
  1278 MOV.L @(R0, Rm), Rn {:  
       // Indexed long load from @(R0+Rm).
  1279     COUNT_INST(I_MOVL);
  1280     load_reg( REG_EAX, 0 );
  1281     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1282     check_ralign32( REG_EAX );
  1283     MEM_READ_LONG( REG_EAX, REG_EAX );
  1284     store_reg( REG_EAX, Rn );
  1285     sh4_x86.tstate = TSTATE_NONE;
  1286 :}
  1287 MOV.L @(disp, GBR), R0 {:
       // GBR-relative long load into R0.
  1288     COUNT_INST(I_MOVL);
  1289     load_spreg( REG_EAX, R_GBR );
  1290     ADDL_imms_r32( disp, REG_EAX );
  1291     check_ralign32( REG_EAX );
  1292     MEM_READ_LONG( REG_EAX, REG_EAX );
  1293     store_reg( REG_EAX, 0 );
  1294     sh4_x86.tstate = TSTATE_NONE;
  1295 :}
  1296 MOV.L @(disp, PC), Rn {:  
  1297     COUNT_INST(I_MOVLPC);
  1298     if( sh4_x86.in_delay_slot ) {
  1299 	SLOTILLEGAL();
  1300     } else {
  1301 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1302 	if( IS_IN_ICACHE(target) ) {
  1303 	    // If the target address is in the same page as the code, it's
  1304 	    // pretty safe to just ref it directly and circumvent the whole
  1305 	    // memory subsystem. (this is a big performance win)
  1307 	    // FIXME: There's a corner-case that's not handled here when
  1308 	    // the current code-page is in the ITLB but not in the UTLB.
  1309 	    // (should generate a TLB miss although need to test SH4 
  1310 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1311 	    // behaviour though.
  1312 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1313 	    MOVL_moffptr_eax( ptr );
  1314 	} else {
  1315 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1316 	    // different virtual address than the translation was done with,
  1317 	    // but we can safely assume that the low bits are the same.
  1318 	    load_imm32( REG_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1319 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1320 	    MEM_READ_LONG( REG_EAX, REG_EAX );
  1321 	    sh4_x86.tstate = TSTATE_NONE;
  1323 	store_reg( REG_EAX, Rn );
  1325 :}
  1326 MOV.L @(disp, Rm), Rn {:  
       // Displacement long load.
  1327     COUNT_INST(I_MOVL);
  1328     load_reg( REG_EAX, Rm );
  1329     ADDL_imms_r32( disp, REG_EAX );
  1330     check_ralign32( REG_EAX );
  1331     MEM_READ_LONG( REG_EAX, REG_EAX );
  1332     store_reg( REG_EAX, Rn );
  1333     sh4_x86.tstate = TSTATE_NONE;
  1334 :}
  1335 MOV.W Rm, @Rn {:  
       // Word store; check_walign16 raises an address error on odd addresses.
  1336     COUNT_INST(I_MOVW);
  1337     load_reg( REG_EAX, Rn );
  1338     check_walign16( REG_EAX );
  1339     load_reg( REG_EDX, Rm );
  1340     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1341     sh4_x86.tstate = TSTATE_NONE;
  1342 :}
  1343 MOV.W Rm, @-Rn {:  
       // Pre-decrement word store; Rn committed only after the write succeeds.
  1344     COUNT_INST(I_MOVW);
  1345     load_reg( REG_EAX, Rn );
  1346     check_walign16( REG_EAX );
  1347     LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
  1348     load_reg( REG_EDX, Rm );
  1349     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1350     ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
  1351     sh4_x86.tstate = TSTATE_NONE;
  1352 :}
  1353 MOV.W Rm, @(R0, Rn) {:  
       // Indexed word store to @(R0+Rn).
  1354     COUNT_INST(I_MOVW);
  1355     load_reg( REG_EAX, 0 );
  1356     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1357     check_walign16( REG_EAX );
  1358     load_reg( REG_EDX, Rm );
  1359     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1360     sh4_x86.tstate = TSTATE_NONE;
  1361 :}
  1362 MOV.W R0, @(disp, GBR) {:  
       // GBR-relative word store of R0.
  1363     COUNT_INST(I_MOVW);
  1364     load_spreg( REG_EAX, R_GBR );
  1365     ADDL_imms_r32( disp, REG_EAX );
  1366     check_walign16( REG_EAX );
  1367     load_reg( REG_EDX, 0 );
  1368     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1369     sh4_x86.tstate = TSTATE_NONE;
  1370 :}
  1371 MOV.W R0, @(disp, Rn) {:  
       // Displacement word store of R0.
  1372     COUNT_INST(I_MOVW);
  1373     load_reg( REG_EAX, Rn );
  1374     ADDL_imms_r32( disp, REG_EAX );
  1375     check_walign16( REG_EAX );
  1376     load_reg( REG_EDX, 0 );
  1377     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1378     sh4_x86.tstate = TSTATE_NONE;
  1379 :}
  1380 MOV.W @Rm, Rn {:  
       // Word load with sign extension.
  1381     COUNT_INST(I_MOVW);
  1382     load_reg( REG_EAX, Rm );
  1383     check_ralign16( REG_EAX );
  1384     MEM_READ_WORD( REG_EAX, REG_EAX );
  1385     store_reg( REG_EAX, Rn );
  1386     sh4_x86.tstate = TSTATE_NONE;
  1387 :}
  1388 MOV.W @Rm+, Rn {:  
  1389     COUNT_INST(I_MOVW);
  1390     load_reg( REG_EAX, Rm );
  1391     check_ralign16( REG_EAX );
  1392     MEM_READ_WORD( REG_EAX, REG_EAX );
  1393     if( Rm != Rn ) {
  1394         ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
  1396     store_reg( REG_EAX, Rn );
  1397     sh4_x86.tstate = TSTATE_NONE;
  1398 :}
  1399 MOV.W @(R0, Rm), Rn {:  
       // Indexed word load from @(R0+Rm).
  1400     COUNT_INST(I_MOVW);
  1401     load_reg( REG_EAX, 0 );
  1402     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1403     check_ralign16( REG_EAX );
  1404     MEM_READ_WORD( REG_EAX, REG_EAX );
  1405     store_reg( REG_EAX, Rn );
  1406     sh4_x86.tstate = TSTATE_NONE;
  1407 :}
  1408 MOV.W @(disp, GBR), R0 {:  
       // GBR-relative word load into R0.
  1409     COUNT_INST(I_MOVW);
  1410     load_spreg( REG_EAX, R_GBR );
  1411     ADDL_imms_r32( disp, REG_EAX );
  1412     check_ralign16( REG_EAX );
  1413     MEM_READ_WORD( REG_EAX, REG_EAX );
  1414     store_reg( REG_EAX, 0 );
  1415     sh4_x86.tstate = TSTATE_NONE;
  1416 :}
  1417 MOV.W @(disp, PC), Rn {:  
  1418     COUNT_INST(I_MOVW);
  1419     if( sh4_x86.in_delay_slot ) {
  1420 	SLOTILLEGAL();
  1421     } else {
  1422 	// See comments for MOV.L @(disp, PC), Rn
  1423 	uint32_t target = pc + disp + 4;
  1424 	if( IS_IN_ICACHE(target) ) {
  1425 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1426 	    MOVL_moffptr_eax( ptr );
  1427 	    MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1428 	} else {
  1429 	    load_imm32( REG_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1430 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1431 	    MEM_READ_WORD( REG_EAX, REG_EAX );
  1432 	    sh4_x86.tstate = TSTATE_NONE;
  1434 	store_reg( REG_EAX, Rn );
  1436 :}
  1437 MOV.W @(disp, Rm), R0 {:  
       // Displacement word load into R0.
  1438     COUNT_INST(I_MOVW);
  1439     load_reg( REG_EAX, Rm );
  1440     ADDL_imms_r32( disp, REG_EAX );
  1441     check_ralign16( REG_EAX );
  1442     MEM_READ_WORD( REG_EAX, REG_EAX );
  1443     store_reg( REG_EAX, 0 );
  1444     sh4_x86.tstate = TSTATE_NONE;
  1445 :}
  1446 MOVA @(disp, PC), R0 {:  
  1447     COUNT_INST(I_MOVA);
  1448     if( sh4_x86.in_delay_slot ) {
  1449 	SLOTILLEGAL();
  1450     } else {
  1451 	load_imm32( REG_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1452 	ADDL_rbpdisp_r32( R_PC, REG_ECX );
  1453 	store_reg( REG_ECX, 0 );
  1454 	sh4_x86.tstate = TSTATE_NONE;
  1456 :}
  1457 MOVCA.L R0, @Rn {:  
       // Cache-line-allocating store; translated as a plain long store here
       // (no cache model), with write-alignment check.
  1458     COUNT_INST(I_MOVCA);
  1459     load_reg( REG_EAX, Rn );
  1460     check_walign32( REG_EAX );
  1461     load_reg( REG_EDX, 0 );
  1462     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1463     sh4_x86.tstate = TSTATE_NONE;
  1464 :}
  1466 /* Control transfer instructions */
  1467 BF disp {:
  1468     COUNT_INST(I_BF);
  1469     if( sh4_x86.in_delay_slot ) {
  1470 	SLOTILLEGAL();
  1471     } else {
  1472 	sh4vma_t target = disp + pc + 4;
  1473 	JT_label( nottaken );
  1474 	exit_block_rel(target, pc+2 );
  1475 	JMP_TARGET(nottaken);
  1476 	return 2;
  1478 :}
  1479 BF/S disp {:
  1480     COUNT_INST(I_BFS);
  1481     if( sh4_x86.in_delay_slot ) {
  1482 	SLOTILLEGAL();
  1483     } else {
  1484 	sh4_x86.in_delay_slot = DELAY_PC;
  1485 	if( UNTRANSLATABLE(pc+2) ) {
  1486 	    load_imm32( REG_EAX, pc + 4 - sh4_x86.block_start_pc );
  1487 	    JT_label(nottaken);
  1488 	    ADDL_imms_r32( disp, REG_EAX );
  1489 	    JMP_TARGET(nottaken);
  1490 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1491 	    store_spreg( REG_EAX, R_NEW_PC );
  1492 	    exit_block_emu(pc+2);
  1493 	    sh4_x86.branch_taken = TRUE;
  1494 	    return 2;
  1495 	} else {
  1496 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1497 		CMPL_imms_rbpdisp( 1, R_T );
  1498 		sh4_x86.tstate = TSTATE_E;
  1500 	    sh4vma_t target = disp + pc + 4;
  1501 	    JCC_cc_rel32(sh4_x86.tstate,0);
  1502 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  1503 	    int save_tstate = sh4_x86.tstate;
  1504 	    sh4_translate_instruction(pc+2);
  1505 	    exit_block_rel( target, pc+4 );
  1507 	    // not taken
  1508 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1509 	    sh4_x86.tstate = save_tstate;
  1510 	    sh4_translate_instruction(pc+2);
  1511 	    return 4;
  1514 :}
  1515 BRA disp {:  
  1516     COUNT_INST(I_BRA);
  1517     if( sh4_x86.in_delay_slot ) {
  1518 	SLOTILLEGAL();
  1519     } else {
  1520 	sh4_x86.in_delay_slot = DELAY_PC;
  1521 	sh4_x86.branch_taken = TRUE;
  1522 	if( UNTRANSLATABLE(pc+2) ) {
  1523 	    load_spreg( REG_EAX, R_PC );
  1524 	    ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
  1525 	    store_spreg( REG_EAX, R_NEW_PC );
  1526 	    exit_block_emu(pc+2);
  1527 	    return 2;
  1528 	} else {
  1529 	    sh4_translate_instruction( pc + 2 );
  1530 	    exit_block_rel( disp + pc + 4, pc+4 );
  1531 	    return 4;
  1534 :}
  1535 BRAF Rn {:  
  1536     COUNT_INST(I_BRAF);
  1537     if( sh4_x86.in_delay_slot ) {
  1538 	SLOTILLEGAL();
  1539     } else {
  1540 	load_spreg( REG_EAX, R_PC );
  1541 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1542 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1543 	store_spreg( REG_EAX, R_NEW_PC );
  1544 	sh4_x86.in_delay_slot = DELAY_PC;
  1545 	sh4_x86.tstate = TSTATE_NONE;
  1546 	sh4_x86.branch_taken = TRUE;
  1547 	if( UNTRANSLATABLE(pc+2) ) {
  1548 	    exit_block_emu(pc+2);
  1549 	    return 2;
  1550 	} else {
  1551 	    sh4_translate_instruction( pc + 2 );
  1552 	    exit_block_newpcset(pc+4);
  1553 	    return 4;
  1556 :}
  1557 BSR disp {:  
  1558     COUNT_INST(I_BSR);
  1559     if( sh4_x86.in_delay_slot ) {
  1560 	SLOTILLEGAL();
  1561     } else {
  1562 	load_spreg( REG_EAX, R_PC );
  1563 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1564 	store_spreg( REG_EAX, R_PR );
  1565 	sh4_x86.in_delay_slot = DELAY_PC;
  1566 	sh4_x86.branch_taken = TRUE;
  1567 	sh4_x86.tstate = TSTATE_NONE;
  1568 	if( UNTRANSLATABLE(pc+2) ) {
  1569 	    ADDL_imms_r32( disp, REG_EAX );
  1570 	    store_spreg( REG_EAX, R_NEW_PC );
  1571 	    exit_block_emu(pc+2);
  1572 	    return 2;
  1573 	} else {
  1574 	    sh4_translate_instruction( pc + 2 );
  1575 	    exit_block_rel( disp + pc + 4, pc+4 );
  1576 	    return 4;
  1579 :}
  1580 BSRF Rn {:  
  1581     COUNT_INST(I_BSRF);
  1582     if( sh4_x86.in_delay_slot ) {
  1583 	SLOTILLEGAL();
  1584     } else {
  1585 	load_spreg( REG_EAX, R_PC );
  1586 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1587 	store_spreg( REG_EAX, R_PR );
  1588 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1589 	store_spreg( REG_EAX, R_NEW_PC );
  1591 	sh4_x86.in_delay_slot = DELAY_PC;
  1592 	sh4_x86.tstate = TSTATE_NONE;
  1593 	sh4_x86.branch_taken = TRUE;
  1594 	if( UNTRANSLATABLE(pc+2) ) {
  1595 	    exit_block_emu(pc+2);
  1596 	    return 2;
  1597 	} else {
  1598 	    sh4_translate_instruction( pc + 2 );
  1599 	    exit_block_newpcset(pc+4);
  1600 	    return 4;
  1603 :}
  1604 BT disp {:
  1605     COUNT_INST(I_BT);
  1606     if( sh4_x86.in_delay_slot ) {
  1607 	SLOTILLEGAL();
  1608     } else {
  1609 	sh4vma_t target = disp + pc + 4;
  1610 	JF_label( nottaken );
  1611 	exit_block_rel(target, pc+2 );
  1612 	JMP_TARGET(nottaken);
  1613 	return 2;
  1615 :}
  1616 BT/S disp {:
  1617     COUNT_INST(I_BTS);
  1618     if( sh4_x86.in_delay_slot ) {
  1619 	SLOTILLEGAL();
  1620     } else {
  1621 	sh4_x86.in_delay_slot = DELAY_PC;
  1622 	if( UNTRANSLATABLE(pc+2) ) {
  1623 	    load_imm32( REG_EAX, pc + 4 - sh4_x86.block_start_pc );
  1624 	    JF_label(nottaken);
  1625 	    ADDL_imms_r32( disp, REG_EAX );
  1626 	    JMP_TARGET(nottaken);
  1627 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1628 	    store_spreg( REG_EAX, R_NEW_PC );
  1629 	    exit_block_emu(pc+2);
  1630 	    sh4_x86.branch_taken = TRUE;
  1631 	    return 2;
  1632 	} else {
  1633 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1634 		CMPL_imms_rbpdisp( 1, R_T );
  1635 		sh4_x86.tstate = TSTATE_E;
  1637 	    JCC_cc_rel32(sh4_x86.tstate^1,0);
  1638 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  1640 	    int save_tstate = sh4_x86.tstate;
  1641 	    sh4_translate_instruction(pc+2);
  1642 	    exit_block_rel( disp + pc + 4, pc+4 );
  1643 	    // not taken
  1644 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1645 	    sh4_x86.tstate = save_tstate;
  1646 	    sh4_translate_instruction(pc+2);
  1647 	    return 4;
  1650 :}
  1651 JMP @Rn {:  
  1652     COUNT_INST(I_JMP);
  1653     if( sh4_x86.in_delay_slot ) {
  1654 	SLOTILLEGAL();
  1655     } else {
  1656 	load_reg( REG_ECX, Rn );
  1657 	store_spreg( REG_ECX, R_NEW_PC );
  1658 	sh4_x86.in_delay_slot = DELAY_PC;
  1659 	sh4_x86.branch_taken = TRUE;
  1660 	if( UNTRANSLATABLE(pc+2) ) {
  1661 	    exit_block_emu(pc+2);
  1662 	    return 2;
  1663 	} else {
  1664 	    sh4_translate_instruction(pc+2);
  1665 	    exit_block_newpcset(pc+4);
  1666 	    return 4;
  1669 :}
  1670 JSR @Rn {:  
  1671     COUNT_INST(I_JSR);
  1672     if( sh4_x86.in_delay_slot ) {
  1673 	SLOTILLEGAL();
  1674     } else {
  1675 	load_spreg( REG_EAX, R_PC );
  1676 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1677 	store_spreg( REG_EAX, R_PR );
  1678 	load_reg( REG_ECX, Rn );
  1679 	store_spreg( REG_ECX, R_NEW_PC );
  1680 	sh4_x86.in_delay_slot = DELAY_PC;
  1681 	sh4_x86.branch_taken = TRUE;
  1682 	sh4_x86.tstate = TSTATE_NONE;
  1683 	if( UNTRANSLATABLE(pc+2) ) {
  1684 	    exit_block_emu(pc+2);
  1685 	    return 2;
  1686 	} else {
  1687 	    sh4_translate_instruction(pc+2);
  1688 	    exit_block_newpcset(pc+4);
  1689 	    return 4;
  1692 :}
  1693 RTE {:  
  1694     COUNT_INST(I_RTE);
  1695     if( sh4_x86.in_delay_slot ) {
  1696 	SLOTILLEGAL();
  1697     } else {
  1698 	check_priv();
  1699 	load_spreg( REG_ECX, R_SPC );
  1700 	store_spreg( REG_ECX, R_NEW_PC );
  1701 	load_spreg( REG_EAX, R_SSR );
  1702 	call_func1( sh4_write_sr, REG_EAX );
  1703 	sh4_x86.in_delay_slot = DELAY_PC;
  1704 	sh4_x86.fpuen_checked = FALSE;
  1705 	sh4_x86.tstate = TSTATE_NONE;
  1706 	sh4_x86.branch_taken = TRUE;
  1707 	if( UNTRANSLATABLE(pc+2) ) {
  1708 	    exit_block_emu(pc+2);
  1709 	    return 2;
  1710 	} else {
  1711 	    sh4_translate_instruction(pc+2);
  1712 	    exit_block_newpcset(pc+4);
  1713 	    return 4;
  1716 :}
  1717 RTS {:  
  1718     COUNT_INST(I_RTS);
  1719     if( sh4_x86.in_delay_slot ) {
  1720 	SLOTILLEGAL();
  1721     } else {
  1722 	load_spreg( REG_ECX, R_PR );
  1723 	store_spreg( REG_ECX, R_NEW_PC );
  1724 	sh4_x86.in_delay_slot = DELAY_PC;
  1725 	sh4_x86.branch_taken = TRUE;
  1726 	if( UNTRANSLATABLE(pc+2) ) {
  1727 	    exit_block_emu(pc+2);
  1728 	    return 2;
  1729 	} else {
  1730 	    sh4_translate_instruction(pc+2);
  1731 	    exit_block_newpcset(pc+4);
  1732 	    return 4;
  1735 :}
  1736 TRAPA #imm {:  
  1737     COUNT_INST(I_TRAPA);
  1738     if( sh4_x86.in_delay_slot ) {
  1739 	SLOTILLEGAL();
  1740     } else {
  1741 	load_imm32( REG_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1742 	ADDL_r32_rbpdisp( REG_ECX, R_PC );
  1743 	load_imm32( REG_EAX, imm );
  1744 	call_func1( sh4_raise_trap, REG_EAX );
  1745 	sh4_x86.tstate = TSTATE_NONE;
  1746 	exit_block_pcset(pc+2);
  1747 	sh4_x86.branch_taken = TRUE;
  1748 	return 2;
  1750 :}
  1751 UNDEF {:  
  1752     COUNT_INST(I_UNDEF);
  1753     if( sh4_x86.in_delay_slot ) {
  1754 	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2);    
  1755     } else {
  1756 	exit_block_exc(EXC_ILLEGAL, pc);    
  1757 	return 2;
  1759 :}
  1761 CLRMAC {:  
       // Zero both halves of the MAC accumulator.
  1762     COUNT_INST(I_CLRMAC);
  1763     XORL_r32_r32(REG_EAX, REG_EAX);
  1764     store_spreg( REG_EAX, R_MACL );
  1765     store_spreg( REG_EAX, R_MACH );
  1766     sh4_x86.tstate = TSTATE_NONE;
  1767 :}
  1768 CLRS {:
       // S := 0, written via the carry flag + SETCC into sh4r.s.
  1769     COUNT_INST(I_CLRS);
  1770     CLC();
  1771     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  1772     sh4_x86.tstate = TSTATE_NONE;
  1773 :}
  1774 CLRT {:  
       // T := 0; carry flag now mirrors T, so tstate cache is TSTATE_C.
  1775     COUNT_INST(I_CLRT);
  1776     CLC();
  1777     SETC_t();
  1778     sh4_x86.tstate = TSTATE_C;
  1779 :}
  1780 SETS {:  
       // S := 1.
  1781     COUNT_INST(I_SETS);
  1782     STC();
  1783     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  1784     sh4_x86.tstate = TSTATE_NONE;
  1785 :}
  1786 SETT {:  
       // T := 1; carry flag mirrors T afterwards.
  1787     COUNT_INST(I_SETT);
  1788     STC();
  1789     SETC_t();
  1790     sh4_x86.tstate = TSTATE_C;
  1791 :}
  1793 /* Floating point moves */
  1794 FMOV FRm, FRn {:  
  1795     COUNT_INST(I_FMOV1);
  1796     check_fpuen();
  1797     if( sh4_x86.double_size ) {
  1798         load_dr0( REG_EAX, FRm );
  1799         load_dr1( REG_ECX, FRm );
  1800         store_dr0( REG_EAX, FRn );
  1801         store_dr1( REG_ECX, FRn );
  1802     } else {
  1803         load_fr( REG_EAX, FRm ); // SZ=0 branch
  1804         store_fr( REG_EAX, FRn );
  1806 :}
  1807 FMOV FRm, @Rn {: 
  1808     COUNT_INST(I_FMOV2);
  1809     check_fpuen();
  1810     load_reg( REG_EAX, Rn );
  1811     if( sh4_x86.double_size ) {
  1812         check_walign64( REG_EAX );
  1813         load_dr0( REG_EDX, FRm );
  1814         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1815         load_reg( REG_EAX, Rn );
  1816         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  1817         load_dr1( REG_EDX, FRm );
  1818         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1819     } else {
  1820         check_walign32( REG_EAX );
  1821         load_fr( REG_EDX, FRm );
  1822         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1824     sh4_x86.tstate = TSTATE_NONE;
  1825 :}
  1826 FMOV @Rm, FRn {:  
  1827     COUNT_INST(I_FMOV5);
  1828     check_fpuen();
  1829     load_reg( REG_EAX, Rm );
  1830     if( sh4_x86.double_size ) {
  1831         check_ralign64( REG_EAX );
  1832         MEM_READ_LONG( REG_EAX, REG_EAX );
  1833         store_dr0( REG_EAX, FRn );
  1834         load_reg( REG_EAX, Rm );
  1835         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  1836         MEM_READ_LONG( REG_EAX, REG_EAX );
  1837         store_dr1( REG_EAX, FRn );
  1838     } else {
  1839         check_ralign32( REG_EAX );
  1840         MEM_READ_LONG( REG_EAX, REG_EAX );
  1841         store_fr( REG_EAX, FRn );
  1843     sh4_x86.tstate = TSTATE_NONE;
  1844 :}
  1845 FMOV FRm, @-Rn {:  
  1846     COUNT_INST(I_FMOV3);
  1847     check_fpuen();
  1848     load_reg( REG_EAX, Rn );
  1849     if( sh4_x86.double_size ) {
  1850         check_walign64( REG_EAX );
  1851         LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
  1852         load_dr0( REG_EDX, FRm );
  1853         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1854         load_reg( REG_EAX, Rn );
  1855         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  1856         load_dr1( REG_EDX, FRm );
  1857         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1858         ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
  1859     } else {
  1860         check_walign32( REG_EAX );
  1861         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  1862         load_fr( REG_EDX, FRm );
  1863         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1864         ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
  1866     sh4_x86.tstate = TSTATE_NONE;
  1867 :}
  1868 FMOV @Rm+, FRn {:
  1869     COUNT_INST(I_FMOV6);
  1870     check_fpuen();
  1871     load_reg( REG_EAX, Rm );
  1872     if( sh4_x86.double_size ) {
  1873         check_ralign64( REG_EAX );
  1874         MEM_READ_LONG( REG_EAX, REG_EAX );
  1875         store_dr0( REG_EAX, FRn );
  1876         load_reg( REG_EAX, Rm );
  1877         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  1878         MEM_READ_LONG( REG_EAX, REG_EAX );
  1879         store_dr1( REG_EAX, FRn );
  1880         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
  1881     } else {
  1882         check_ralign32( REG_EAX );
  1883         MEM_READ_LONG( REG_EAX, REG_EAX );
  1884         store_fr( REG_EAX, FRn );
  1885         ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  1887     sh4_x86.tstate = TSTATE_NONE;
  1888 :}
  1889 FMOV FRm, @(R0, Rn) {:  
  1890     COUNT_INST(I_FMOV4);
  1891     check_fpuen();
  1892     load_reg( REG_EAX, Rn );
  1893     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  1894     if( sh4_x86.double_size ) {
  1895         check_walign64( REG_EAX );
  1896         load_dr0( REG_EDX, FRm );
  1897         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1898         load_reg( REG_EAX, Rn );
  1899         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  1900         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  1901         load_dr1( REG_EDX, FRm );
  1902         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1903     } else {
  1904         check_walign32( REG_EAX );
  1905         load_fr( REG_EDX, FRm );
  1906         MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
  1908     sh4_x86.tstate = TSTATE_NONE;
  1909 :}
  1910 FMOV @(R0, Rm), FRn {:  
  1911     COUNT_INST(I_FMOV7);
  1912     check_fpuen();
  1913     load_reg( REG_EAX, Rm );
  1914     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  1915     if( sh4_x86.double_size ) {
  1916         check_ralign64( REG_EAX );
  1917         MEM_READ_LONG( REG_EAX, REG_EAX );
  1918         store_dr0( REG_EAX, FRn );
  1919         load_reg( REG_EAX, Rm );
  1920         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  1921         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  1922         MEM_READ_LONG( REG_EAX, REG_EAX );
  1923         store_dr1( REG_EAX, FRn );
  1924     } else {
  1925         check_ralign32( REG_EAX );
  1926         MEM_READ_LONG( REG_EAX, REG_EAX );
  1927         store_fr( REG_EAX, FRn );
  1929     sh4_x86.tstate = TSTATE_NONE;
  1930 :}
  1931 FLDI0 FRn {:  /* IFF PR=0 */
  1932     COUNT_INST(I_FLDI0);
  1933     check_fpuen();
  1934     if( sh4_x86.double_prec == 0 ) {
  1935         XORL_r32_r32( REG_EAX, REG_EAX );
  1936         store_fr( REG_EAX, FRn );
  1938     sh4_x86.tstate = TSTATE_NONE;
  1939 :}
  1940 FLDI1 FRn {:  /* IFF PR=0 */
  1941     COUNT_INST(I_FLDI1);
  1942     check_fpuen();
  1943     if( sh4_x86.double_prec == 0 ) {
  1944         load_imm32(REG_EAX, 0x3F800000);
  1945         store_fr( REG_EAX, FRn );
  1947 :}
  1949 FLOAT FPUL, FRn {:  
  1950     COUNT_INST(I_FLOAT);
  1951     check_fpuen();
  1952     FILD_rbpdisp(R_FPUL);
  1953     if( sh4_x86.double_prec ) {
  1954         pop_dr( FRn );
  1955     } else {
  1956         pop_fr( FRn );
  1958 :}
  1959 FTRC FRm, FPUL {:  
  1960     COUNT_INST(I_FTRC);
  1961     check_fpuen();
  1962     if( sh4_x86.double_prec ) {
  1963         push_dr( FRm );
  1964     } else {
  1965         push_fr( FRm );
  1967     load_ptr( REG_ECX, &max_int );
  1968     FILD_r32disp( REG_ECX, 0 );
  1969     FCOMIP_st(1);
  1970     JNA_label( sat );
  1971     load_ptr( REG_ECX, &min_int );  // 5
  1972     FILD_r32disp( REG_ECX, 0 );           // 2
  1973     FCOMIP_st(1);                   // 2
  1974     JAE_label( sat2 );            // 2
  1975     load_ptr( REG_EAX, &save_fcw );
  1976     FNSTCW_r32disp( REG_EAX, 0 );
  1977     load_ptr( REG_EDX, &trunc_fcw );
  1978     FLDCW_r32disp( REG_EDX, 0 );
  1979     FISTP_rbpdisp(R_FPUL);             // 3
  1980     FLDCW_r32disp( REG_EAX, 0 );
  1981     JMP_label(end);             // 2
  1983     JMP_TARGET(sat);
  1984     JMP_TARGET(sat2);
  1985     MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
  1986     store_spreg( REG_ECX, R_FPUL );
  1987     FPOP_st();
  1988     JMP_TARGET(end);
  1989     sh4_x86.tstate = TSTATE_NONE;
  1990 :}
  1991 FLDS FRm, FPUL {:  
  1992     COUNT_INST(I_FLDS);
  1993     check_fpuen();
  1994     load_fr( REG_EAX, FRm );
  1995     store_spreg( REG_EAX, R_FPUL );
  1996 :}
  1997 FSTS FPUL, FRn {:  
  1998     COUNT_INST(I_FSTS);
  1999     check_fpuen();
  2000     load_spreg( REG_EAX, R_FPUL );
  2001     store_fr( REG_EAX, FRn );
  2002 :}
  2003 FCNVDS FRm, FPUL {:  
  2004     COUNT_INST(I_FCNVDS);
  2005     check_fpuen();
  2006     if( sh4_x86.double_prec ) {
  2007         push_dr( FRm );
  2008         pop_fpul();
  2010 :}
  2011 FCNVSD FPUL, FRn {:  
  2012     COUNT_INST(I_FCNVSD);
  2013     check_fpuen();
  2014     if( sh4_x86.double_prec ) {
  2015         push_fpul();
  2016         pop_dr( FRn );
  2018 :}
  2020 /* Floating point instructions */
  2021 FABS FRn {:  
  2022     COUNT_INST(I_FABS);
  2023     check_fpuen();
  2024     if( sh4_x86.double_prec ) {
  2025         push_dr(FRn);
  2026         FABS_st0();
  2027         pop_dr(FRn);
  2028     } else {
  2029         push_fr(FRn);
  2030         FABS_st0();
  2031         pop_fr(FRn);
  2033 :}
  2034 FADD FRm, FRn {:  
  2035     COUNT_INST(I_FADD);
  2036     check_fpuen();
  2037     if( sh4_x86.double_prec ) {
  2038         push_dr(FRm);
  2039         push_dr(FRn);
  2040         FADDP_st(1);
  2041         pop_dr(FRn);
  2042     } else {
  2043         push_fr(FRm);
  2044         push_fr(FRn);
  2045         FADDP_st(1);
  2046         pop_fr(FRn);
  2048 :}
  2049 FDIV FRm, FRn {:  
  2050     COUNT_INST(I_FDIV);
  2051     check_fpuen();
  2052     if( sh4_x86.double_prec ) {
  2053         push_dr(FRn);
  2054         push_dr(FRm);
  2055         FDIVP_st(1);
  2056         pop_dr(FRn);
  2057     } else {
  2058         push_fr(FRn);
  2059         push_fr(FRm);
  2060         FDIVP_st(1);
  2061         pop_fr(FRn);
  2063 :}
  2064 FMAC FR0, FRm, FRn {:  
  2065     COUNT_INST(I_FMAC);
  2066     check_fpuen();
  2067     if( sh4_x86.double_prec ) {
  2068         push_dr( 0 );
  2069         push_dr( FRm );
  2070         FMULP_st(1);
  2071         push_dr( FRn );
  2072         FADDP_st(1);
  2073         pop_dr( FRn );
  2074     } else {
  2075         push_fr( 0 );
  2076         push_fr( FRm );
  2077         FMULP_st(1);
  2078         push_fr( FRn );
  2079         FADDP_st(1);
  2080         pop_fr( FRn );
  2082 :}
  2084 FMUL FRm, FRn {:  
  2085     COUNT_INST(I_FMUL);
  2086     check_fpuen();
  2087     if( sh4_x86.double_prec ) {
  2088         push_dr(FRm);
  2089         push_dr(FRn);
  2090         FMULP_st(1);
  2091         pop_dr(FRn);
  2092     } else {
  2093         push_fr(FRm);
  2094         push_fr(FRn);
  2095         FMULP_st(1);
  2096         pop_fr(FRn);
  2098 :}
  2099 FNEG FRn {:  
  2100     COUNT_INST(I_FNEG);
  2101     check_fpuen();
  2102     if( sh4_x86.double_prec ) {
  2103         push_dr(FRn);
  2104         FCHS_st0();
  2105         pop_dr(FRn);
  2106     } else {
  2107         push_fr(FRn);
  2108         FCHS_st0();
  2109         pop_fr(FRn);
  2111 :}
  2112 FSRRA FRn {:  
  2113     COUNT_INST(I_FSRRA);
  2114     check_fpuen();
  2115     if( sh4_x86.double_prec == 0 ) {
  2116         FLD1_st0();
  2117         push_fr(FRn);
  2118         FSQRT_st0();
  2119         FDIVP_st(1);
  2120         pop_fr(FRn);
  2122 :}
  2123 FSQRT FRn {:  
  2124     COUNT_INST(I_FSQRT);
  2125     check_fpuen();
  2126     if( sh4_x86.double_prec ) {
  2127         push_dr(FRn);
  2128         FSQRT_st0();
  2129         pop_dr(FRn);
  2130     } else {
  2131         push_fr(FRn);
  2132         FSQRT_st0();
  2133         pop_fr(FRn);
  2135 :}
  2136 FSUB FRm, FRn {:  
  2137     COUNT_INST(I_FSUB);
  2138     check_fpuen();
  2139     if( sh4_x86.double_prec ) {
  2140         push_dr(FRn);
  2141         push_dr(FRm);
  2142         FSUBP_st(1);
  2143         pop_dr(FRn);
  2144     } else {
  2145         push_fr(FRn);
  2146         push_fr(FRm);
  2147         FSUBP_st(1);
  2148         pop_fr(FRn);
  2150 :}
  2152 FCMP/EQ FRm, FRn {:  
  2153     COUNT_INST(I_FCMPEQ);
  2154     check_fpuen();
  2155     if( sh4_x86.double_prec ) {
  2156         push_dr(FRm);
  2157         push_dr(FRn);
  2158     } else {
  2159         push_fr(FRm);
  2160         push_fr(FRn);
  2162     FCOMIP_st(1);
  2163     SETE_t();
  2164     FPOP_st();
  2165     sh4_x86.tstate = TSTATE_E;
  2166 :}
  2167 FCMP/GT FRm, FRn {:  
  2168     COUNT_INST(I_FCMPGT);
  2169     check_fpuen();
  2170     if( sh4_x86.double_prec ) {
  2171         push_dr(FRm);
  2172         push_dr(FRn);
  2173     } else {
  2174         push_fr(FRm);
  2175         push_fr(FRn);
  2177     FCOMIP_st(1);
  2178     SETA_t();
  2179     FPOP_st();
  2180     sh4_x86.tstate = TSTATE_A;
  2181 :}
  2183 FSCA FPUL, FRn {:  
  2184     COUNT_INST(I_FSCA);
  2185     check_fpuen();
  2186     if( sh4_x86.double_prec == 0 ) {
  2187         LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
  2188         load_spreg( REG_EAX, R_FPUL );
  2189         call_func2( sh4_fsca, REG_EAX, REG_EDX );
  2191     sh4_x86.tstate = TSTATE_NONE;
  2192 :}
  2193 FIPR FVm, FVn {:  
  2194     COUNT_INST(I_FIPR);
  2195     check_fpuen();
  2196     if( sh4_x86.double_prec == 0 ) {
  2197         if( sh4_x86.sse3_enabled ) {
  2198             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2199             MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2200             HADDPS_xmm_xmm( 4, 4 ); 
  2201             HADDPS_xmm_xmm( 4, 4 );
  2202             MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2203         } else {
  2204             push_fr( FVm<<2 );
  2205             push_fr( FVn<<2 );
  2206             FMULP_st(1);
  2207             push_fr( (FVm<<2)+1);
  2208             push_fr( (FVn<<2)+1);
  2209             FMULP_st(1);
  2210             FADDP_st(1);
  2211             push_fr( (FVm<<2)+2);
  2212             push_fr( (FVn<<2)+2);
  2213             FMULP_st(1);
  2214             FADDP_st(1);
  2215             push_fr( (FVm<<2)+3);
  2216             push_fr( (FVn<<2)+3);
  2217             FMULP_st(1);
  2218             FADDP_st(1);
  2219             pop_fr( (FVn<<2)+3);
  2222 :}
  2223 FTRV XMTRX, FVn {:  
  2224     COUNT_INST(I_FTRV);
  2225     check_fpuen();
  2226     if( sh4_x86.double_prec == 0 ) {
  2227         if( sh4_x86.sse3_enabled ) {
  2228             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2229             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2230             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2231             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2233             MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2234             MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2235             MOV_xmm_xmm( 4, 6 );
  2236             MOV_xmm_xmm( 5, 7 );
  2237             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2238             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2239             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2240             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2241             MULPS_xmm_xmm( 0, 4 );
  2242             MULPS_xmm_xmm( 1, 5 );
  2243             MULPS_xmm_xmm( 2, 6 );
  2244             MULPS_xmm_xmm( 3, 7 );
  2245             ADDPS_xmm_xmm( 5, 4 );
  2246             ADDPS_xmm_xmm( 7, 6 );
  2247             ADDPS_xmm_xmm( 6, 4 );
  2248             MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2249         } else {
  2250             LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
  2251             call_func1( sh4_ftrv, REG_EAX );
  2254     sh4_x86.tstate = TSTATE_NONE;
  2255 :}
  2257 FRCHG {:  
  2258     COUNT_INST(I_FRCHG);
  2259     check_fpuen();
  2260     XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
  2261     call_func0( sh4_switch_fr_banks );
  2262     sh4_x86.tstate = TSTATE_NONE;
  2263 :}
  2264 FSCHG {:  
  2265     COUNT_INST(I_FSCHG);
  2266     check_fpuen();
  2267     XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
  2268     XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
  2269     sh4_x86.tstate = TSTATE_NONE;
  2270     sh4_x86.double_size = !sh4_x86.double_size;
  2271 :}
  2273 /* Processor control instructions */
  2274 LDC Rm, SR {:
  2275     COUNT_INST(I_LDCSR);
  2276     if( sh4_x86.in_delay_slot ) {
  2277 	SLOTILLEGAL();
  2278     } else {
  2279 	check_priv();
  2280 	load_reg( REG_EAX, Rm );
  2281 	call_func1( sh4_write_sr, REG_EAX );
  2282 	sh4_x86.fpuen_checked = FALSE;
  2283 	sh4_x86.tstate = TSTATE_NONE;
  2284 	return 2;
  2286 :}
  2287 LDC Rm, GBR {: 
  2288     COUNT_INST(I_LDC);
  2289     load_reg( REG_EAX, Rm );
  2290     store_spreg( REG_EAX, R_GBR );
  2291 :}
  2292 LDC Rm, VBR {:  
  2293     COUNT_INST(I_LDC);
  2294     check_priv();
  2295     load_reg( REG_EAX, Rm );
  2296     store_spreg( REG_EAX, R_VBR );
  2297     sh4_x86.tstate = TSTATE_NONE;
  2298 :}
  2299 LDC Rm, SSR {:  
  2300     COUNT_INST(I_LDC);
  2301     check_priv();
  2302     load_reg( REG_EAX, Rm );
  2303     store_spreg( REG_EAX, R_SSR );
  2304     sh4_x86.tstate = TSTATE_NONE;
  2305 :}
  2306 LDC Rm, SGR {:  
  2307     COUNT_INST(I_LDC);
  2308     check_priv();
  2309     load_reg( REG_EAX, Rm );
  2310     store_spreg( REG_EAX, R_SGR );
  2311     sh4_x86.tstate = TSTATE_NONE;
  2312 :}
  2313 LDC Rm, SPC {:  
  2314     COUNT_INST(I_LDC);
  2315     check_priv();
  2316     load_reg( REG_EAX, Rm );
  2317     store_spreg( REG_EAX, R_SPC );
  2318     sh4_x86.tstate = TSTATE_NONE;
  2319 :}
  2320 LDC Rm, DBR {:  
  2321     COUNT_INST(I_LDC);
  2322     check_priv();
  2323     load_reg( REG_EAX, Rm );
  2324     store_spreg( REG_EAX, R_DBR );
  2325     sh4_x86.tstate = TSTATE_NONE;
  2326 :}
  2327 LDC Rm, Rn_BANK {:  
  2328     COUNT_INST(I_LDC);
  2329     check_priv();
  2330     load_reg( REG_EAX, Rm );
  2331     store_spreg( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2332     sh4_x86.tstate = TSTATE_NONE;
  2333 :}
  2334 LDC.L @Rm+, GBR {:  
  2335     COUNT_INST(I_LDCM);
  2336     load_reg( REG_EAX, Rm );
  2337     check_ralign32( REG_EAX );
  2338     MEM_READ_LONG( REG_EAX, REG_EAX );
  2339     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2340     store_spreg( REG_EAX, R_GBR );
  2341     sh4_x86.tstate = TSTATE_NONE;
  2342 :}
  2343 LDC.L @Rm+, SR {:
  2344     COUNT_INST(I_LDCSRM);
  2345     if( sh4_x86.in_delay_slot ) {
  2346 	SLOTILLEGAL();
  2347     } else {
  2348 	check_priv();
  2349 	load_reg( REG_EAX, Rm );
  2350 	check_ralign32( REG_EAX );
  2351 	MEM_READ_LONG( REG_EAX, REG_EAX );
  2352 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2353 	call_func1( sh4_write_sr, REG_EAX );
  2354 	sh4_x86.fpuen_checked = FALSE;
  2355 	sh4_x86.tstate = TSTATE_NONE;
  2356 	return 2;
  2358 :}
  2359 LDC.L @Rm+, VBR {:  
  2360     COUNT_INST(I_LDCM);
  2361     check_priv();
  2362     load_reg( REG_EAX, Rm );
  2363     check_ralign32( REG_EAX );
  2364     MEM_READ_LONG( REG_EAX, REG_EAX );
  2365     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2366     store_spreg( REG_EAX, R_VBR );
  2367     sh4_x86.tstate = TSTATE_NONE;
  2368 :}
  2369 LDC.L @Rm+, SSR {:
  2370     COUNT_INST(I_LDCM);
  2371     check_priv();
  2372     load_reg( REG_EAX, Rm );
  2373     check_ralign32( REG_EAX );
  2374     MEM_READ_LONG( REG_EAX, REG_EAX );
  2375     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2376     store_spreg( REG_EAX, R_SSR );
  2377     sh4_x86.tstate = TSTATE_NONE;
  2378 :}
  2379 LDC.L @Rm+, SGR {:  
  2380     COUNT_INST(I_LDCM);
  2381     check_priv();
  2382     load_reg( REG_EAX, Rm );
  2383     check_ralign32( REG_EAX );
  2384     MEM_READ_LONG( REG_EAX, REG_EAX );
  2385     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2386     store_spreg( REG_EAX, R_SGR );
  2387     sh4_x86.tstate = TSTATE_NONE;
  2388 :}
  2389 LDC.L @Rm+, SPC {:  
  2390     COUNT_INST(I_LDCM);
  2391     check_priv();
  2392     load_reg( REG_EAX, Rm );
  2393     check_ralign32( REG_EAX );
  2394     MEM_READ_LONG( REG_EAX, REG_EAX );
  2395     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2396     store_spreg( REG_EAX, R_SPC );
  2397     sh4_x86.tstate = TSTATE_NONE;
  2398 :}
  2399 LDC.L @Rm+, DBR {:  
  2400     COUNT_INST(I_LDCM);
  2401     check_priv();
  2402     load_reg( REG_EAX, Rm );
  2403     check_ralign32( REG_EAX );
  2404     MEM_READ_LONG( REG_EAX, REG_EAX );
  2405     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2406     store_spreg( REG_EAX, R_DBR );
  2407     sh4_x86.tstate = TSTATE_NONE;
  2408 :}
  2409 LDC.L @Rm+, Rn_BANK {:  
  2410     COUNT_INST(I_LDCM);
  2411     check_priv();
  2412     load_reg( REG_EAX, Rm );
  2413     check_ralign32( REG_EAX );
  2414     MEM_READ_LONG( REG_EAX, REG_EAX );
  2415     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2416     store_spreg( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2417     sh4_x86.tstate = TSTATE_NONE;
  2418 :}
  2419 LDS Rm, FPSCR {:
  2420     COUNT_INST(I_LDSFPSCR);
  2421     check_fpuen();
  2422     load_reg( REG_EAX, Rm );
  2423     call_func1( sh4_write_fpscr, REG_EAX );
  2424     sh4_x86.tstate = TSTATE_NONE;
  2425     return 2;
  2426 :}
  2427 LDS.L @Rm+, FPSCR {:  
  2428     COUNT_INST(I_LDSFPSCRM);
  2429     check_fpuen();
  2430     load_reg( REG_EAX, Rm );
  2431     check_ralign32( REG_EAX );
  2432     MEM_READ_LONG( REG_EAX, REG_EAX );
  2433     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2434     call_func1( sh4_write_fpscr, REG_EAX );
  2435     sh4_x86.tstate = TSTATE_NONE;
  2436     return 2;
  2437 :}
  2438 LDS Rm, FPUL {:  
  2439     COUNT_INST(I_LDS);
  2440     check_fpuen();
  2441     load_reg( REG_EAX, Rm );
  2442     store_spreg( REG_EAX, R_FPUL );
  2443 :}
  2444 LDS.L @Rm+, FPUL {:  
  2445     COUNT_INST(I_LDSM);
  2446     check_fpuen();
  2447     load_reg( REG_EAX, Rm );
  2448     check_ralign32( REG_EAX );
  2449     MEM_READ_LONG( REG_EAX, REG_EAX );
  2450     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2451     store_spreg( REG_EAX, R_FPUL );
  2452     sh4_x86.tstate = TSTATE_NONE;
  2453 :}
  2454 LDS Rm, MACH {: 
  2455     COUNT_INST(I_LDS);
  2456     load_reg( REG_EAX, Rm );
  2457     store_spreg( REG_EAX, R_MACH );
  2458 :}
  2459 LDS.L @Rm+, MACH {:  
  2460     COUNT_INST(I_LDSM);
  2461     load_reg( REG_EAX, Rm );
  2462     check_ralign32( REG_EAX );
  2463     MEM_READ_LONG( REG_EAX, REG_EAX );
  2464     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2465     store_spreg( REG_EAX, R_MACH );
  2466     sh4_x86.tstate = TSTATE_NONE;
  2467 :}
  2468 LDS Rm, MACL {:  
  2469     COUNT_INST(I_LDS);
  2470     load_reg( REG_EAX, Rm );
  2471     store_spreg( REG_EAX, R_MACL );
  2472 :}
  2473 LDS.L @Rm+, MACL {:  
  2474     COUNT_INST(I_LDSM);
  2475     load_reg( REG_EAX, Rm );
  2476     check_ralign32( REG_EAX );
  2477     MEM_READ_LONG( REG_EAX, REG_EAX );
  2478     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2479     store_spreg( REG_EAX, R_MACL );
  2480     sh4_x86.tstate = TSTATE_NONE;
  2481 :}
  2482 LDS Rm, PR {:  
  2483     COUNT_INST(I_LDS);
  2484     load_reg( REG_EAX, Rm );
  2485     store_spreg( REG_EAX, R_PR );
  2486 :}
  2487 LDS.L @Rm+, PR {:  
  2488     COUNT_INST(I_LDSM);
  2489     load_reg( REG_EAX, Rm );
  2490     check_ralign32( REG_EAX );
  2491     MEM_READ_LONG( REG_EAX, REG_EAX );
  2492     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2493     store_spreg( REG_EAX, R_PR );
  2494     sh4_x86.tstate = TSTATE_NONE;
  2495 :}
  2496 LDTLB {:  
  2497     COUNT_INST(I_LDTLB);
  2498     call_func0( MMU_ldtlb );
  2499     sh4_x86.tstate = TSTATE_NONE;
  2500 :}
  2501 OCBI @Rn {:
  2502     COUNT_INST(I_OCBI);
  2503 :}
  2504 OCBP @Rn {:
  2505     COUNT_INST(I_OCBP);
  2506 :}
  2507 OCBWB @Rn {:
  2508     COUNT_INST(I_OCBWB);
  2509 :}
  2510 PREF @Rn {:
  2511     COUNT_INST(I_PREF);
  2512     load_reg( REG_EAX, Rn );
  2513     MEM_PREFETCH( REG_EAX );
  2514     sh4_x86.tstate = TSTATE_NONE;
  2515 :}
  2516 SLEEP {: 
  2517     COUNT_INST(I_SLEEP);
  2518     check_priv();
  2519     call_func0( sh4_sleep );
  2520     sh4_x86.tstate = TSTATE_NONE;
  2521     sh4_x86.in_delay_slot = DELAY_NONE;
  2522     return 2;
  2523 :}
  2524 STC SR, Rn {:
  2525     COUNT_INST(I_STCSR);
  2526     check_priv();
  2527     call_func0(sh4_read_sr);
  2528     store_reg( REG_EAX, Rn );
  2529     sh4_x86.tstate = TSTATE_NONE;
  2530 :}
  2531 STC GBR, Rn {:  
  2532     COUNT_INST(I_STC);
  2533     load_spreg( REG_EAX, R_GBR );
  2534     store_reg( REG_EAX, Rn );
  2535 :}
  2536 STC VBR, Rn {:  
  2537     COUNT_INST(I_STC);
  2538     check_priv();
  2539     load_spreg( REG_EAX, R_VBR );
  2540     store_reg( REG_EAX, Rn );
  2541     sh4_x86.tstate = TSTATE_NONE;
  2542 :}
  2543 STC SSR, Rn {:  
  2544     COUNT_INST(I_STC);
  2545     check_priv();
  2546     load_spreg( REG_EAX, R_SSR );
  2547     store_reg( REG_EAX, Rn );
  2548     sh4_x86.tstate = TSTATE_NONE;
  2549 :}
  2550 STC SPC, Rn {:  
  2551     COUNT_INST(I_STC);
  2552     check_priv();
  2553     load_spreg( REG_EAX, R_SPC );
  2554     store_reg( REG_EAX, Rn );
  2555     sh4_x86.tstate = TSTATE_NONE;
  2556 :}
  2557 STC SGR, Rn {:  
  2558     COUNT_INST(I_STC);
  2559     check_priv();
  2560     load_spreg( REG_EAX, R_SGR );
  2561     store_reg( REG_EAX, Rn );
  2562     sh4_x86.tstate = TSTATE_NONE;
  2563 :}
  2564 STC DBR, Rn {:  
  2565     COUNT_INST(I_STC);
  2566     check_priv();
  2567     load_spreg( REG_EAX, R_DBR );
  2568     store_reg( REG_EAX, Rn );
  2569     sh4_x86.tstate = TSTATE_NONE;
  2570 :}
  2571 STC Rm_BANK, Rn {:
  2572     COUNT_INST(I_STC);
  2573     check_priv();
  2574     load_spreg( REG_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2575     store_reg( REG_EAX, Rn );
  2576     sh4_x86.tstate = TSTATE_NONE;
  2577 :}
  2578 STC.L SR, @-Rn {:
  2579     COUNT_INST(I_STCSRM);
  2580     check_priv();
  2581     call_func0( sh4_read_sr );
  2582     MOVL_r32_r32( REG_EAX, REG_EDX );
  2583     load_reg( REG_EAX, Rn );
  2584     check_walign32( REG_EAX );
  2585     LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2586     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2587     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2588     sh4_x86.tstate = TSTATE_NONE;
  2589 :}
  2590 STC.L VBR, @-Rn {:  
  2591     COUNT_INST(I_STCM);
  2592     check_priv();
  2593     load_reg( REG_EAX, Rn );
  2594     check_walign32( REG_EAX );
  2595     ADDL_imms_r32( -4, REG_EAX );
  2596     load_spreg( REG_EDX, R_VBR );
  2597     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2598     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2599     sh4_x86.tstate = TSTATE_NONE;
  2600 :}
  2601 STC.L SSR, @-Rn {:  
  2602     COUNT_INST(I_STCM);
  2603     check_priv();
  2604     load_reg( REG_EAX, Rn );
  2605     check_walign32( REG_EAX );
  2606     ADDL_imms_r32( -4, REG_EAX );
  2607     load_spreg( REG_EDX, R_SSR );
  2608     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2609     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2610     sh4_x86.tstate = TSTATE_NONE;
  2611 :}
  2612 STC.L SPC, @-Rn {:
  2613     COUNT_INST(I_STCM);
  2614     check_priv();
  2615     load_reg( REG_EAX, Rn );
  2616     check_walign32( REG_EAX );
  2617     ADDL_imms_r32( -4, REG_EAX );
  2618     load_spreg( REG_EDX, R_SPC );
  2619     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2620     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2621     sh4_x86.tstate = TSTATE_NONE;
  2622 :}
  2623 STC.L SGR, @-Rn {:  
  2624     COUNT_INST(I_STCM);
  2625     check_priv();
  2626     load_reg( REG_EAX, Rn );
  2627     check_walign32( REG_EAX );
  2628     ADDL_imms_r32( -4, REG_EAX );
  2629     load_spreg( REG_EDX, R_SGR );
  2630     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2631     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2632     sh4_x86.tstate = TSTATE_NONE;
  2633 :}
  2634 STC.L DBR, @-Rn {:  
  2635     COUNT_INST(I_STCM);
  2636     check_priv();
  2637     load_reg( REG_EAX, Rn );
  2638     check_walign32( REG_EAX );
  2639     ADDL_imms_r32( -4, REG_EAX );
  2640     load_spreg( REG_EDX, R_DBR );
  2641     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2642     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2643     sh4_x86.tstate = TSTATE_NONE;
  2644 :}
  2645 STC.L Rm_BANK, @-Rn {:  
  2646     COUNT_INST(I_STCM);
  2647     check_priv();
  2648     load_reg( REG_EAX, Rn );
  2649     check_walign32( REG_EAX );
  2650     ADDL_imms_r32( -4, REG_EAX );
  2651     load_spreg( REG_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2652     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2653     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2654     sh4_x86.tstate = TSTATE_NONE;
  2655 :}
  2656 STC.L GBR, @-Rn {:  
  2657     COUNT_INST(I_STCM);
  2658     load_reg( REG_EAX, Rn );
  2659     check_walign32( REG_EAX );
  2660     ADDL_imms_r32( -4, REG_EAX );
  2661     load_spreg( REG_EDX, R_GBR );
  2662     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2663     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2664     sh4_x86.tstate = TSTATE_NONE;
  2665 :}
  2666 STS FPSCR, Rn {:  
  2667     COUNT_INST(I_STSFPSCR);
  2668     check_fpuen();
  2669     load_spreg( REG_EAX, R_FPSCR );
  2670     store_reg( REG_EAX, Rn );
  2671 :}
  2672 STS.L FPSCR, @-Rn {:  
  2673     COUNT_INST(I_STSFPSCRM);
  2674     check_fpuen();
  2675     load_reg( REG_EAX, Rn );
  2676     check_walign32( REG_EAX );
  2677     ADDL_imms_r32( -4, REG_EAX );
  2678     load_spreg( REG_EDX, R_FPSCR );
  2679     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2680     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2681     sh4_x86.tstate = TSTATE_NONE;
  2682 :}
  2683 STS FPUL, Rn {:  
  2684     COUNT_INST(I_STS);
  2685     check_fpuen();
  2686     load_spreg( REG_EAX, R_FPUL );
  2687     store_reg( REG_EAX, Rn );
  2688 :}
  2689 STS.L FPUL, @-Rn {:  
  2690     COUNT_INST(I_STSM);
  2691     check_fpuen();
  2692     load_reg( REG_EAX, Rn );
  2693     check_walign32( REG_EAX );
  2694     ADDL_imms_r32( -4, REG_EAX );
  2695     load_spreg( REG_EDX, R_FPUL );
  2696     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2697     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2698     sh4_x86.tstate = TSTATE_NONE;
  2699 :}
  2700 STS MACH, Rn {:  
  2701     COUNT_INST(I_STS);
  2702     load_spreg( REG_EAX, R_MACH );
  2703     store_reg( REG_EAX, Rn );
  2704 :}
  2705 STS.L MACH, @-Rn {:  
  2706     COUNT_INST(I_STSM);
  2707     load_reg( REG_EAX, Rn );
  2708     check_walign32( REG_EAX );
  2709     ADDL_imms_r32( -4, REG_EAX );
  2710     load_spreg( REG_EDX, R_MACH );
  2711     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2712     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2713     sh4_x86.tstate = TSTATE_NONE;
  2714 :}
  2715 STS MACL, Rn {:  
  2716     COUNT_INST(I_STS);
  2717     load_spreg( REG_EAX, R_MACL );
  2718     store_reg( REG_EAX, Rn );
  2719 :}
  2720 STS.L MACL, @-Rn {:  
  2721     COUNT_INST(I_STSM);
  2722     load_reg( REG_EAX, Rn );
  2723     check_walign32( REG_EAX );
  2724     ADDL_imms_r32( -4, REG_EAX );
  2725     load_spreg( REG_EDX, R_MACL );
  2726     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2727     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2728     sh4_x86.tstate = TSTATE_NONE;
  2729 :}
  2730 STS PR, Rn {:  
  2731     COUNT_INST(I_STS);
  2732     load_spreg( REG_EAX, R_PR );
  2733     store_reg( REG_EAX, Rn );
  2734 :}
  2735 STS.L PR, @-Rn {:  
  2736     COUNT_INST(I_STSM);
  2737     load_reg( REG_EAX, Rn );
  2738     check_walign32( REG_EAX );
  2739     ADDL_imms_r32( -4, REG_EAX );
  2740     load_spreg( REG_EDX, R_PR );
  2741     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2742     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2743     sh4_x86.tstate = TSTATE_NONE;
  2744 :}
  2746 NOP {: 
  2747     COUNT_INST(I_NOP);
  2748     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  2749 :}
  2750 %%
  2751     sh4_x86.in_delay_slot = DELAY_NONE;
  2752     return 0;
.