lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 953:f4a156508ad1
prev 927:17b6b9e245d8
next 956:4c1ed9e03985
author nkeynes
date Tue Jan 13 11:56:28 2009 +0000
permissions -rw-r--r--
last change Merge lxdream-mem branch back to trunk
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "lxdream.h"
    29 #include "sh4/xltcache.h"
    30 #include "sh4/sh4core.h"
    31 #include "sh4/sh4trans.h"
    32 #include "sh4/sh4stat.h"
    33 #include "sh4/sh4mmio.h"
    34 #include "sh4/x86op.h"
    35 #include "sh4/mmu.h"
    36 #include "clock.h"
    38 #define DEFAULT_BACKPATCH_SIZE 4096
    40 struct backpatch_record {
    41     uint32_t fixup_offset;
    42     uint32_t fixup_icount;
    43     int32_t exc_code;
    44 };
    46 #define DELAY_NONE 0
    47 #define DELAY_PC 1
    48 #define DELAY_PC_PR 2
    50 /** 
    51  * Struct to manage internal translation state. This state is not saved -
    52  * it is only valid between calls to sh4_translate_begin_block() and
    53  * sh4_translate_end_block()
    54  */
    55 struct sh4_x86_state {
    56     int in_delay_slot;
    57     gboolean fpuen_checked; /* true if we've already checked that the FPU is enabled. */
    58     gboolean branch_taken; /* true if we branched unconditionally */
    59     gboolean double_prec; /* true if FPU is in double-precision mode */
    60     gboolean double_size; /* true if FPU is in double-size mode */
    61     gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    62     uint32_t block_start_pc;
    63     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    64     int tstate;
    66     /* mode flags */
    67     gboolean tlb_on; /* True if tlb translation is active */
    69     /* Allocated memory for the (block-wide) back-patch list */
    70     struct backpatch_record *backpatch_list;
    71     uint32_t backpatch_posn;
    72     uint32_t backpatch_size;
    73 };
    75 #define TSTATE_NONE -1
    76 #define TSTATE_O    0
    77 #define TSTATE_C    2
    78 #define TSTATE_E    4
    79 #define TSTATE_NE   5
    80 #define TSTATE_G    0xF
    81 #define TSTATE_GE   0xD
    82 #define TSTATE_A    7
    83 #define TSTATE_AE   3
    85 #ifdef ENABLE_SH4STATS
    86 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
    87 #else
    88 #define COUNT_INST(id)
    89 #endif
    91 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    92 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    93 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    94     OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
    96 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    97 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    98 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    99     OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
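        /* Editor's note: the TSTATE_* values above are chosen to equal the
         * condition-code nibble of the corresponding x86 Jcc opcode, so
         * OP(0x70+tstate) emits the short conditional jump directly. A
         * sketch of the expansion when the previous compare left
         * sh4_x86.tstate == TSTATE_E (4):
         *
         *     JT_rel8(label);   // emits 0x74 (JE rel8) + displacement byte
         *     JF_rel8(label);   // tstate^1 == 5, emits 0x75 (JNE rel8)
         *
         * The displacement byte is the OP(-1) placeholder, later fixed up
         * by the MARK_JMP8/JMP_TARGET pair. */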
   101 static struct sh4_x86_state sh4_x86;
   103 static uint32_t max_int = 0x7FFFFFFF;
   104 static uint32_t min_int = 0x80000000;
   105 static uint32_t save_fcw; /* save value for fpu control word */
   106 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
   108 gboolean is_sse3_supported()
   109 {
   110     uint32_t features;
   112     __asm__ __volatile__(
   113         "mov $0x01, %%eax\n\t"
   114         "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
   115     return (features & 1) ? TRUE : FALSE;
   116 }
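        /* Editor's note: this queries CPUID leaf 1; bit 0 of ECX is the
         * SSE3 ("PNI") feature flag, which is the only bit tested here. */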
   118 void sh4_translate_init(void)
   119 {
   120     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   121     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   122     sh4_x86.sse3_enabled = is_sse3_supported();
   123 }
   126 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   127 {
   128     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   129 	sh4_x86.backpatch_size <<= 1;
   130 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   131 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   132 	assert( sh4_x86.backpatch_list != NULL );
   133     }
   134     if( sh4_x86.in_delay_slot ) {
   135 	fixup_pc -= 2;
   136     }
   137     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   138 	((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
   139     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   140     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   141     sh4_x86.backpatch_posn++;
   142 }
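        /* Editor's sketch (not in the original) of how a record is consumed
         * when the block is finalized: fixup_offset locates the rel32 field
         * to patch, fixup_icount gives the SH4 instruction count needed to
         * recover the guest PC, and exc_code selects what to raise. Roughly:
         *
         *     struct backpatch_record *r = &sh4_x86.backpatch_list[i];
         *     uint8_t *fixup = xlat_current_block->code + r->fixup_offset;
         *     // patch *(uint32_t *)fixup to reach the exception stub
         *
         * See sh4_translate_end_block() for the authoritative logic. */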
   144 /**
   145  * Emit an instruction to load an SH4 reg into a real register
   146  */
   147 static inline void load_reg( int x86reg, int sh4reg ) 
   148 {
   149     /* mov [bp+n], reg */
   150     OP(0x8B);
   151     OP(0x45 + (x86reg<<3));
   152     OP(REG_OFFSET(r[sh4reg]));
   153 }
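        /* Editor's example (offsets illustrative): assuming R_EAX == 0 and
         * REG_OFFSET(r[1]) == 4, load_reg(R_EAX, 1) emits the bytes
         * 8B 45 04, i.e. "mov eax, [ebp+4]" -- the sh4r structure is
         * addressed through EBP throughout this translator. */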
   155 static inline void load_reg16s( int x86reg, int sh4reg )
   156 {
   157     OP(0x0F);
   158     OP(0xBF);
   159     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   160 }
   162 static inline void load_reg16u( int x86reg, int sh4reg )
   163 {
   164     OP(0x0F);
   165     OP(0xB7);
   166     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   168 }
   170 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   171 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   172 /**
   173  * Emit an instruction to load an immediate value into a register
   174  */
   175 static inline void load_imm32( int x86reg, uint32_t value ) {
   176     /* mov #value, reg */
   177     OP(0xB8 + x86reg);
   178     OP32(value);
   179 }
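        /* Editor's example: load_imm32(R_EAX, 0xDEADBEEF) emits
         * B8 EF BE AD DE ("mov eax, 0xDEADBEEF"); 0xB8+reg is the x86
         * mov-imm32 opcode row, so no ModR/M byte is needed. */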
   182 /**
   183  * Load an immediate 64-bit quantity (note: x86-64 only)
   184  */
   185 static inline void load_imm64( int x86reg, uint64_t value ) {
   186     /* mov #value, reg */
   187     REXW();
   188     OP(0xB8 + x86reg);
   189     OP64(value);
   190 }
   192 /**
   193  * Emit an instruction to store an SH4 reg (RN)
   194  */
    195 static inline void store_reg( int x86reg, int sh4reg ) {
   196     /* mov reg, [bp+n] */
   197     OP(0x89);
   198     OP(0x45 + (x86reg<<3));
   199     OP(REG_OFFSET(r[sh4reg]));
   200 }
   202 /**
   203  * Load an FR register (single-precision floating point) into an integer x86
   204  * register (eg for register-to-register moves)
   205  */
   206 #define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
   207 #define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
   209 /**
   210  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   211  */
   212 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   213 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   215 /**
    216  * Store an FR register (single-precision floating point) from an integer x86
   217  * register (eg for register-to-register moves)
   218  */
   219 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
   220 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
   222 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   223 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
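        /* Editor's note: the (frm)^1 index swaps each even/odd pair of
         * singles. The fr[][] file evidently stores every DR pair as one
         * host-endian 64-bit value, so on a little-endian host an
         * individual FRn lives in the opposite word of its pair -- hence
         * ^1 for 32-bit access, while the DR macros address the two words
         * of the aligned pair directly via frm|0x01 and frm&0x0E. */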
   226 #define push_fpul()  FLDF_sh4r(R_FPUL)
   227 #define pop_fpul()   FSTPF_sh4r(R_FPUL)
   228 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   229 #define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   230 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   231 #define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   232 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   233 #define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   234 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   235 #define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   239 /* Exception checks - Note that all exception checks will clobber EAX */
   241 #define check_priv( ) \
   242     if( (sh4r.xlat_sh4_mode & SR_MD) == 0 ) { \
   243         if( sh4_x86.in_delay_slot ) { \
   244             JMP_exc(EXC_SLOT_ILLEGAL); \
   245         } else { \
   246             JMP_exc(EXC_ILLEGAL ); \
   247         } \
   248         sh4_x86.in_delay_slot = DELAY_NONE; \
   249         return 2; \
   250     }
   252 #define check_fpuen( ) \
   253     if( !sh4_x86.fpuen_checked ) {\
   254 	sh4_x86.fpuen_checked = TRUE;\
   255 	load_spreg( R_EAX, R_SR );\
   256 	AND_imm32_r32( SR_FD, R_EAX );\
   257 	if( sh4_x86.in_delay_slot ) {\
   258 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   259 	} else {\
   260 	    JNE_exc(EXC_FPU_DISABLED);\
   261 	}\
   262 	sh4_x86.tstate = TSTATE_NONE; \
   263     }
   265 #define check_ralign16( x86reg ) \
   266     TEST_imm32_r32( 0x00000001, x86reg ); \
   267     JNE_exc(EXC_DATA_ADDR_READ)
   269 #define check_walign16( x86reg ) \
   270     TEST_imm32_r32( 0x00000001, x86reg ); \
   271     JNE_exc(EXC_DATA_ADDR_WRITE);
   273 #define check_ralign32( x86reg ) \
   274     TEST_imm32_r32( 0x00000003, x86reg ); \
   275     JNE_exc(EXC_DATA_ADDR_READ)
   277 #define check_walign32( x86reg ) \
   278     TEST_imm32_r32( 0x00000003, x86reg ); \
   279     JNE_exc(EXC_DATA_ADDR_WRITE);
   281 #define check_ralign64( x86reg ) \
   282     TEST_imm32_r32( 0x00000007, x86reg ); \
   283     JNE_exc(EXC_DATA_ADDR_READ)
   285 #define check_walign64( x86reg ) \
   286     TEST_imm32_r32( 0x00000007, x86reg ); \
   287     JNE_exc(EXC_DATA_ADDR_WRITE);
   289 #define UNDEF(ir)
   290 #define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
   291 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   292 /* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so 
   293  * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
   294  */
   296 #ifdef HAVE_FRAME_ADDRESS
   297 #define _CALL_READ(addr_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
   298         call_func1_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg); } else { \
   299         call_func1_r32disp8_exc(R_ECX, MEM_REGION_PTR(fn), addr_reg, pc); } 
   300 #define _CALL_WRITE(addr_reg, val_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
   301         call_func2_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg); } else { \
   302         call_func2_r32disp8_exc(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg, pc); }
   303 #else 
   304 #define _CALL_READ(addr_reg, fn) call_func1_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg)
   305 #define _CALL_WRITE(addr_reg, val_reg, fn) call_func2_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg)
   306 #endif
   308 #define MEM_READ_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_byte); MEM_RESULT(value_reg)
   309 #define MEM_READ_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_word); MEM_RESULT(value_reg)
   310 #define MEM_READ_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_long); MEM_RESULT(value_reg)
   311 #define MEM_WRITE_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_byte)
   312 #define MEM_WRITE_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_word)
   313 #define MEM_WRITE_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_long)
   314 #define MEM_PREFETCH( addr_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, prefetch)
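        /* Editor's sketch of one expansion: MEM_READ_LONG(R_EAX, R_EAX)
         * becomes
         *
         *     decode_address(R_EAX);        // leaves the mem_region_fn
         *                                   //   pointer in R_ECX
         *     _CALL_READ(R_EAX, read_long); // indirect call through the
         *                                   //   region's read_long slot
         *     MEM_RESULT(R_EAX);            // no-op when value_reg == EAX
         *
         * with the *_exc variant of the call chosen whenever an MMU miss
         * is possible, per the SR.MD/MMUCR.AT note above. */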
   316 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
   318 /****** Import appropriate calling conventions ******/
   319 #if SIZEOF_VOID_P == 8
   320 #include "sh4/ia64abi.h"
   321 #else /* 32-bit system */
   322 #include "sh4/ia32abi.h"
   323 #endif
   325 void sh4_translate_begin_block( sh4addr_t pc ) 
   326 {
   327     enter_block();
   328     sh4_x86.in_delay_slot = FALSE;
   329     sh4_x86.fpuen_checked = FALSE;
   330     sh4_x86.branch_taken = FALSE;
   331     sh4_x86.backpatch_posn = 0;
   332     sh4_x86.block_start_pc = pc;
   333     sh4_x86.tlb_on = IS_TLB_ENABLED();
   334     sh4_x86.tstate = TSTATE_NONE;
   335     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   336     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
   337 }
   340 uint32_t sh4_translate_end_block_size()
   341 {
   342     if( sh4_x86.backpatch_posn <= 3 ) {
   343         return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
   344     } else {
   345         return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
   346     }
   347 }
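        /* Editor's worked example: backpatch_posn == 2 reserves
         * EPILOGUE_SIZE + 24 bytes; backpatch_posn == 5 reserves
         * EPILOGUE_SIZE + 48 + 30 bytes for the epilogue plus exception
         * stubs. */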
   350 /**
   351  * Embed a breakpoint into the generated code
   352  */
   353 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   354 {
   355     load_imm32( R_EAX, pc );
   356     call_func1( sh4_translate_breakpoint_hit, R_EAX );
   357     sh4_x86.tstate = TSTATE_NONE;
   358 }
   361 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
   363 /**
   364  * Embed a call to sh4_execute_instruction for situations that we
   365  * can't translate (just page-crossing delay slots at the moment).
   366  * Caller is responsible for setting new_pc before calling this function.
   367  *
   368  * Performs:
   369  *   Set PC = endpc
   370  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   371  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   372  *   Call sh4_execute_instruction
   373  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   374  */
   375 void exit_block_emu( sh4vma_t endpc )
   376 {
   377     load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
   378     ADD_r32_sh4r( R_ECX, R_PC );
   380     load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
   381     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   382     load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
   383     store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
   385     call_func0( sh4_execute_instruction );    
   386     load_spreg( R_EAX, R_PC );
   387     if( sh4_x86.tlb_on ) {
   388 	call_func1(xlat_get_code_by_vma,R_EAX);
   389     } else {
   390 	call_func1(xlat_get_code,R_EAX);
   391     }
   392     exit_block();
   393 } 
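        /* Editor's note on the cycle arithmetic above: for an instruction
         * at endpc == block_start_pc + 6, (((6)>>1)+1)*sh4_cpu_period
         * charges four instruction periods -- the +1 covers the
         * instruction at endpc itself, since (per the comment above)
         * single-stepping does not update slice_cycle. */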
   395 /**
   396  * Translate a single instruction. Delayed branches are handled specially
    397  * by translating both branch and delayed instruction as a single unit, as
    398  * the delay-slot instruction must execute before the branch takes effect.
   399  * The instruction MUST be in the icache (assert check)
   400  *
   401  * @return true if the instruction marks the end of a basic block
    402  * (eg a branch or other control transfer)
   403  */
   404 uint32_t sh4_translate_instruction( sh4vma_t pc )
   405 {
   406     uint32_t ir;
   407     /* Read instruction from icache */
   408     assert( IS_IN_ICACHE(pc) );
   409     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   411     if( !sh4_x86.in_delay_slot ) {
   412 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   413     }
   414 %%
   415 /* ALU operations */
   416 ADD Rm, Rn {:
   417     COUNT_INST(I_ADD);
   418     load_reg( R_EAX, Rm );
   419     load_reg( R_ECX, Rn );
   420     ADD_r32_r32( R_EAX, R_ECX );
   421     store_reg( R_ECX, Rn );
   422     sh4_x86.tstate = TSTATE_NONE;
   423 :}
   424 ADD #imm, Rn {:  
   425     COUNT_INST(I_ADDI);
   426     ADD_imm8s_sh4r( imm, REG_OFFSET(r[Rn]) );
   427     sh4_x86.tstate = TSTATE_NONE;
   428 :}
   429 ADDC Rm, Rn {:
   430     COUNT_INST(I_ADDC);
   431     if( sh4_x86.tstate != TSTATE_C ) {
   432         LDC_t();
   433     }
   434     load_reg( R_EAX, Rm );
   435     load_reg( R_ECX, Rn );
   436     ADC_r32_r32( R_EAX, R_ECX );
   437     store_reg( R_ECX, Rn );
   438     SETC_t();
   439     sh4_x86.tstate = TSTATE_C;
   440 :}
   441 ADDV Rm, Rn {:
   442     COUNT_INST(I_ADDV);
   443     load_reg( R_EAX, Rm );
   444     load_reg( R_ECX, Rn );
   445     ADD_r32_r32( R_EAX, R_ECX );
   446     store_reg( R_ECX, Rn );
   447     SETO_t();
   448     sh4_x86.tstate = TSTATE_O;
   449 :}
   450 AND Rm, Rn {:
   451     COUNT_INST(I_AND);
   452     load_reg( R_EAX, Rm );
   453     load_reg( R_ECX, Rn );
   454     AND_r32_r32( R_EAX, R_ECX );
   455     store_reg( R_ECX, Rn );
   456     sh4_x86.tstate = TSTATE_NONE;
   457 :}
   458 AND #imm, R0 {:  
   459     COUNT_INST(I_ANDI);
   460     load_reg( R_EAX, 0 );
   461     AND_imm32_r32(imm, R_EAX); 
   462     store_reg( R_EAX, 0 );
   463     sh4_x86.tstate = TSTATE_NONE;
   464 :}
   465 AND.B #imm, @(R0, GBR) {: 
   466     COUNT_INST(I_ANDB);
   467     load_reg( R_EAX, 0 );
   468     ADD_sh4r_r32( R_GBR, R_EAX );
   469     MOV_r32_esp8(R_EAX, 0);
   470     MEM_READ_BYTE( R_EAX, R_EDX );
   471     MOV_esp8_r32(0, R_EAX);
   472     AND_imm32_r32(imm, R_EDX );
   473     MEM_WRITE_BYTE( R_EAX, R_EDX );
   474     sh4_x86.tstate = TSTATE_NONE;
   475 :}
   476 CMP/EQ Rm, Rn {:  
   477     COUNT_INST(I_CMPEQ);
   478     load_reg( R_EAX, Rm );
   479     load_reg( R_ECX, Rn );
   480     CMP_r32_r32( R_EAX, R_ECX );
   481     SETE_t();
   482     sh4_x86.tstate = TSTATE_E;
   483 :}
   484 CMP/EQ #imm, R0 {:  
   485     COUNT_INST(I_CMPEQI);
   486     load_reg( R_EAX, 0 );
   487     CMP_imm8s_r32(imm, R_EAX);
   488     SETE_t();
   489     sh4_x86.tstate = TSTATE_E;
   490 :}
   491 CMP/GE Rm, Rn {:  
   492     COUNT_INST(I_CMPGE);
   493     load_reg( R_EAX, Rm );
   494     load_reg( R_ECX, Rn );
   495     CMP_r32_r32( R_EAX, R_ECX );
   496     SETGE_t();
   497     sh4_x86.tstate = TSTATE_GE;
   498 :}
   499 CMP/GT Rm, Rn {: 
   500     COUNT_INST(I_CMPGT);
   501     load_reg( R_EAX, Rm );
   502     load_reg( R_ECX, Rn );
   503     CMP_r32_r32( R_EAX, R_ECX );
   504     SETG_t();
   505     sh4_x86.tstate = TSTATE_G;
   506 :}
   507 CMP/HI Rm, Rn {:  
   508     COUNT_INST(I_CMPHI);
   509     load_reg( R_EAX, Rm );
   510     load_reg( R_ECX, Rn );
   511     CMP_r32_r32( R_EAX, R_ECX );
   512     SETA_t();
   513     sh4_x86.tstate = TSTATE_A;
   514 :}
   515 CMP/HS Rm, Rn {: 
   516     COUNT_INST(I_CMPHS);
   517     load_reg( R_EAX, Rm );
   518     load_reg( R_ECX, Rn );
   519     CMP_r32_r32( R_EAX, R_ECX );
   520     SETAE_t();
   521     sh4_x86.tstate = TSTATE_AE;
   522  :}
   523 CMP/PL Rn {: 
   524     COUNT_INST(I_CMPPL);
   525     load_reg( R_EAX, Rn );
   526     CMP_imm8s_r32( 0, R_EAX );
   527     SETG_t();
   528     sh4_x86.tstate = TSTATE_G;
   529 :}
   530 CMP/PZ Rn {:  
   531     COUNT_INST(I_CMPPZ);
   532     load_reg( R_EAX, Rn );
   533     CMP_imm8s_r32( 0, R_EAX );
   534     SETGE_t();
   535     sh4_x86.tstate = TSTATE_GE;
   536 :}
   537 CMP/STR Rm, Rn {:  
   538     COUNT_INST(I_CMPSTR);
   539     load_reg( R_EAX, Rm );
   540     load_reg( R_ECX, Rn );
   541     XOR_r32_r32( R_ECX, R_EAX );
   542     TEST_r8_r8( R_AL, R_AL );
   543     JE_rel8(target1);
   544     TEST_r8_r8( R_AH, R_AH );
   545     JE_rel8(target2);
   546     SHR_imm8_r32( 16, R_EAX );
   547     TEST_r8_r8( R_AL, R_AL );
   548     JE_rel8(target3);
   549     TEST_r8_r8( R_AH, R_AH );
   550     JMP_TARGET(target1);
   551     JMP_TARGET(target2);
   552     JMP_TARGET(target3);
   553     SETE_t();
   554     sh4_x86.tstate = TSTATE_E;
   555 :}
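        /* Editor's note: CMP/STR sets T when any one of the four bytes of
         * Rm and Rn is equal. After the XOR, an equal byte becomes 0x00,
         * so the chain of byte TESTs above is simply scanning for a zero
         * byte and jumping straight to the final SETE with ZF set. */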
   556 DIV0S Rm, Rn {:
   557     COUNT_INST(I_DIV0S);
   558     load_reg( R_EAX, Rm );
   559     load_reg( R_ECX, Rn );
   560     SHR_imm8_r32( 31, R_EAX );
   561     SHR_imm8_r32( 31, R_ECX );
   562     store_spreg( R_EAX, R_M );
   563     store_spreg( R_ECX, R_Q );
   564     CMP_r32_r32( R_EAX, R_ECX );
   565     SETNE_t();
   566     sh4_x86.tstate = TSTATE_NE;
   567 :}
   568 DIV0U {:  
   569     COUNT_INST(I_DIV0U);
   570     XOR_r32_r32( R_EAX, R_EAX );
   571     store_spreg( R_EAX, R_Q );
   572     store_spreg( R_EAX, R_M );
   573     store_spreg( R_EAX, R_T );
   574     sh4_x86.tstate = TSTATE_C; // works for DIV1
   575 :}
   576 DIV1 Rm, Rn {:
   577     COUNT_INST(I_DIV1);
   578     load_spreg( R_ECX, R_M );
   579     load_reg( R_EAX, Rn );
   580     if( sh4_x86.tstate != TSTATE_C ) {
   581 	LDC_t();
   582     }
   583     RCL1_r32( R_EAX );
   584     SETC_r8( R_DL ); // Q'
   585     CMP_sh4r_r32( R_Q, R_ECX );
   586     JE_rel8(mqequal);
   587     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   588     JMP_rel8(end);
   589     JMP_TARGET(mqequal);
   590     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   591     JMP_TARGET(end);
   592     store_reg( R_EAX, Rn ); // Done with Rn now
   593     SETC_r8(R_AL); // tmp1
   594     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   595     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   596     store_spreg( R_ECX, R_Q );
   597     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   598     MOVZX_r8_r32( R_AL, R_EAX );
   599     store_spreg( R_EAX, R_T );
   600     sh4_x86.tstate = TSTATE_NONE;
   601 :}
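        /* Editor's note: this is one step of the SH4's non-restoring
         * division. RCL shifts Rn left through T (the outgoing top bit is
         * saved as Q' in DL), Rm is then added or subtracted depending on
         * whether M == Q, and the XOR chain rebuilds Q and T to match the
         * hardware's DIV1 state update. */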
   602 DMULS.L Rm, Rn {:  
   603     COUNT_INST(I_DMULS);
   604     load_reg( R_EAX, Rm );
   605     load_reg( R_ECX, Rn );
   606     IMUL_r32(R_ECX);
   607     store_spreg( R_EDX, R_MACH );
   608     store_spreg( R_EAX, R_MACL );
   609     sh4_x86.tstate = TSTATE_NONE;
   610 :}
   611 DMULU.L Rm, Rn {:  
   612     COUNT_INST(I_DMULU);
   613     load_reg( R_EAX, Rm );
   614     load_reg( R_ECX, Rn );
   615     MUL_r32(R_ECX);
   616     store_spreg( R_EDX, R_MACH );
   617     store_spreg( R_EAX, R_MACL );    
   618     sh4_x86.tstate = TSTATE_NONE;
   619 :}
   620 DT Rn {:  
   621     COUNT_INST(I_DT);
   622     load_reg( R_EAX, Rn );
   623     ADD_imm8s_r32( -1, R_EAX );
   624     store_reg( R_EAX, Rn );
   625     SETE_t();
   626     sh4_x86.tstate = TSTATE_E;
   627 :}
   628 EXTS.B Rm, Rn {:  
   629     COUNT_INST(I_EXTSB);
   630     load_reg( R_EAX, Rm );
   631     MOVSX_r8_r32( R_EAX, R_EAX );
   632     store_reg( R_EAX, Rn );
   633 :}
   634 EXTS.W Rm, Rn {:  
   635     COUNT_INST(I_EXTSW);
   636     load_reg( R_EAX, Rm );
   637     MOVSX_r16_r32( R_EAX, R_EAX );
   638     store_reg( R_EAX, Rn );
   639 :}
   640 EXTU.B Rm, Rn {:  
   641     COUNT_INST(I_EXTUB);
   642     load_reg( R_EAX, Rm );
   643     MOVZX_r8_r32( R_EAX, R_EAX );
   644     store_reg( R_EAX, Rn );
   645 :}
   646 EXTU.W Rm, Rn {:  
   647     COUNT_INST(I_EXTUW);
   648     load_reg( R_EAX, Rm );
   649     MOVZX_r16_r32( R_EAX, R_EAX );
   650     store_reg( R_EAX, Rn );
   651 :}
   652 MAC.L @Rm+, @Rn+ {:
   653     COUNT_INST(I_MACL);
   654     if( Rm == Rn ) {
   655 	load_reg( R_EAX, Rm );
   656 	check_ralign32( R_EAX );
   657 	MEM_READ_LONG( R_EAX, R_EAX );
   658 	MOV_r32_esp8(R_EAX, 0);
   659 	load_reg( R_EAX, Rm );
   660 	LEA_r32disp8_r32( R_EAX, 4, R_EAX );
   661 	MEM_READ_LONG( R_EAX, R_EAX );
   662         ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
   663     } else {
   664 	load_reg( R_EAX, Rm );
   665 	check_ralign32( R_EAX );
   666 	MEM_READ_LONG( R_EAX, R_EAX );
   667 	MOV_r32_esp8( R_EAX, 0 );
   668 	load_reg( R_EAX, Rn );
   669 	check_ralign32( R_EAX );
   670 	MEM_READ_LONG( R_EAX, R_EAX );
   671 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   672 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   673     }
   675     IMUL_esp8( 0 );
   676     ADD_r32_sh4r( R_EAX, R_MACL );
   677     ADC_r32_sh4r( R_EDX, R_MACH );
   679     load_spreg( R_ECX, R_S );
   680     TEST_r32_r32(R_ECX, R_ECX);
   681     JE_rel8( nosat );
   682     call_func0( signsat48 );
   683     JMP_TARGET( nosat );
   684     sh4_x86.tstate = TSTATE_NONE;
   685 :}
   686 MAC.W @Rm+, @Rn+ {:  
   687     COUNT_INST(I_MACW);
   688     if( Rm == Rn ) {
   689 	load_reg( R_EAX, Rm );
   690 	check_ralign16( R_EAX );
   691 	MEM_READ_WORD( R_EAX, R_EAX );
   692         MOV_r32_esp8( R_EAX, 0 );
   693 	load_reg( R_EAX, Rm );
   694 	LEA_r32disp8_r32( R_EAX, 2, R_EAX );
   695 	MEM_READ_WORD( R_EAX, R_EAX );
   696 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
    697 	// Note: the address is translated twice in case the accesses cross a
    698 	// page boundary. Maybe worth adding a page-boundary check to skip the second translation
   699     } else {
   700 	load_reg( R_EAX, Rm );
   701 	check_ralign16( R_EAX );
   702 	MEM_READ_WORD( R_EAX, R_EAX );
   703         MOV_r32_esp8( R_EAX, 0 );
   704 	load_reg( R_EAX, Rn );
   705 	check_ralign16( R_EAX );
   706 	MEM_READ_WORD( R_EAX, R_EAX );
   707 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   708 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   709     }
   710     IMUL_esp8( 0 );
   711     load_spreg( R_ECX, R_S );
   712     TEST_r32_r32( R_ECX, R_ECX );
   713     JE_rel8( nosat );
   715     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   716     JNO_rel8( end );            // 2
   717     load_imm32( R_EDX, 1 );         // 5
   718     store_spreg( R_EDX, R_MACH );   // 6
   719     JS_rel8( positive );        // 2
   720     load_imm32( R_EAX, 0x80000000 );// 5
   721     store_spreg( R_EAX, R_MACL );   // 6
   722     JMP_rel8(end2);           // 2
   724     JMP_TARGET(positive);
   725     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   726     store_spreg( R_EAX, R_MACL );   // 6
   727     JMP_rel8(end3);            // 2
   729     JMP_TARGET(nosat);
   730     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   731     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   732     JMP_TARGET(end);
   733     JMP_TARGET(end2);
   734     JMP_TARGET(end3);
   735     sh4_x86.tstate = TSTATE_NONE;
   736 :}
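        /* Editor's note: with S == 1, MAC.W saturates the accumulation to
         * 32 bits and flags overflow by writing 1 to MACH -- which is why
         * the taken path above clamps MACL to 0x80000000/0x7FFFFFFF
         * instead of propagating the carry into MACH. */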
   737 MOVT Rn {:  
   738     COUNT_INST(I_MOVT);
   739     load_spreg( R_EAX, R_T );
   740     store_reg( R_EAX, Rn );
   741 :}
   742 MUL.L Rm, Rn {:  
   743     COUNT_INST(I_MULL);
   744     load_reg( R_EAX, Rm );
   745     load_reg( R_ECX, Rn );
   746     MUL_r32( R_ECX );
   747     store_spreg( R_EAX, R_MACL );
   748     sh4_x86.tstate = TSTATE_NONE;
   749 :}
   750 MULS.W Rm, Rn {:
   751     COUNT_INST(I_MULSW);
   752     load_reg16s( R_EAX, Rm );
   753     load_reg16s( R_ECX, Rn );
   754     MUL_r32( R_ECX );
   755     store_spreg( R_EAX, R_MACL );
   756     sh4_x86.tstate = TSTATE_NONE;
   757 :}
   758 MULU.W Rm, Rn {:  
   759     COUNT_INST(I_MULUW);
   760     load_reg16u( R_EAX, Rm );
   761     load_reg16u( R_ECX, Rn );
   762     MUL_r32( R_ECX );
   763     store_spreg( R_EAX, R_MACL );
   764     sh4_x86.tstate = TSTATE_NONE;
   765 :}
   766 NEG Rm, Rn {:
   767     COUNT_INST(I_NEG);
   768     load_reg( R_EAX, Rm );
   769     NEG_r32( R_EAX );
   770     store_reg( R_EAX, Rn );
   771     sh4_x86.tstate = TSTATE_NONE;
   772 :}
   773 NEGC Rm, Rn {:  
   774     COUNT_INST(I_NEGC);
   775     load_reg( R_EAX, Rm );
   776     XOR_r32_r32( R_ECX, R_ECX );
   777     LDC_t();
   778     SBB_r32_r32( R_EAX, R_ECX );
   779     store_reg( R_ECX, Rn );
   780     SETC_t();
   781     sh4_x86.tstate = TSTATE_C;
   782 :}
   783 NOT Rm, Rn {:  
   784     COUNT_INST(I_NOT);
   785     load_reg( R_EAX, Rm );
   786     NOT_r32( R_EAX );
   787     store_reg( R_EAX, Rn );
   788     sh4_x86.tstate = TSTATE_NONE;
   789 :}
   790 OR Rm, Rn {:  
   791     COUNT_INST(I_OR);
   792     load_reg( R_EAX, Rm );
   793     load_reg( R_ECX, Rn );
   794     OR_r32_r32( R_EAX, R_ECX );
   795     store_reg( R_ECX, Rn );
   796     sh4_x86.tstate = TSTATE_NONE;
   797 :}
   798 OR #imm, R0 {:
   799     COUNT_INST(I_ORI);
   800     load_reg( R_EAX, 0 );
   801     OR_imm32_r32(imm, R_EAX);
   802     store_reg( R_EAX, 0 );
   803     sh4_x86.tstate = TSTATE_NONE;
   804 :}
   805 OR.B #imm, @(R0, GBR) {:  
   806     COUNT_INST(I_ORB);
   807     load_reg( R_EAX, 0 );
   808     ADD_sh4r_r32( R_GBR, R_EAX );
   809     MOV_r32_esp8( R_EAX, 0 );
   810     MEM_READ_BYTE( R_EAX, R_EDX );
   811     MOV_esp8_r32( 0, R_EAX );
   812     OR_imm32_r32(imm, R_EDX );
   813     MEM_WRITE_BYTE( R_EAX, R_EDX );
   814     sh4_x86.tstate = TSTATE_NONE;
   815 :}
   816 ROTCL Rn {:
   817     COUNT_INST(I_ROTCL);
   818     load_reg( R_EAX, Rn );
   819     if( sh4_x86.tstate != TSTATE_C ) {
   820 	LDC_t();
   821     }
   822     RCL1_r32( R_EAX );
   823     store_reg( R_EAX, Rn );
   824     SETC_t();
   825     sh4_x86.tstate = TSTATE_C;
   826 :}
   827 ROTCR Rn {:  
   828     COUNT_INST(I_ROTCR);
   829     load_reg( R_EAX, Rn );
   830     if( sh4_x86.tstate != TSTATE_C ) {
   831 	LDC_t();
   832     }
   833     RCR1_r32( R_EAX );
   834     store_reg( R_EAX, Rn );
   835     SETC_t();
   836     sh4_x86.tstate = TSTATE_C;
   837 :}
   838 ROTL Rn {:  
   839     COUNT_INST(I_ROTL);
   840     load_reg( R_EAX, Rn );
   841     ROL1_r32( R_EAX );
   842     store_reg( R_EAX, Rn );
   843     SETC_t();
   844     sh4_x86.tstate = TSTATE_C;
   845 :}
   846 ROTR Rn {:  
   847     COUNT_INST(I_ROTR);
   848     load_reg( R_EAX, Rn );
   849     ROR1_r32( R_EAX );
   850     store_reg( R_EAX, Rn );
   851     SETC_t();
   852     sh4_x86.tstate = TSTATE_C;
   853 :}
   854 SHAD Rm, Rn {:
   855     COUNT_INST(I_SHAD);
   856     /* Annoyingly enough, not directly convertible */
   857     load_reg( R_EAX, Rn );
   858     load_reg( R_ECX, Rm );
   859     CMP_imm32_r32( 0, R_ECX );
   860     JGE_rel8(doshl);
   862     NEG_r32( R_ECX );      // 2
   863     AND_imm8_r8( 0x1F, R_CL ); // 3
   864     JE_rel8(emptysar);     // 2
   865     SAR_r32_CL( R_EAX );       // 2
   866     JMP_rel8(end);          // 2
   868     JMP_TARGET(emptysar);
   869     SAR_imm8_r32(31, R_EAX );  // 3
   870     JMP_rel8(end2);
   872     JMP_TARGET(doshl);
   873     AND_imm8_r8( 0x1F, R_CL ); // 3
   874     SHL_r32_CL( R_EAX );       // 2
   875     JMP_TARGET(end);
   876     JMP_TARGET(end2);
   877     store_reg( R_EAX, Rn );
   878     sh4_x86.tstate = TSTATE_NONE;
   879 :}
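        /* Editor's note on "not directly convertible": the IA-32 shifter
         * masks shift counts to 5 bits, so an SH4 right-shift amount whose
         * low five bits are zero (i.e. a shift by 32) would become a no-op
         * on x86. The emptysar path substitutes SAR by 31 to produce the
         * sign fill SHAD requires (SHLD's equivalent path zeroes the
         * register instead). */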
   880 SHLD Rm, Rn {:  
   881     COUNT_INST(I_SHLD);
   882     load_reg( R_EAX, Rn );
   883     load_reg( R_ECX, Rm );
   884     CMP_imm32_r32( 0, R_ECX );
   885     JGE_rel8(doshl);
   887     NEG_r32( R_ECX );      // 2
   888     AND_imm8_r8( 0x1F, R_CL ); // 3
   889     JE_rel8(emptyshr );
   890     SHR_r32_CL( R_EAX );       // 2
   891     JMP_rel8(end);          // 2
   893     JMP_TARGET(emptyshr);
   894     XOR_r32_r32( R_EAX, R_EAX );
   895     JMP_rel8(end2);
   897     JMP_TARGET(doshl);
   898     AND_imm8_r8( 0x1F, R_CL ); // 3
   899     SHL_r32_CL( R_EAX );       // 2
   900     JMP_TARGET(end);
   901     JMP_TARGET(end2);
   902     store_reg( R_EAX, Rn );
   903     sh4_x86.tstate = TSTATE_NONE;
   904 :}
   905 SHAL Rn {: 
   906     COUNT_INST(I_SHAL);
   907     load_reg( R_EAX, Rn );
   908     SHL1_r32( R_EAX );
   909     SETC_t();
   910     store_reg( R_EAX, Rn );
   911     sh4_x86.tstate = TSTATE_C;
   912 :}
   913 SHAR Rn {:  
   914     COUNT_INST(I_SHAR);
   915     load_reg( R_EAX, Rn );
   916     SAR1_r32( R_EAX );
   917     SETC_t();
   918     store_reg( R_EAX, Rn );
   919     sh4_x86.tstate = TSTATE_C;
   920 :}
   921 SHLL Rn {:  
   922     COUNT_INST(I_SHLL);
   923     load_reg( R_EAX, Rn );
   924     SHL1_r32( R_EAX );
   925     SETC_t();
   926     store_reg( R_EAX, Rn );
   927     sh4_x86.tstate = TSTATE_C;
   928 :}
   929 SHLL2 Rn {:
   930     COUNT_INST(I_SHLL);
   931     load_reg( R_EAX, Rn );
   932     SHL_imm8_r32( 2, R_EAX );
   933     store_reg( R_EAX, Rn );
   934     sh4_x86.tstate = TSTATE_NONE;
   935 :}
   936 SHLL8 Rn {:  
   937     COUNT_INST(I_SHLL);
   938     load_reg( R_EAX, Rn );
   939     SHL_imm8_r32( 8, R_EAX );
   940     store_reg( R_EAX, Rn );
   941     sh4_x86.tstate = TSTATE_NONE;
   942 :}
   943 SHLL16 Rn {:  
   944     COUNT_INST(I_SHLL);
   945     load_reg( R_EAX, Rn );
   946     SHL_imm8_r32( 16, R_EAX );
   947     store_reg( R_EAX, Rn );
   948     sh4_x86.tstate = TSTATE_NONE;
   949 :}
   950 SHLR Rn {:  
   951     COUNT_INST(I_SHLR);
   952     load_reg( R_EAX, Rn );
   953     SHR1_r32( R_EAX );
   954     SETC_t();
   955     store_reg( R_EAX, Rn );
   956     sh4_x86.tstate = TSTATE_C;
   957 :}
   958 SHLR2 Rn {:  
   959     COUNT_INST(I_SHLR);
   960     load_reg( R_EAX, Rn );
   961     SHR_imm8_r32( 2, R_EAX );
   962     store_reg( R_EAX, Rn );
   963     sh4_x86.tstate = TSTATE_NONE;
   964 :}
   965 SHLR8 Rn {:  
   966     COUNT_INST(I_SHLR);
   967     load_reg( R_EAX, Rn );
   968     SHR_imm8_r32( 8, R_EAX );
   969     store_reg( R_EAX, Rn );
   970     sh4_x86.tstate = TSTATE_NONE;
   971 :}
   972 SHLR16 Rn {:  
   973     COUNT_INST(I_SHLR);
   974     load_reg( R_EAX, Rn );
   975     SHR_imm8_r32( 16, R_EAX );
   976     store_reg( R_EAX, Rn );
   977     sh4_x86.tstate = TSTATE_NONE;
   978 :}
   979 SUB Rm, Rn {:  
   980     COUNT_INST(I_SUB);
   981     load_reg( R_EAX, Rm );
   982     load_reg( R_ECX, Rn );
   983     SUB_r32_r32( R_EAX, R_ECX );
   984     store_reg( R_ECX, Rn );
   985     sh4_x86.tstate = TSTATE_NONE;
   986 :}
   987 SUBC Rm, Rn {:  
   988     COUNT_INST(I_SUBC);
   989     load_reg( R_EAX, Rm );
   990     load_reg( R_ECX, Rn );
   991     if( sh4_x86.tstate != TSTATE_C ) {
   992 	LDC_t();
   993     }
   994     SBB_r32_r32( R_EAX, R_ECX );
   995     store_reg( R_ECX, Rn );
   996     SETC_t();
   997     sh4_x86.tstate = TSTATE_C;
   998 :}
   999 SUBV Rm, Rn {:  
  1000     COUNT_INST(I_SUBV);
  1001     load_reg( R_EAX, Rm );
  1002     load_reg( R_ECX, Rn );
  1003     SUB_r32_r32( R_EAX, R_ECX );
  1004     store_reg( R_ECX, Rn );
  1005     SETO_t();
  1006     sh4_x86.tstate = TSTATE_O;
  1007 :}
  1008 SWAP.B Rm, Rn {:  
  1009     COUNT_INST(I_SWAPB);
  1010     load_reg( R_EAX, Rm );
  1011     XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
  1012     store_reg( R_EAX, Rn );
  1013 :}
  1014 SWAP.W Rm, Rn {:  
   1015     COUNT_INST(I_SWAPW);
  1016     load_reg( R_EAX, Rm );
  1017     MOV_r32_r32( R_EAX, R_ECX );
  1018     SHL_imm8_r32( 16, R_ECX );
  1019     SHR_imm8_r32( 16, R_EAX );
  1020     OR_r32_r32( R_EAX, R_ECX );
  1021     store_reg( R_ECX, Rn );
  1022     sh4_x86.tstate = TSTATE_NONE;
  1023 :}
  1024 TAS.B @Rn {:  
  1025     COUNT_INST(I_TASB);
  1026     load_reg( R_EAX, Rn );
  1027     MOV_r32_esp8( R_EAX, 0 );
  1028     MEM_READ_BYTE( R_EAX, R_EDX );
  1029     TEST_r8_r8( R_DL, R_DL );
  1030     SETE_t();
  1031     OR_imm8_r8( 0x80, R_DL );
  1032     MOV_esp8_r32( 0, R_EAX );
  1033     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1034     sh4_x86.tstate = TSTATE_NONE;
  1035 :}
  1036 TST Rm, Rn {:  
  1037     COUNT_INST(I_TST);
  1038     load_reg( R_EAX, Rm );
  1039     load_reg( R_ECX, Rn );
  1040     TEST_r32_r32( R_EAX, R_ECX );
  1041     SETE_t();
  1042     sh4_x86.tstate = TSTATE_E;
  1043 :}
  1044 TST #imm, R0 {:  
  1045     COUNT_INST(I_TSTI);
  1046     load_reg( R_EAX, 0 );
  1047     TEST_imm32_r32( imm, R_EAX );
  1048     SETE_t();
  1049     sh4_x86.tstate = TSTATE_E;
  1050 :}
  1051 TST.B #imm, @(R0, GBR) {:  
  1052     COUNT_INST(I_TSTB);
  1053     load_reg( R_EAX, 0);
  1054     ADD_sh4r_r32( R_GBR, R_EAX );
  1055     MEM_READ_BYTE( R_EAX, R_EAX );
  1056     TEST_imm8_r8( imm, R_AL );
  1057     SETE_t();
  1058     sh4_x86.tstate = TSTATE_E;
  1059 :}
  1060 XOR Rm, Rn {:  
  1061     COUNT_INST(I_XOR);
  1062     load_reg( R_EAX, Rm );
  1063     load_reg( R_ECX, Rn );
  1064     XOR_r32_r32( R_EAX, R_ECX );
  1065     store_reg( R_ECX, Rn );
  1066     sh4_x86.tstate = TSTATE_NONE;
  1067 :}
  1068 XOR #imm, R0 {:  
  1069     COUNT_INST(I_XORI);
  1070     load_reg( R_EAX, 0 );
  1071     XOR_imm32_r32( imm, R_EAX );
  1072     store_reg( R_EAX, 0 );
  1073     sh4_x86.tstate = TSTATE_NONE;
  1074 :}
  1075 XOR.B #imm, @(R0, GBR) {:  
  1076     COUNT_INST(I_XORB);
  1077     load_reg( R_EAX, 0 );
  1078     ADD_sh4r_r32( R_GBR, R_EAX ); 
  1079     MOV_r32_esp8( R_EAX, 0 );
  1080     MEM_READ_BYTE(R_EAX, R_EDX);
  1081     MOV_esp8_r32( 0, R_EAX );
  1082     XOR_imm32_r32( imm, R_EDX );
  1083     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1084     sh4_x86.tstate = TSTATE_NONE;
  1085 :}
  1086 XTRCT Rm, Rn {:
  1087     COUNT_INST(I_XTRCT);
  1088     load_reg( R_EAX, Rm );
  1089     load_reg( R_ECX, Rn );
  1090     SHL_imm8_r32( 16, R_EAX );
  1091     SHR_imm8_r32( 16, R_ECX );
  1092     OR_r32_r32( R_EAX, R_ECX );
  1093     store_reg( R_ECX, Rn );
  1094     sh4_x86.tstate = TSTATE_NONE;
  1095 :}
  1097 /* Data move instructions */
  1098 MOV Rm, Rn {:  
  1099     COUNT_INST(I_MOV);
  1100     load_reg( R_EAX, Rm );
  1101     store_reg( R_EAX, Rn );
  1102 :}
  1103 MOV #imm, Rn {:  
  1104     COUNT_INST(I_MOVI);
  1105     load_imm32( R_EAX, imm );
  1106     store_reg( R_EAX, Rn );
  1107 :}
  1108 MOV.B Rm, @Rn {:  
  1109     COUNT_INST(I_MOVB);
  1110     load_reg( R_EAX, Rn );
  1111     load_reg( R_EDX, Rm );
  1112     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1113     sh4_x86.tstate = TSTATE_NONE;
  1114 :}
  1115 MOV.B Rm, @-Rn {:  
  1116     COUNT_INST(I_MOVB);
  1117     load_reg( R_EAX, Rn );
  1118     LEA_r32disp8_r32( R_EAX, -1, R_EAX );
  1119     load_reg( R_EDX, Rm );
  1120     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1121     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
  1122     sh4_x86.tstate = TSTATE_NONE;
  1123 :}
  1124 MOV.B Rm, @(R0, Rn) {:  
  1125     COUNT_INST(I_MOVB);
  1126     load_reg( R_EAX, 0 );
  1127     ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1128     load_reg( R_EDX, Rm );
  1129     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1130     sh4_x86.tstate = TSTATE_NONE;
  1131 :}
  1132 MOV.B R0, @(disp, GBR) {:  
  1133     COUNT_INST(I_MOVB);
  1134     load_spreg( R_EAX, R_GBR );
  1135     ADD_imm32_r32( disp, R_EAX );
  1136     load_reg( R_EDX, 0 );
  1137     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1138     sh4_x86.tstate = TSTATE_NONE;
  1139 :}
  1140 MOV.B R0, @(disp, Rn) {:  
  1141     COUNT_INST(I_MOVB);
  1142     load_reg( R_EAX, Rn );
  1143     ADD_imm32_r32( disp, R_EAX );
  1144     load_reg( R_EDX, 0 );
  1145     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1146     sh4_x86.tstate = TSTATE_NONE;
  1147 :}
  1148 MOV.B @Rm, Rn {:  
  1149     COUNT_INST(I_MOVB);
  1150     load_reg( R_EAX, Rm );
  1151     MEM_READ_BYTE( R_EAX, R_EAX );
  1152     store_reg( R_EAX, Rn );
  1153     sh4_x86.tstate = TSTATE_NONE;
  1154 :}
  1155 MOV.B @Rm+, Rn {:  
  1156     COUNT_INST(I_MOVB);
  1157     load_reg( R_EAX, Rm );
  1158     MEM_READ_BYTE( R_EAX, R_EAX );
  1159     if( Rm != Rn ) {
   1160     	ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
   1161     }
  1162     store_reg( R_EAX, Rn );
  1163     sh4_x86.tstate = TSTATE_NONE;
  1164 :}
  1165 MOV.B @(R0, Rm), Rn {:  
  1166     COUNT_INST(I_MOVB);
  1167     load_reg( R_EAX, 0 );
  1168     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
  1169     MEM_READ_BYTE( R_EAX, R_EAX );
  1170     store_reg( R_EAX, Rn );
  1171     sh4_x86.tstate = TSTATE_NONE;
  1172 :}
  1173 MOV.B @(disp, GBR), R0 {:  
  1174     COUNT_INST(I_MOVB);
  1175     load_spreg( R_EAX, R_GBR );
  1176     ADD_imm32_r32( disp, R_EAX );
  1177     MEM_READ_BYTE( R_EAX, R_EAX );
  1178     store_reg( R_EAX, 0 );
  1179     sh4_x86.tstate = TSTATE_NONE;
  1180 :}
  1181 MOV.B @(disp, Rm), R0 {:  
  1182     COUNT_INST(I_MOVB);
  1183     load_reg( R_EAX, Rm );
  1184     ADD_imm32_r32( disp, R_EAX );
  1185     MEM_READ_BYTE( R_EAX, R_EAX );
  1186     store_reg( R_EAX, 0 );
  1187     sh4_x86.tstate = TSTATE_NONE;
  1188 :}
  1189 MOV.L Rm, @Rn {:
  1190     COUNT_INST(I_MOVL);
  1191     load_reg( R_EAX, Rn );
  1192     check_walign32(R_EAX);
  1193     MOV_r32_r32( R_EAX, R_ECX );
  1194     AND_imm32_r32( 0xFC000000, R_ECX );
  1195     CMP_imm32_r32( 0xE0000000, R_ECX );
  1196     JNE_rel8( notsq );
  1197     AND_imm8s_r32( 0x3C, R_EAX );
  1198     load_reg( R_EDX, Rm );
  1199     MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
  1200     JMP_rel8(end);
  1201     JMP_TARGET(notsq);
  1202     load_reg( R_EDX, Rm );
  1203     MEM_WRITE_LONG( R_EAX, R_EDX );
  1204     JMP_TARGET(end);
  1205     sh4_x86.tstate = TSTATE_NONE;
  1206 :}
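        /* Editor's note: the masked compare above (addr & 0xFC000000 ==
         * 0xE0000000) detects the SH4 store-queue area at
         * 0xE0000000-0xE3FFFFFF; such writes go straight into the
         * store_queue buffer inside sh4r rather than through the memory
         * subsystem. The same fast path appears in MOV.L Rm, @(disp, Rn)
         * below. */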
  1207 MOV.L Rm, @-Rn {:  
  1208     COUNT_INST(I_MOVL);
  1209     load_reg( R_EAX, Rn );
  1210     ADD_imm8s_r32( -4, R_EAX );
  1211     check_walign32( R_EAX );
  1212     load_reg( R_EDX, Rm );
  1213     MEM_WRITE_LONG( R_EAX, R_EDX );
  1214     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  1215     sh4_x86.tstate = TSTATE_NONE;
  1216 :}
  1217 MOV.L Rm, @(R0, Rn) {:  
  1218     COUNT_INST(I_MOVL);
  1219     load_reg( R_EAX, 0 );
  1220     ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1221     check_walign32( R_EAX );
  1222     load_reg( R_EDX, Rm );
  1223     MEM_WRITE_LONG( R_EAX, R_EDX );
  1224     sh4_x86.tstate = TSTATE_NONE;
  1225 :}
  1226 MOV.L R0, @(disp, GBR) {:  
  1227     COUNT_INST(I_MOVL);
  1228     load_spreg( R_EAX, R_GBR );
  1229     ADD_imm32_r32( disp, R_EAX );
  1230     check_walign32( R_EAX );
  1231     load_reg( R_EDX, 0 );
  1232     MEM_WRITE_LONG( R_EAX, R_EDX );
  1233     sh4_x86.tstate = TSTATE_NONE;
  1234 :}
  1235 MOV.L Rm, @(disp, Rn) {:  
  1236     COUNT_INST(I_MOVL);
  1237     load_reg( R_EAX, Rn );
  1238     ADD_imm32_r32( disp, R_EAX );
  1239     check_walign32( R_EAX );
  1240     MOV_r32_r32( R_EAX, R_ECX );
  1241     AND_imm32_r32( 0xFC000000, R_ECX );
  1242     CMP_imm32_r32( 0xE0000000, R_ECX );
  1243     JNE_rel8( notsq );
  1244     AND_imm8s_r32( 0x3C, R_EAX );
  1245     load_reg( R_EDX, Rm );
  1246     MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
  1247     JMP_rel8(end);
  1248     JMP_TARGET(notsq);
  1249     load_reg( R_EDX, Rm );
  1250     MEM_WRITE_LONG( R_EAX, R_EDX );
  1251     JMP_TARGET(end);
  1252     sh4_x86.tstate = TSTATE_NONE;
  1253 :}
  1254 MOV.L @Rm, Rn {:  
  1255     COUNT_INST(I_MOVL);
  1256     load_reg( R_EAX, Rm );
  1257     check_ralign32( R_EAX );
  1258     MEM_READ_LONG( R_EAX, R_EAX );
  1259     store_reg( R_EAX, Rn );
  1260     sh4_x86.tstate = TSTATE_NONE;
  1261 :}
  1262 MOV.L @Rm+, Rn {:  
  1263     COUNT_INST(I_MOVL);
  1264     load_reg( R_EAX, Rm );
  1265     check_ralign32( R_EAX );
  1266     MEM_READ_LONG( R_EAX, R_EAX );
  1267     if( Rm != Rn ) {
   1268     	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   1269     }
  1270     store_reg( R_EAX, Rn );
  1271     sh4_x86.tstate = TSTATE_NONE;
  1272 :}
  1273 MOV.L @(R0, Rm), Rn {:  
  1274     COUNT_INST(I_MOVL);
  1275     load_reg( R_EAX, 0 );
  1276     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
  1277     check_ralign32( R_EAX );
  1278     MEM_READ_LONG( R_EAX, R_EAX );
  1279     store_reg( R_EAX, Rn );
  1280     sh4_x86.tstate = TSTATE_NONE;
  1281 :}
  1282 MOV.L @(disp, GBR), R0 {:
  1283     COUNT_INST(I_MOVL);
  1284     load_spreg( R_EAX, R_GBR );
  1285     ADD_imm32_r32( disp, R_EAX );
  1286     check_ralign32( R_EAX );
  1287     MEM_READ_LONG( R_EAX, R_EAX );
  1288     store_reg( R_EAX, 0 );
  1289     sh4_x86.tstate = TSTATE_NONE;
  1290 :}
  1291 MOV.L @(disp, PC), Rn {:  
  1292     COUNT_INST(I_MOVLPC);
  1293     if( sh4_x86.in_delay_slot ) {
  1294 	SLOTILLEGAL();
  1295     } else {
  1296 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1297 	if( IS_IN_ICACHE(target) ) {
  1298 	    // If the target address is in the same page as the code, it's
  1299 	    // pretty safe to just ref it directly and circumvent the whole
  1300 	    // memory subsystem. (this is a big performance win)
  1302 	    // FIXME: There's a corner-case that's not handled here when
  1303 	    // the current code-page is in the ITLB but not in the UTLB.
  1304 	    // (should generate a TLB miss although need to test SH4 
  1305 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1306 	    // behaviour though.
  1307 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1308 	    MOV_moff32_EAX( ptr );
  1309 	} else {
  1310 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1311 	    // different virtual address than the translation was done with,
  1312 	    // but we can safely assume that the low bits are the same.
  1313 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1314 	    ADD_sh4r_r32( R_PC, R_EAX );
  1315 	    MEM_READ_LONG( R_EAX, R_EAX );
  1316 	    sh4_x86.tstate = TSTATE_NONE;
   1317 	}
   1318 	store_reg( R_EAX, Rn );
   1319     }
  1320 :}
  1321 MOV.L @(disp, Rm), Rn {:  
  1322     COUNT_INST(I_MOVL);
  1323     load_reg( R_EAX, Rm );
  1324     ADD_imm8s_r32( disp, R_EAX );
  1325     check_ralign32( R_EAX );
  1326     MEM_READ_LONG( R_EAX, R_EAX );
  1327     store_reg( R_EAX, Rn );
  1328     sh4_x86.tstate = TSTATE_NONE;
  1329 :}
  1330 MOV.W Rm, @Rn {:  
  1331     COUNT_INST(I_MOVW);
  1332     load_reg( R_EAX, Rn );
  1333     check_walign16( R_EAX );
  1334     load_reg( R_EDX, Rm );
  1335     MEM_WRITE_WORD( R_EAX, R_EDX );
  1336     sh4_x86.tstate = TSTATE_NONE;
  1337 :}
  1338 MOV.W Rm, @-Rn {:  
  1339     COUNT_INST(I_MOVW);
  1340     load_reg( R_EAX, Rn );
  1341     check_walign16( R_EAX );
  1342     LEA_r32disp8_r32( R_EAX, -2, R_EAX );
  1343     load_reg( R_EDX, Rm );
  1344     MEM_WRITE_WORD( R_EAX, R_EDX );
  1345     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
  1346     sh4_x86.tstate = TSTATE_NONE;
  1347 :}
  1348 MOV.W Rm, @(R0, Rn) {:  
  1349     COUNT_INST(I_MOVW);
  1350     load_reg( R_EAX, 0 );
  1351     ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1352     check_walign16( R_EAX );
  1353     load_reg( R_EDX, Rm );
  1354     MEM_WRITE_WORD( R_EAX, R_EDX );
  1355     sh4_x86.tstate = TSTATE_NONE;
  1356 :}
  1357 MOV.W R0, @(disp, GBR) {:  
  1358     COUNT_INST(I_MOVW);
  1359     load_spreg( R_EAX, R_GBR );
  1360     ADD_imm32_r32( disp, R_EAX );
  1361     check_walign16( R_EAX );
  1362     load_reg( R_EDX, 0 );
  1363     MEM_WRITE_WORD( R_EAX, R_EDX );
  1364     sh4_x86.tstate = TSTATE_NONE;
  1365 :}
  1366 MOV.W R0, @(disp, Rn) {:  
  1367     COUNT_INST(I_MOVW);
  1368     load_reg( R_EAX, Rn );
  1369     ADD_imm32_r32( disp, R_EAX );
  1370     check_walign16( R_EAX );
  1371     load_reg( R_EDX, 0 );
  1372     MEM_WRITE_WORD( R_EAX, R_EDX );
  1373     sh4_x86.tstate = TSTATE_NONE;
  1374 :}
  1375 MOV.W @Rm, Rn {:  
  1376     COUNT_INST(I_MOVW);
  1377     load_reg( R_EAX, Rm );
  1378     check_ralign16( R_EAX );
  1379     MEM_READ_WORD( R_EAX, R_EAX );
  1380     store_reg( R_EAX, Rn );
  1381     sh4_x86.tstate = TSTATE_NONE;
  1382 :}
  1383 MOV.W @Rm+, Rn {:  
  1384     COUNT_INST(I_MOVW);
  1385     load_reg( R_EAX, Rm );
  1386     check_ralign16( R_EAX );
  1387     MEM_READ_WORD( R_EAX, R_EAX );
  1388     if( Rm != Rn ) {
   1389         ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   1390     }
  1391     store_reg( R_EAX, Rn );
  1392     sh4_x86.tstate = TSTATE_NONE;
  1393 :}
  1394 MOV.W @(R0, Rm), Rn {:  
  1395     COUNT_INST(I_MOVW);
  1396     load_reg( R_EAX, 0 );
  1397     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
  1398     check_ralign16( R_EAX );
  1399     MEM_READ_WORD( R_EAX, R_EAX );
  1400     store_reg( R_EAX, Rn );
  1401     sh4_x86.tstate = TSTATE_NONE;
  1402 :}
  1403 MOV.W @(disp, GBR), R0 {:  
  1404     COUNT_INST(I_MOVW);
  1405     load_spreg( R_EAX, R_GBR );
  1406     ADD_imm32_r32( disp, R_EAX );
  1407     check_ralign16( R_EAX );
  1408     MEM_READ_WORD( R_EAX, R_EAX );
  1409     store_reg( R_EAX, 0 );
  1410     sh4_x86.tstate = TSTATE_NONE;
  1411 :}
  1412 MOV.W @(disp, PC), Rn {:  
  1413     COUNT_INST(I_MOVW);
  1414     if( sh4_x86.in_delay_slot ) {
  1415 	SLOTILLEGAL();
  1416     } else {
  1417 	// See comments for MOV.L @(disp, PC), Rn
  1418 	uint32_t target = pc + disp + 4;
  1419 	if( IS_IN_ICACHE(target) ) {
  1420 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1421 	    MOV_moff32_EAX( ptr );
  1422 	    MOVSX_r16_r32( R_EAX, R_EAX );
  1423 	} else {
  1424 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1425 	    ADD_sh4r_r32( R_PC, R_EAX );
  1426 	    MEM_READ_WORD( R_EAX, R_EAX );
  1427 	    sh4_x86.tstate = TSTATE_NONE;
   1428 	}
   1429 	store_reg( R_EAX, Rn );
   1430     }
  1431 :}
  1432 MOV.W @(disp, Rm), R0 {:  
  1433     COUNT_INST(I_MOVW);
  1434     load_reg( R_EAX, Rm );
  1435     ADD_imm32_r32( disp, R_EAX );
  1436     check_ralign16( R_EAX );
  1437     MEM_READ_WORD( R_EAX, R_EAX );
  1438     store_reg( R_EAX, 0 );
  1439     sh4_x86.tstate = TSTATE_NONE;
  1440 :}
  1441 MOVA @(disp, PC), R0 {:  
  1442     COUNT_INST(I_MOVA);
  1443     if( sh4_x86.in_delay_slot ) {
  1444 	SLOTILLEGAL();
  1445     } else {
  1446 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1447 	ADD_sh4r_r32( R_PC, R_ECX );
  1448 	store_reg( R_ECX, 0 );
   1449 	sh4_x86.tstate = TSTATE_NONE;
   1450     }
  1451 :}
  1452 MOVCA.L R0, @Rn {:  
  1453     COUNT_INST(I_MOVCA);
  1454     load_reg( R_EAX, Rn );
  1455     check_walign32( R_EAX );
  1456     load_reg( R_EDX, 0 );
  1457     MEM_WRITE_LONG( R_EAX, R_EDX );
  1458     sh4_x86.tstate = TSTATE_NONE;
  1459 :}
  1461 /* Control transfer instructions */
  1462 BF disp {:
  1463     COUNT_INST(I_BF);
  1464     if( sh4_x86.in_delay_slot ) {
  1465 	SLOTILLEGAL();
  1466     } else {
  1467 	sh4vma_t target = disp + pc + 4;
  1468 	JT_rel8( nottaken );
  1469 	exit_block_rel(target, pc+2 );
  1470 	JMP_TARGET(nottaken);
   1471 	return 2;
   1472     }
  1473 :}
  1474 BF/S disp {:
  1475     COUNT_INST(I_BFS);
  1476     if( sh4_x86.in_delay_slot ) {
  1477 	SLOTILLEGAL();
  1478     } else {
  1479 	sh4_x86.in_delay_slot = DELAY_PC;
  1480 	if( UNTRANSLATABLE(pc+2) ) {
  1481 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1482 	    JT_rel8(nottaken);
  1483 	    ADD_imm32_r32( disp, R_EAX );
  1484 	    JMP_TARGET(nottaken);
  1485 	    ADD_sh4r_r32( R_PC, R_EAX );
  1486 	    store_spreg( R_EAX, R_NEW_PC );
  1487 	    exit_block_emu(pc+2);
  1488 	    sh4_x86.branch_taken = TRUE;
  1489 	    return 2;
  1490 	} else {
  1491 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1492 		CMP_imm8s_sh4r( 1, R_T );
   1493 		sh4_x86.tstate = TSTATE_E;
   1494 	    }
  1495 	    sh4vma_t target = disp + pc + 4;
  1496 	    OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
  1497 	    int save_tstate = sh4_x86.tstate;
  1498 	    sh4_translate_instruction(pc+2);
  1499 	    exit_block_rel( target, pc+4 );
  1501 	    // not taken
  1502 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1503 	    sh4_x86.tstate = save_tstate;
  1504 	    sh4_translate_instruction(pc+2);
   1505 	    return 4;
   1506 	}
   1507     }
  1508 :}
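        /* Editor's note on the rel32 fixup above: a Jcc rel32 displacement
         * is measured from the end of the instruction, so
         * (xlat_output - (uint8_t *)patch) - 4 is exactly the distance
         * from the 4-byte displacement field to the start of the
         * fall-through ("not taken") translation. BT/S below uses the
         * identical pattern with the inverted condition. */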
  1509 BRA disp {:  
  1510     COUNT_INST(I_BRA);
  1511     if( sh4_x86.in_delay_slot ) {
  1512 	SLOTILLEGAL();
  1513     } else {
  1514 	sh4_x86.in_delay_slot = DELAY_PC;
  1515 	sh4_x86.branch_taken = TRUE;
  1516 	if( UNTRANSLATABLE(pc+2) ) {
  1517 	    load_spreg( R_EAX, R_PC );
  1518 	    ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
  1519 	    store_spreg( R_EAX, R_NEW_PC );
  1520 	    exit_block_emu(pc+2);
  1521 	    return 2;
  1522 	} else {
  1523 	    sh4_translate_instruction( pc + 2 );
  1524 	    exit_block_rel( disp + pc + 4, pc+4 );
   1525 	    return 4;
   1526 	}
   1527     }
  1528 :}
  1529 BRAF Rn {:  
  1530     COUNT_INST(I_BRAF);
  1531     if( sh4_x86.in_delay_slot ) {
  1532 	SLOTILLEGAL();
  1533     } else {
  1534 	load_spreg( R_EAX, R_PC );
  1535 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1536 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1537 	store_spreg( R_EAX, R_NEW_PC );
  1538 	sh4_x86.in_delay_slot = DELAY_PC;
  1539 	sh4_x86.tstate = TSTATE_NONE;
  1540 	sh4_x86.branch_taken = TRUE;
  1541 	if( UNTRANSLATABLE(pc+2) ) {
  1542 	    exit_block_emu(pc+2);
  1543 	    return 2;
  1544 	} else {
  1545 	    sh4_translate_instruction( pc + 2 );
  1546 	    exit_block_newpcset(pc+2);
   1547 	    return 4;
   1548 	}
   1549     }
  1550 :}
  1551 BSR disp {:  
  1552     COUNT_INST(I_BSR);
  1553     if( sh4_x86.in_delay_slot ) {
  1554 	SLOTILLEGAL();
  1555     } else {
  1556 	load_spreg( R_EAX, R_PC );
  1557 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1558 	store_spreg( R_EAX, R_PR );
  1559 	sh4_x86.in_delay_slot = DELAY_PC;
  1560 	sh4_x86.branch_taken = TRUE;
  1561 	sh4_x86.tstate = TSTATE_NONE;
  1562 	if( UNTRANSLATABLE(pc+2) ) {
  1563 	    ADD_imm32_r32( disp, R_EAX );
  1564 	    store_spreg( R_EAX, R_NEW_PC );
  1565 	    exit_block_emu(pc+2);
  1566 	    return 2;
  1567 	} else {
  1568 	    sh4_translate_instruction( pc + 2 );
  1569 	    exit_block_rel( disp + pc + 4, pc+4 );
   1570 	    return 4;
   1571 	}
   1572     }
  1573 :}
  1574 BSRF Rn {:  
  1575     COUNT_INST(I_BSRF);
  1576     if( sh4_x86.in_delay_slot ) {
  1577 	SLOTILLEGAL();
  1578     } else {
  1579 	load_spreg( R_EAX, R_PC );
  1580 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1581 	store_spreg( R_EAX, R_PR );
  1582 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1583 	store_spreg( R_EAX, R_NEW_PC );
  1585 	sh4_x86.in_delay_slot = DELAY_PC;
  1586 	sh4_x86.tstate = TSTATE_NONE;
  1587 	sh4_x86.branch_taken = TRUE;
  1588 	if( UNTRANSLATABLE(pc+2) ) {
  1589 	    exit_block_emu(pc+2);
  1590 	    return 2;
  1591 	} else {
  1592 	    sh4_translate_instruction( pc + 2 );
  1593 	    exit_block_newpcset(pc+2);
   1594 	    return 4;
   1595 	}
   1596     }
  1597 :}
  1598 BT disp {:
  1599     COUNT_INST(I_BT);
  1600     if( sh4_x86.in_delay_slot ) {
  1601 	SLOTILLEGAL();
  1602     } else {
  1603 	sh4vma_t target = disp + pc + 4;
  1604 	JF_rel8( nottaken );
  1605 	exit_block_rel(target, pc+2 );
  1606 	JMP_TARGET(nottaken);
   1607 	return 2;
   1608     }
  1609 :}
  1610 BT/S disp {:
  1611     COUNT_INST(I_BTS);
  1612     if( sh4_x86.in_delay_slot ) {
  1613 	SLOTILLEGAL();
  1614     } else {
  1615 	sh4_x86.in_delay_slot = DELAY_PC;
  1616 	if( UNTRANSLATABLE(pc+2) ) {
  1617 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1618 	    JF_rel8(nottaken);
  1619 	    ADD_imm32_r32( disp, R_EAX );
  1620 	    JMP_TARGET(nottaken);
  1621 	    ADD_sh4r_r32( R_PC, R_EAX );
  1622 	    store_spreg( R_EAX, R_NEW_PC );
  1623 	    exit_block_emu(pc+2);
  1624 	    sh4_x86.branch_taken = TRUE;
  1625 	    return 2;
  1626 	} else {
  1627 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1628 		CMP_imm8s_sh4r( 1, R_T );
   1629 		sh4_x86.tstate = TSTATE_E;
   1630 	    }
  1631 	    OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
  1632 	    int save_tstate = sh4_x86.tstate;
  1633 	    sh4_translate_instruction(pc+2);
  1634 	    exit_block_rel( disp + pc + 4, pc+4 );
  1635 	    // not taken
  1636 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1637 	    sh4_x86.tstate = save_tstate;
  1638 	    sh4_translate_instruction(pc+2);
   1639 	    return 4;
   1640 	}
   1641     }
  1642 :}
  1643 JMP @Rn {:  
  1644     COUNT_INST(I_JMP);
  1645     if( sh4_x86.in_delay_slot ) {
  1646 	SLOTILLEGAL();
  1647     } else {
  1648 	load_reg( R_ECX, Rn );
  1649 	store_spreg( R_ECX, R_NEW_PC );
  1650 	sh4_x86.in_delay_slot = DELAY_PC;
  1651 	sh4_x86.branch_taken = TRUE;
  1652 	if( UNTRANSLATABLE(pc+2) ) {
  1653 	    exit_block_emu(pc+2);
  1654 	    return 2;
  1655 	} else {
  1656 	    sh4_translate_instruction(pc+2);
  1657 	    exit_block_newpcset(pc+2);
   1658 	    return 4;
   1659 	}
   1660     }
  1661 :}
  1662 JSR @Rn {:  
  1663     COUNT_INST(I_JSR);
  1664     if( sh4_x86.in_delay_slot ) {
  1665 	SLOTILLEGAL();
  1666     } else {
  1667 	load_spreg( R_EAX, R_PC );
  1668 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1669 	store_spreg( R_EAX, R_PR );
  1670 	load_reg( R_ECX, Rn );
  1671 	store_spreg( R_ECX, R_NEW_PC );
  1672 	sh4_x86.in_delay_slot = DELAY_PC;
  1673 	sh4_x86.branch_taken = TRUE;
  1674 	sh4_x86.tstate = TSTATE_NONE;
  1675 	if( UNTRANSLATABLE(pc+2) ) {
  1676 	    exit_block_emu(pc+2);
  1677 	    return 2;
  1678 	} else {
  1679 	    sh4_translate_instruction(pc+2);
  1680 	    exit_block_newpcset(pc+2);
  1681 	    return 4;
  1682 	}
  1683     }
  1684 :}
  1685 RTE {:  
  1686     COUNT_INST(I_RTE);
  1687     if( sh4_x86.in_delay_slot ) {
  1688 	SLOTILLEGAL();
  1689     } else {
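       	/* Return from exception: NEW_PC <- SPC, SR <- SSR. Writing SR through
       	   sh4_write_sr can switch register banks and FPU enablement, so the
       	   cached FPU-enable check is invalidated below. */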
  1690 	check_priv();
  1691 	load_spreg( R_ECX, R_SPC );
  1692 	store_spreg( R_ECX, R_NEW_PC );
  1693 	load_spreg( R_EAX, R_SSR );
  1694 	call_func1( sh4_write_sr, R_EAX );
  1695 	sh4_x86.in_delay_slot = DELAY_PC;
  1696 	sh4_x86.fpuen_checked = FALSE;
  1697 	sh4_x86.tstate = TSTATE_NONE;
  1698 	sh4_x86.branch_taken = TRUE;
  1699 	if( UNTRANSLATABLE(pc+2) ) {
  1700 	    exit_block_emu(pc+2);
  1701 	    return 2;
  1702 	} else {
  1703 	    sh4_translate_instruction(pc+2);
  1704 	    exit_block_newpcset(pc+2);
  1705 	    return 4;
  1706 	}
  1707     }
  1708 :}
  1709 RTS {:  
  1710     COUNT_INST(I_RTS);
  1711     if( sh4_x86.in_delay_slot ) {
  1712 	SLOTILLEGAL();
  1713     } else {
  1714 	load_spreg( R_ECX, R_PR );
  1715 	store_spreg( R_ECX, R_NEW_PC );
  1716 	sh4_x86.in_delay_slot = DELAY_PC;
  1717 	sh4_x86.branch_taken = TRUE;
  1718 	if( UNTRANSLATABLE(pc+2) ) {
  1719 	    exit_block_emu(pc+2);
  1720 	    return 2;
  1721 	} else {
  1722 	    sh4_translate_instruction(pc+2);
  1723 	    exit_block_newpcset(pc+2);
  1724 	    return 4;
  1725 	}
  1726     }
  1727 :}
  1728 TRAPA #imm {:  
  1729     COUNT_INST(I_TRAPA);
  1730     if( sh4_x86.in_delay_slot ) {
  1731 	SLOTILLEGAL();
  1732     } else {
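       	/* Commit PC (advanced past the TRAPA) before calling the trap helper,
       	   so the raised exception records the correct return address. */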
  1733 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1734 	ADD_r32_sh4r( R_ECX, R_PC );
  1735 	load_imm32( R_EAX, imm );
  1736 	call_func1( sh4_raise_trap, R_EAX );
  1737 	sh4_x86.tstate = TSTATE_NONE;
  1738 	exit_block_pcset(pc);
  1739 	sh4_x86.branch_taken = TRUE;
  1740 	return 2;
  1741     }
  1742 :}
  1743 UNDEF {:  
  1744     COUNT_INST(I_UNDEF);
  1745     if( sh4_x86.in_delay_slot ) {
  1746 	SLOTILLEGAL();
  1747     } else {
  1748 	JMP_exc(EXC_ILLEGAL);
  1749 	return 2;
  1750     }
  1751 :}
  1753 CLRMAC {:  
  1754     COUNT_INST(I_CLRMAC);
  1755     XOR_r32_r32(R_EAX, R_EAX);
  1756     store_spreg( R_EAX, R_MACL );
  1757     store_spreg( R_EAX, R_MACH );
  1758     sh4_x86.tstate = TSTATE_NONE;
  1759 :}
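       /* The flag ops below route the bit through the host carry flag:
          CLC/STC sets carry, then SETC stores it to S or T. After CLRT/SETT
          the live carry still mirrors T, so tstate is left as TSTATE_C for a
          following conditional to reuse. */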
  1760 CLRS {:
  1761     COUNT_INST(I_CLRS);
  1762     CLC();
  1763     SETC_sh4r(R_S);
  1764     sh4_x86.tstate = TSTATE_NONE;
  1765 :}
  1766 CLRT {:  
  1767     COUNT_INST(I_CLRT);
  1768     CLC();
  1769     SETC_t();
  1770     sh4_x86.tstate = TSTATE_C;
  1771 :}
  1772 SETS {:  
  1773     COUNT_INST(I_SETS);
  1774     STC();
  1775     SETC_sh4r(R_S);
  1776     sh4_x86.tstate = TSTATE_NONE;
  1777 :}
  1778 SETT {:  
  1779     COUNT_INST(I_SETT);
  1780     STC();
  1781     SETC_t();
  1782     sh4_x86.tstate = TSTATE_C;
  1783 :}
  1785 /* Floating point moves */
  1786 FMOV FRm, FRn {:  
  1787     COUNT_INST(I_FMOV1);
  1788     check_fpuen();
  1789     if( sh4_x86.double_size ) {
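           /* SZ=1: FMOV transfers a 64-bit register pair, so move both
              32-bit halves. */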
  1790         load_dr0( R_EAX, FRm );
  1791         load_dr1( R_ECX, FRm );
  1792         store_dr0( R_EAX, FRn );
  1793         store_dr1( R_ECX, FRn );
  1794     } else {
  1795         load_fr( R_EAX, FRm ); // SZ=0 branch
  1796         store_fr( R_EAX, FRn );
  1797     }
  1798 :}
  1799 FMOV FRm, @Rn {: 
  1800     COUNT_INST(I_FMOV2);
  1801     check_fpuen();
  1802     load_reg( R_EAX, Rn );
  1803     if( sh4_x86.double_size ) {
  1804         check_walign64( R_EAX );
  1805         load_dr0( R_EDX, FRm );
  1806         MEM_WRITE_LONG( R_EAX, R_EDX );
  1807         load_reg( R_EAX, Rn );
  1808         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
  1809         load_dr1( R_EDX, FRm );
  1810         MEM_WRITE_LONG( R_EAX, R_EDX );
  1811     } else {
  1812         check_walign32( R_EAX );
  1813         load_fr( R_EDX, FRm );
  1814         MEM_WRITE_LONG( R_EAX, R_EDX );
  1815     }
  1816     sh4_x86.tstate = TSTATE_NONE;
  1817 :}
  1818 FMOV @Rm, FRn {:  
  1819     COUNT_INST(I_FMOV5);
  1820     check_fpuen();
  1821     load_reg( R_EAX, Rm );
  1822     if( sh4_x86.double_size ) {
  1823         check_ralign64( R_EAX );
  1824         MEM_READ_LONG( R_EAX, R_EAX );
  1825         store_dr0( R_EAX, FRn );
  1826         load_reg( R_EAX, Rm );
  1827         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
  1828         MEM_READ_LONG( R_EAX, R_EAX );
  1829         store_dr1( R_EAX, FRn );
  1830     } else {
  1831         check_ralign32( R_EAX );
  1832         MEM_READ_LONG( R_EAX, R_EAX );
  1833         store_fr( R_EAX, FRn );
  1834     }
  1835     sh4_x86.tstate = TSTATE_NONE;
  1836 :}
  1837 FMOV FRm, @-Rn {:  
  1838     COUNT_INST(I_FMOV3);
  1839     check_fpuen();
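       /* The Rn pre-decrement is only committed (via ADD_imm8s_sh4r) after
          the memory writes, so a fault mid-instruction leaves Rn intact. */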
  1840     load_reg( R_EAX, Rn );
  1841     if( sh4_x86.double_size ) {
  1842         check_walign64( R_EAX );
  1843         LEA_r32disp8_r32( R_EAX, -8, R_EAX );
  1844         load_dr0( R_EDX, FRm );
  1845         MEM_WRITE_LONG( R_EAX, R_EDX );
  1846         load_reg( R_EAX, Rn );
  1847         LEA_r32disp8_r32( R_EAX, -4, R_EAX );
  1848         load_dr1( R_EDX, FRm );
  1849         MEM_WRITE_LONG( R_EAX, R_EDX );
  1850         ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1851     } else {
  1852         check_walign32( R_EAX );
  1853         LEA_r32disp8_r32( R_EAX, -4, R_EAX );
  1854         load_fr( R_EDX, FRm );
  1855         MEM_WRITE_LONG( R_EAX, R_EDX );
  1856         ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1857     }
  1858     sh4_x86.tstate = TSTATE_NONE;
  1859 :}
  1860 FMOV @Rm+, FRn {:
  1861     COUNT_INST(I_FMOV6);
  1862     check_fpuen();
  1863     load_reg( R_EAX, Rm );
  1864     if( sh4_x86.double_size ) {
  1865         check_ralign64( R_EAX );
  1866         MEM_READ_LONG( R_EAX, R_EAX );
  1867         store_dr0( R_EAX, FRn );
  1868         load_reg( R_EAX, Rm );
  1869         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
  1870         MEM_READ_LONG( R_EAX, R_EAX );
  1871         store_dr1( R_EAX, FRn );
  1872         ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1873     } else {
  1874         check_ralign32( R_EAX );
  1875         MEM_READ_LONG( R_EAX, R_EAX );
  1876         store_fr( R_EAX, FRn );
  1877         ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1878     }
  1879     sh4_x86.tstate = TSTATE_NONE;
  1880 :}
  1881 FMOV FRm, @(R0, Rn) {:  
  1882     COUNT_INST(I_FMOV4);
  1883     check_fpuen();
  1884     load_reg( R_EAX, Rn );
  1885     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1886     if( sh4_x86.double_size ) {
  1887         check_walign64( R_EAX );
  1888         load_dr0( R_EDX, FRm );
  1889         MEM_WRITE_LONG( R_EAX, R_EDX );
  1890         load_reg( R_EAX, Rn );
  1891         ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1892         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
  1893         load_dr1( R_EDX, FRm );
  1894         MEM_WRITE_LONG( R_EAX, R_EDX );
  1895     } else {
  1896         check_walign32( R_EAX );
  1897         load_fr( R_EDX, FRm );
  1898         MEM_WRITE_LONG( R_EAX, R_EDX ); // 12
  1899     }
  1900     sh4_x86.tstate = TSTATE_NONE;
  1901 :}
  1902 FMOV @(R0, Rm), FRn {:  
  1903     COUNT_INST(I_FMOV7);
  1904     check_fpuen();
  1905     load_reg( R_EAX, Rm );
  1906     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1907     if( sh4_x86.double_size ) {
  1908         check_ralign64( R_EAX );
  1909         MEM_READ_LONG( R_EAX, R_EAX );
  1910         store_dr0( R_EAX, FRn );
  1911         load_reg( R_EAX, Rm );
  1912         ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1913         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
  1914         MEM_READ_LONG( R_EAX, R_EAX );
  1915         store_dr1( R_EAX, FRn );
  1916     } else {
  1917         check_ralign32( R_EAX );
  1918         MEM_READ_LONG( R_EAX, R_EAX );
  1919         store_fr( R_EAX, FRn );
  1920     }
  1921     sh4_x86.tstate = TSTATE_NONE;
  1922 :}
  1923 FLDI0 FRn {:  /* IFF PR=0 */
  1924     COUNT_INST(I_FLDI0);
  1925     check_fpuen();
  1926     if( sh4_x86.double_prec == 0 ) {
  1927         XOR_r32_r32( R_EAX, R_EAX );
  1928         store_fr( R_EAX, FRn );
  1929     }
  1930     sh4_x86.tstate = TSTATE_NONE;
  1931 :}
  1932 FLDI1 FRn {:  /* IFF PR=0 */
  1933     COUNT_INST(I_FLDI1);
  1934     check_fpuen();
  1935     if( sh4_x86.double_prec == 0 ) {
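           /* 0x3F800000 is 1.0f in IEEE-754 single precision */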
  1936         load_imm32(R_EAX, 0x3F800000);
  1937         store_fr( R_EAX, FRn );
  1938     }
  1939 :}
  1941 FLOAT FPUL, FRn {:  
  1942     COUNT_INST(I_FLOAT);
  1943     check_fpuen();
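       /* FILD pushes the 32-bit integer in FPUL onto the x87 stack; the pop
          below stores it back as a float or a double according to PR. */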
  1944     FILD_sh4r(R_FPUL);
  1945     if( sh4_x86.double_prec ) {
  1946         pop_dr( FRn );
  1947     } else {
  1948         pop_fr( FRn );
  1949     }
  1950 :}
  1951 FTRC FRm, FPUL {:  
  1952     COUNT_INST(I_FTRC);
  1953     check_fpuen();
  1954     if( sh4_x86.double_prec ) {
  1955         push_dr( FRm );
  1956     } else {
  1957         push_fr( FRm );
  1958     }
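       /* FTRC saturates: compare against max_int/min_int and clamp on
          overflow; otherwise swap in a truncating (round-to-zero) FPU
          control word for the FISTP and restore the old one afterwards. */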
  1959     load_ptr( R_ECX, &max_int );
  1960     FILD_r32ind( R_ECX );
  1961     FCOMIP_st(1);
  1962     JNA_rel8( sat );
  1963     load_ptr( R_ECX, &min_int );  // 5
  1964     FILD_r32ind( R_ECX );           // 2
  1965     FCOMIP_st(1);                   // 2
  1966     JAE_rel8( sat2 );            // 2
  1967     load_ptr( R_EAX, &save_fcw );
  1968     FNSTCW_r32ind( R_EAX );
  1969     load_ptr( R_EDX, &trunc_fcw );
  1970     FLDCW_r32ind( R_EDX );
  1971     FISTP_sh4r(R_FPUL);             // 3
  1972     FLDCW_r32ind( R_EAX );
  1973     JMP_rel8(end);             // 2
  1975     JMP_TARGET(sat);
  1976     JMP_TARGET(sat2);
  1977     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  1978     store_spreg( R_ECX, R_FPUL );
  1979     FPOP_st();
  1980     JMP_TARGET(end);
  1981     sh4_x86.tstate = TSTATE_NONE;
  1982 :}
  1983 FLDS FRm, FPUL {:  
  1984     COUNT_INST(I_FLDS);
  1985     check_fpuen();
  1986     load_fr( R_EAX, FRm );
  1987     store_spreg( R_EAX, R_FPUL );
  1988 :}
  1989 FSTS FPUL, FRn {:  
  1990     COUNT_INST(I_FSTS);
  1991     check_fpuen();
  1992     load_spreg( R_EAX, R_FPUL );
  1993     store_fr( R_EAX, FRn );
  1994 :}
  1995 FCNVDS FRm, FPUL {:  
  1996     COUNT_INST(I_FCNVDS);
  1997     check_fpuen();
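       /* Double -> single conversion falls out of the x87 load/store width:
          push FRm as a double, pop it into FPUL as a 32-bit float. */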
  1998     if( sh4_x86.double_prec ) {
  1999         push_dr( FRm );
  2000         pop_fpul();
  2001     }
  2002 :}
  2003 FCNVSD FPUL, FRn {:  
  2004     COUNT_INST(I_FCNVSD);
  2005     check_fpuen();
  2006     if( sh4_x86.double_prec ) {
  2007         push_fpul();
  2008         pop_dr( FRn );
  2009     }
  2010 :}
  2012 /* Floating point instructions */
  2013 FABS FRn {:  
  2014     COUNT_INST(I_FABS);
  2015     check_fpuen();
  2016     if( sh4_x86.double_prec ) {
  2017         push_dr(FRn);
  2018         FABS_st0();
  2019         pop_dr(FRn);
  2020     } else {
  2021         push_fr(FRn);
  2022         FABS_st0();
  2023         pop_fr(FRn);
  2024     }
  2025 :}
  2026 FADD FRm, FRn {:  
  2027     COUNT_INST(I_FADD);
  2028     check_fpuen();
  2029     if( sh4_x86.double_prec ) {
  2030         push_dr(FRm);
  2031         push_dr(FRn);
  2032         FADDP_st(1);
  2033         pop_dr(FRn);
  2034     } else {
  2035         push_fr(FRm);
  2036         push_fr(FRn);
  2037         FADDP_st(1);
  2038         pop_fr(FRn);
  2039     }
  2040 :}
  2041 FDIV FRm, FRn {:  
  2042     COUNT_INST(I_FDIV);
  2043     check_fpuen();
  2044     if( sh4_x86.double_prec ) {
  2045         push_dr(FRn);
  2046         push_dr(FRm);
  2047         FDIVP_st(1);
  2048         pop_dr(FRn);
  2049     } else {
  2050         push_fr(FRn);
  2051         push_fr(FRm);
  2052         FDIVP_st(1);
  2053         pop_fr(FRn);
  2054     }
  2055 :}
  2056 FMAC FR0, FRm, FRn {:  
  2057     COUNT_INST(I_FMAC);
  2058     check_fpuen();
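       /* FRn = FR0 * FRm + FRn, evaluated on the x87 stack. */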
  2059     if( sh4_x86.double_prec ) {
  2060         push_dr( 0 );
  2061         push_dr( FRm );
  2062         FMULP_st(1);
  2063         push_dr( FRn );
  2064         FADDP_st(1);
  2065         pop_dr( FRn );
  2066     } else {
  2067         push_fr( 0 );
  2068         push_fr( FRm );
  2069         FMULP_st(1);
  2070         push_fr( FRn );
  2071         FADDP_st(1);
  2072         pop_fr( FRn );
  2073     }
  2074 :}
  2076 FMUL FRm, FRn {:  
  2077     COUNT_INST(I_FMUL);
  2078     check_fpuen();
  2079     if( sh4_x86.double_prec ) {
  2080         push_dr(FRm);
  2081         push_dr(FRn);
  2082         FMULP_st(1);
  2083         pop_dr(FRn);
  2084     } else {
  2085         push_fr(FRm);
  2086         push_fr(FRn);
  2087         FMULP_st(1);
  2088         pop_fr(FRn);
  2089     }
  2090 :}
  2091 FNEG FRn {:  
  2092     COUNT_INST(I_FNEG);
  2093     check_fpuen();
  2094     if( sh4_x86.double_prec ) {
  2095         push_dr(FRn);
  2096         FCHS_st0();
  2097         pop_dr(FRn);
  2098     } else {
  2099         push_fr(FRn);
  2100         FCHS_st0();
  2101         pop_fr(FRn);
  2102     }
  2103 :}
  2104 FSRRA FRn {:  
  2105     COUNT_INST(I_FSRRA);
  2106     check_fpuen();
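       /* Reciprocal square root: st0 = 1.0 / sqrt(FRn). FSRRA is only
          defined for PR=0, hence no double-precision branch. */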
  2107     if( sh4_x86.double_prec == 0 ) {
  2108         FLD1_st0();
  2109         push_fr(FRn);
  2110         FSQRT_st0();
  2111         FDIVP_st(1);
  2112         pop_fr(FRn);
  2113     }
  2114 :}
  2115 FSQRT FRn {:  
  2116     COUNT_INST(I_FSQRT);
  2117     check_fpuen();
  2118     if( sh4_x86.double_prec ) {
  2119         push_dr(FRn);
  2120         FSQRT_st0();
  2121         pop_dr(FRn);
  2122     } else {
  2123         push_fr(FRn);
  2124         FSQRT_st0();
  2125         pop_fr(FRn);
  2126     }
  2127 :}
  2128 FSUB FRm, FRn {:  
  2129     COUNT_INST(I_FSUB);
  2130     check_fpuen();
  2131     if( sh4_x86.double_prec ) {
  2132         push_dr(FRn);
  2133         push_dr(FRm);
  2134         FSUBP_st(1);
  2135         pop_dr(FRn);
  2136     } else {
  2137         push_fr(FRn);
  2138         push_fr(FRm);
  2139         FSUBP_st(1);
  2140         pop_fr(FRn);
  2141     }
  2142 :}
  2144 FCMP/EQ FRm, FRn {:  
  2145     COUNT_INST(I_FCMPEQ);
  2146     check_fpuen();
  2147     if( sh4_x86.double_prec ) {
  2148         push_dr(FRm);
  2149         push_dr(FRn);
  2150     } else {
  2151         push_fr(FRm);
  2152         push_fr(FRn);
  2153     }
  2154     FCOMIP_st(1);
  2155     SETE_t();
  2156     FPOP_st();
  2157     sh4_x86.tstate = TSTATE_E;
  2158 :}
  2159 FCMP/GT FRm, FRn {:  
  2160     COUNT_INST(I_FCMPGT);
  2161     check_fpuen();
  2162     if( sh4_x86.double_prec ) {
  2163         push_dr(FRm);
  2164         push_dr(FRn);
  2165     } else {
  2166         push_fr(FRm);
  2167         push_fr(FRn);
  2168     }
  2169     FCOMIP_st(1);
  2170     SETA_t();
  2171     FPOP_st();
  2172     sh4_x86.tstate = TSTATE_A;
  2173 :}
  2175 FSCA FPUL, FRn {:  
  2176     COUNT_INST(I_FSCA);
  2177     check_fpuen();
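       /* FPUL holds a fixed-point angle (0x10000 == 2*pi); the sh4_fsca
          helper writes sin/cos into the even-aligned pair FRn/FRn+1. */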
  2178     if( sh4_x86.double_prec == 0 ) {
  2179         LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
  2180         load_spreg( R_EAX, R_FPUL );
  2181         call_func2( sh4_fsca, R_EAX, R_EDX );
  2182     }
  2183     sh4_x86.tstate = TSTATE_NONE;
  2184 :}
  2185 FIPR FVm, FVn {:  
  2186     COUNT_INST(I_FIPR);
  2187     check_fpuen();
  2188     if( sh4_x86.double_prec == 0 ) {
  2189         if( sh4_x86.sse3_enabled ) {
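               /* Packed multiply then two HADDPS passes leave the dot product
                  in every lane; the (FVn<<2)+2 store offset appears to account
                  for the pair-swapped in-memory FR layout (see FTRV below). */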
  2190             MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2191             MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2192             HADDPS_xmm_xmm( 4, 4 ); 
  2193             HADDPS_xmm_xmm( 4, 4 );
  2194             MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2195         } else {
  2196             push_fr( FVm<<2 );
  2197             push_fr( FVn<<2 );
  2198             FMULP_st(1);
  2199             push_fr( (FVm<<2)+1);
  2200             push_fr( (FVn<<2)+1);
  2201             FMULP_st(1);
  2202             FADDP_st(1);
  2203             push_fr( (FVm<<2)+2);
  2204             push_fr( (FVn<<2)+2);
  2205             FMULP_st(1);
  2206             FADDP_st(1);
  2207             push_fr( (FVm<<2)+3);
  2208             push_fr( (FVn<<2)+3);
  2209             FMULP_st(1);
  2210             FADDP_st(1);
  2211             pop_fr( (FVn<<2)+3);
  2212         }
  2213     }
  2214 :}
  2215 FTRV XMTRX, FVn {:  
  2216     COUNT_INST(I_FTRV);
  2217     check_fpuen();
  2218     if( sh4_x86.double_prec == 0 ) {
  2219         if( sh4_x86.sse3_enabled ) {
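               /* Matrix-vector product FVn = XMTRX * FVn: broadcast each
                  element of FVn, scale the matching matrix column, and sum. */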
  2220             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2221             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2222             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2223             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2225             MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2226             MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2227             MOVAPS_xmm_xmm( 4, 6 );
  2228             MOVAPS_xmm_xmm( 5, 7 );
  2229             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2230             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2231             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2232             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2233             MULPS_xmm_xmm( 0, 4 );
  2234             MULPS_xmm_xmm( 1, 5 );
  2235             MULPS_xmm_xmm( 2, 6 );
  2236             MULPS_xmm_xmm( 3, 7 );
  2237             ADDPS_xmm_xmm( 5, 4 );
  2238             ADDPS_xmm_xmm( 7, 6 );
  2239             ADDPS_xmm_xmm( 6, 4 );
  2240             MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2241         } else {
  2242             LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
  2243             call_func1( sh4_ftrv, R_EAX );
  2244         }
  2245     }
  2246     sh4_x86.tstate = TSTATE_NONE;
  2247 :}
  2249 FRCHG {:  
  2250     COUNT_INST(I_FRCHG);
  2251     check_fpuen();
  2252     XOR_imm32_sh4r( FPSCR_FR, R_FPSCR );
  2253     call_func0( sh4_switch_fr_banks );
  2254     sh4_x86.tstate = TSTATE_NONE;
  2255 :}
  2256 FSCHG {:  
  2257     COUNT_INST(I_FSCHG);
  2258     check_fpuen();
  2259     XOR_imm32_sh4r( FPSCR_SZ, R_FPSCR);
  2260     XOR_imm32_sh4r( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
  2261     sh4_x86.tstate = TSTATE_NONE;
  2262     sh4_x86.double_size = !sh4_x86.double_size;
  2263 :}
  2265 /* Processor control instructions */
  2266 LDC Rm, SR {:
  2267     COUNT_INST(I_LDCSR);
  2268     if( sh4_x86.in_delay_slot ) {
  2269 	SLOTILLEGAL();
  2270     } else {
  2271 	check_priv();
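       	/* SR writes go through sh4_write_sr (bank switching, interrupt mask,
       	   FPU disable), so the cached FPU-enable check must be redone. */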
  2272 	load_reg( R_EAX, Rm );
  2273 	call_func1( sh4_write_sr, R_EAX );
  2274 	sh4_x86.fpuen_checked = FALSE;
  2275 	sh4_x86.tstate = TSTATE_NONE;
  2276 	return 2;
  2277     }
  2278 :}
  2279 LDC Rm, GBR {: 
  2280     COUNT_INST(I_LDC);
  2281     load_reg( R_EAX, Rm );
  2282     store_spreg( R_EAX, R_GBR );
  2283 :}
  2284 LDC Rm, VBR {:  
  2285     COUNT_INST(I_LDC);
  2286     check_priv();
  2287     load_reg( R_EAX, Rm );
  2288     store_spreg( R_EAX, R_VBR );
  2289     sh4_x86.tstate = TSTATE_NONE;
  2290 :}
  2291 LDC Rm, SSR {:  
  2292     COUNT_INST(I_LDC);
  2293     check_priv();
  2294     load_reg( R_EAX, Rm );
  2295     store_spreg( R_EAX, R_SSR );
  2296     sh4_x86.tstate = TSTATE_NONE;
  2297 :}
  2298 LDC Rm, SGR {:  
  2299     COUNT_INST(I_LDC);
  2300     check_priv();
  2301     load_reg( R_EAX, Rm );
  2302     store_spreg( R_EAX, R_SGR );
  2303     sh4_x86.tstate = TSTATE_NONE;
  2304 :}
  2305 LDC Rm, SPC {:  
  2306     COUNT_INST(I_LDC);
  2307     check_priv();
  2308     load_reg( R_EAX, Rm );
  2309     store_spreg( R_EAX, R_SPC );
  2310     sh4_x86.tstate = TSTATE_NONE;
  2311 :}
  2312 LDC Rm, DBR {:  
  2313     COUNT_INST(I_LDC);
  2314     check_priv();
  2315     load_reg( R_EAX, Rm );
  2316     store_spreg( R_EAX, R_DBR );
  2317     sh4_x86.tstate = TSTATE_NONE;
  2318 :}
  2319 LDC Rm, Rn_BANK {:  
  2320     COUNT_INST(I_LDC);
  2321     check_priv();
  2322     load_reg( R_EAX, Rm );
  2323     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2324     sh4_x86.tstate = TSTATE_NONE;
  2325 :}
  2326 LDC.L @Rm+, GBR {:  
  2327     COUNT_INST(I_LDCM);
  2328     load_reg( R_EAX, Rm );
  2329     check_ralign32( R_EAX );
  2330     MEM_READ_LONG( R_EAX, R_EAX );
  2331     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2332     store_spreg( R_EAX, R_GBR );
  2333     sh4_x86.tstate = TSTATE_NONE;
  2334 :}
  2335 LDC.L @Rm+, SR {:
  2336     COUNT_INST(I_LDCSRM);
  2337     if( sh4_x86.in_delay_slot ) {
  2338 	SLOTILLEGAL();
  2339     } else {
  2340 	check_priv();
  2341 	load_reg( R_EAX, Rm );
  2342 	check_ralign32( R_EAX );
  2343 	MEM_READ_LONG( R_EAX, R_EAX );
  2344 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2345 	call_func1( sh4_write_sr, R_EAX );
  2346 	sh4_x86.fpuen_checked = FALSE;
  2347 	sh4_x86.tstate = TSTATE_NONE;
  2348 	return 2;
  2349     }
  2350 :}
  2351 LDC.L @Rm+, VBR {:  
  2352     COUNT_INST(I_LDCM);
  2353     check_priv();
  2354     load_reg( R_EAX, Rm );
  2355     check_ralign32( R_EAX );
  2356     MEM_READ_LONG( R_EAX, R_EAX );
  2357     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2358     store_spreg( R_EAX, R_VBR );
  2359     sh4_x86.tstate = TSTATE_NONE;
  2360 :}
  2361 LDC.L @Rm+, SSR {:
  2362     COUNT_INST(I_LDCM);
  2363     check_priv();
  2364     load_reg( R_EAX, Rm );
  2365     check_ralign32( R_EAX );
  2366     MEM_READ_LONG( R_EAX, R_EAX );
  2367     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2368     store_spreg( R_EAX, R_SSR );
  2369     sh4_x86.tstate = TSTATE_NONE;
  2370 :}
  2371 LDC.L @Rm+, SGR {:  
  2372     COUNT_INST(I_LDCM);
  2373     check_priv();
  2374     load_reg( R_EAX, Rm );
  2375     check_ralign32( R_EAX );
  2376     MEM_READ_LONG( R_EAX, R_EAX );
  2377     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2378     store_spreg( R_EAX, R_SGR );
  2379     sh4_x86.tstate = TSTATE_NONE;
  2380 :}
  2381 LDC.L @Rm+, SPC {:  
  2382     COUNT_INST(I_LDCM);
  2383     check_priv();
  2384     load_reg( R_EAX, Rm );
  2385     check_ralign32( R_EAX );
  2386     MEM_READ_LONG( R_EAX, R_EAX );
  2387     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2388     store_spreg( R_EAX, R_SPC );
  2389     sh4_x86.tstate = TSTATE_NONE;
  2390 :}
  2391 LDC.L @Rm+, DBR {:  
  2392     COUNT_INST(I_LDCM);
  2393     check_priv();
  2394     load_reg( R_EAX, Rm );
  2395     check_ralign32( R_EAX );
  2396     MEM_READ_LONG( R_EAX, R_EAX );
  2397     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2398     store_spreg( R_EAX, R_DBR );
  2399     sh4_x86.tstate = TSTATE_NONE;
  2400 :}
  2401 LDC.L @Rm+, Rn_BANK {:  
  2402     COUNT_INST(I_LDCM);
  2403     check_priv();
  2404     load_reg( R_EAX, Rm );
  2405     check_ralign32( R_EAX );
  2406     MEM_READ_LONG( R_EAX, R_EAX );
  2407     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2408     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2409     sh4_x86.tstate = TSTATE_NONE;
  2410 :}
  2411 LDS Rm, FPSCR {:
  2412     COUNT_INST(I_LDSFPSCR);
  2413     check_fpuen();
  2414     load_reg( R_EAX, Rm );
  2415     call_func1( sh4_write_fpscr, R_EAX );
  2416     sh4_x86.tstate = TSTATE_NONE;
  2417     return 2;
  2418 :}
  2419 LDS.L @Rm+, FPSCR {:  
  2420     COUNT_INST(I_LDSFPSCRM);
  2421     check_fpuen();
  2422     load_reg( R_EAX, Rm );
  2423     check_ralign32( R_EAX );
  2424     MEM_READ_LONG( R_EAX, R_EAX );
  2425     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2426     call_func1( sh4_write_fpscr, R_EAX );
  2427     sh4_x86.tstate = TSTATE_NONE;
  2428     return 2;
  2429 :}
  2430 LDS Rm, FPUL {:  
  2431     COUNT_INST(I_LDS);
  2432     check_fpuen();
  2433     load_reg( R_EAX, Rm );
  2434     store_spreg( R_EAX, R_FPUL );
  2435 :}
  2436 LDS.L @Rm+, FPUL {:  
  2437     COUNT_INST(I_LDSM);
  2438     check_fpuen();
  2439     load_reg( R_EAX, Rm );
  2440     check_ralign32( R_EAX );
  2441     MEM_READ_LONG( R_EAX, R_EAX );
  2442     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2443     store_spreg( R_EAX, R_FPUL );
  2444     sh4_x86.tstate = TSTATE_NONE;
  2445 :}
  2446 LDS Rm, MACH {: 
  2447     COUNT_INST(I_LDS);
  2448     load_reg( R_EAX, Rm );
  2449     store_spreg( R_EAX, R_MACH );
  2450 :}
  2451 LDS.L @Rm+, MACH {:  
  2452     COUNT_INST(I_LDSM);
  2453     load_reg( R_EAX, Rm );
  2454     check_ralign32( R_EAX );
  2455     MEM_READ_LONG( R_EAX, R_EAX );
  2456     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2457     store_spreg( R_EAX, R_MACH );
  2458     sh4_x86.tstate = TSTATE_NONE;
  2459 :}
  2460 LDS Rm, MACL {:  
  2461     COUNT_INST(I_LDS);
  2462     load_reg( R_EAX, Rm );
  2463     store_spreg( R_EAX, R_MACL );
  2464 :}
  2465 LDS.L @Rm+, MACL {:  
  2466     COUNT_INST(I_LDSM);
  2467     load_reg( R_EAX, Rm );
  2468     check_ralign32( R_EAX );
  2469     MEM_READ_LONG( R_EAX, R_EAX );
  2470     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2471     store_spreg( R_EAX, R_MACL );
  2472     sh4_x86.tstate = TSTATE_NONE;
  2473 :}
  2474 LDS Rm, PR {:  
  2475     COUNT_INST(I_LDS);
  2476     load_reg( R_EAX, Rm );
  2477     store_spreg( R_EAX, R_PR );
  2478 :}
  2479 LDS.L @Rm+, PR {:  
  2480     COUNT_INST(I_LDSM);
  2481     load_reg( R_EAX, Rm );
  2482     check_ralign32( R_EAX );
  2483     MEM_READ_LONG( R_EAX, R_EAX );
  2484     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2485     store_spreg( R_EAX, R_PR );
  2486     sh4_x86.tstate = TSTATE_NONE;
  2487 :}
  2488 LDTLB {:  
  2489     COUNT_INST(I_LDTLB);
  2490     call_func0( MMU_ldtlb );
  2491     sh4_x86.tstate = TSTATE_NONE;
  2492 :}
  2493 OCBI @Rn {:
  2494     COUNT_INST(I_OCBI);
  2495 :}
  2496 OCBP @Rn {:
  2497     COUNT_INST(I_OCBP);
  2498 :}
  2499 OCBWB @Rn {:
  2500     COUNT_INST(I_OCBWB);
  2501 :}
  2502 PREF @Rn {:
  2503     COUNT_INST(I_PREF);
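       /* On SH4, PREF to 0xE0000000-0xE3FFFFFF triggers store-queue
          write-back; MEM_PREFETCH dispatches through the memory tables,
          which are expected to handle that case. */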
  2504     load_reg( R_EAX, Rn );
  2505     MEM_PREFETCH( R_EAX );
  2506     sh4_x86.tstate = TSTATE_NONE;
  2507 :}
  2508 SLEEP {: 
  2509     COUNT_INST(I_SLEEP);
  2510     check_priv();
  2511     call_func0( sh4_sleep );
  2512     sh4_x86.tstate = TSTATE_NONE;
  2513     sh4_x86.in_delay_slot = DELAY_NONE;
  2514     return 2;
  2515 :}
  2516 STC SR, Rn {:
  2517     COUNT_INST(I_STCSR);
  2518     check_priv();
  2519     call_func0(sh4_read_sr);
  2520     store_reg( R_EAX, Rn );
  2521     sh4_x86.tstate = TSTATE_NONE;
  2522 :}
  2523 STC GBR, Rn {:  
  2524     COUNT_INST(I_STC);
  2525     load_spreg( R_EAX, R_GBR );
  2526     store_reg( R_EAX, Rn );
  2527 :}
  2528 STC VBR, Rn {:  
  2529     COUNT_INST(I_STC);
  2530     check_priv();
  2531     load_spreg( R_EAX, R_VBR );
  2532     store_reg( R_EAX, Rn );
  2533     sh4_x86.tstate = TSTATE_NONE;
  2534 :}
  2535 STC SSR, Rn {:  
  2536     COUNT_INST(I_STC);
  2537     check_priv();
  2538     load_spreg( R_EAX, R_SSR );
  2539     store_reg( R_EAX, Rn );
  2540     sh4_x86.tstate = TSTATE_NONE;
  2541 :}
  2542 STC SPC, Rn {:  
  2543     COUNT_INST(I_STC);
  2544     check_priv();
  2545     load_spreg( R_EAX, R_SPC );
  2546     store_reg( R_EAX, Rn );
  2547     sh4_x86.tstate = TSTATE_NONE;
  2548 :}
  2549 STC SGR, Rn {:  
  2550     COUNT_INST(I_STC);
  2551     check_priv();
  2552     load_spreg( R_EAX, R_SGR );
  2553     store_reg( R_EAX, Rn );
  2554     sh4_x86.tstate = TSTATE_NONE;
  2555 :}
  2556 STC DBR, Rn {:  
  2557     COUNT_INST(I_STC);
  2558     check_priv();
  2559     load_spreg( R_EAX, R_DBR );
  2560     store_reg( R_EAX, Rn );
  2561     sh4_x86.tstate = TSTATE_NONE;
  2562 :}
  2563 STC Rm_BANK, Rn {:
  2564     COUNT_INST(I_STC);
  2565     check_priv();
  2566     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2567     store_reg( R_EAX, Rn );
  2568     sh4_x86.tstate = TSTATE_NONE;
  2569 :}
  2570 STC.L SR, @-Rn {:
  2571     COUNT_INST(I_STCSRM);
  2572     check_priv();
  2573     call_func0( sh4_read_sr );
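       /* sh4_read_sr reassembles the architectural SR from its unpacked
          parts (T, at least, is stored separately for fast access). */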
  2574     MOV_r32_r32( R_EAX, R_EDX );
  2575     load_reg( R_EAX, Rn );
  2576     check_walign32( R_EAX );
  2577     LEA_r32disp8_r32( R_EAX, -4, R_EAX );
  2578     MEM_WRITE_LONG( R_EAX, R_EDX );
  2579     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2580     sh4_x86.tstate = TSTATE_NONE;
  2581 :}
  2582 STC.L VBR, @-Rn {:  
  2583     COUNT_INST(I_STCM);
  2584     check_priv();
  2585     load_reg( R_EAX, Rn );
  2586     check_walign32( R_EAX );
  2587     ADD_imm8s_r32( -4, R_EAX );
  2588     load_spreg( R_EDX, R_VBR );
  2589     MEM_WRITE_LONG( R_EAX, R_EDX );
  2590     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2591     sh4_x86.tstate = TSTATE_NONE;
  2592 :}
  2593 STC.L SSR, @-Rn {:  
  2594     COUNT_INST(I_STCM);
  2595     check_priv();
  2596     load_reg( R_EAX, Rn );
  2597     check_walign32( R_EAX );
  2598     ADD_imm8s_r32( -4, R_EAX );
  2599     load_spreg( R_EDX, R_SSR );
  2600     MEM_WRITE_LONG( R_EAX, R_EDX );
  2601     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2602     sh4_x86.tstate = TSTATE_NONE;
  2603 :}
  2604 STC.L SPC, @-Rn {:
  2605     COUNT_INST(I_STCM);
  2606     check_priv();
  2607     load_reg( R_EAX, Rn );
  2608     check_walign32( R_EAX );
  2609     ADD_imm8s_r32( -4, R_EAX );
  2610     load_spreg( R_EDX, R_SPC );
  2611     MEM_WRITE_LONG( R_EAX, R_EDX );
  2612     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2613     sh4_x86.tstate = TSTATE_NONE;
  2614 :}
  2615 STC.L SGR, @-Rn {:  
  2616     COUNT_INST(I_STCM);
  2617     check_priv();
  2618     load_reg( R_EAX, Rn );
  2619     check_walign32( R_EAX );
  2620     ADD_imm8s_r32( -4, R_EAX );
  2621     load_spreg( R_EDX, R_SGR );
  2622     MEM_WRITE_LONG( R_EAX, R_EDX );
  2623     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2624     sh4_x86.tstate = TSTATE_NONE;
  2625 :}
  2626 STC.L DBR, @-Rn {:  
  2627     COUNT_INST(I_STCM);
  2628     check_priv();
  2629     load_reg( R_EAX, Rn );
  2630     check_walign32( R_EAX );
  2631     ADD_imm8s_r32( -4, R_EAX );
  2632     load_spreg( R_EDX, R_DBR );
  2633     MEM_WRITE_LONG( R_EAX, R_EDX );
  2634     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2635     sh4_x86.tstate = TSTATE_NONE;
  2636 :}
  2637 STC.L Rm_BANK, @-Rn {:  
  2638     COUNT_INST(I_STCM);
  2639     check_priv();
  2640     load_reg( R_EAX, Rn );
  2641     check_walign32( R_EAX );
  2642     ADD_imm8s_r32( -4, R_EAX );
  2643     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2644     MEM_WRITE_LONG( R_EAX, R_EDX );
  2645     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2646     sh4_x86.tstate = TSTATE_NONE;
  2647 :}
  2648 STC.L GBR, @-Rn {:  
  2649     COUNT_INST(I_STCM);
  2650     load_reg( R_EAX, Rn );
  2651     check_walign32( R_EAX );
  2652     ADD_imm8s_r32( -4, R_EAX );
  2653     load_spreg( R_EDX, R_GBR );
  2654     MEM_WRITE_LONG( R_EAX, R_EDX );
  2655     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2656     sh4_x86.tstate = TSTATE_NONE;
  2657 :}
  2658 STS FPSCR, Rn {:  
  2659     COUNT_INST(I_STSFPSCR);
  2660     check_fpuen();
  2661     load_spreg( R_EAX, R_FPSCR );
  2662     store_reg( R_EAX, Rn );
  2663 :}
  2664 STS.L FPSCR, @-Rn {:  
  2665     COUNT_INST(I_STSFPSCRM);
  2666     check_fpuen();
  2667     load_reg( R_EAX, Rn );
  2668     check_walign32( R_EAX );
  2669     ADD_imm8s_r32( -4, R_EAX );
  2670     load_spreg( R_EDX, R_FPSCR );
  2671     MEM_WRITE_LONG( R_EAX, R_EDX );
  2672     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2673     sh4_x86.tstate = TSTATE_NONE;
  2674 :}
  2675 STS FPUL, Rn {:  
  2676     COUNT_INST(I_STS);
  2677     check_fpuen();
  2678     load_spreg( R_EAX, R_FPUL );
  2679     store_reg( R_EAX, Rn );
  2680 :}
  2681 STS.L FPUL, @-Rn {:  
  2682     COUNT_INST(I_STSM);
  2683     check_fpuen();
  2684     load_reg( R_EAX, Rn );
  2685     check_walign32( R_EAX );
  2686     ADD_imm8s_r32( -4, R_EAX );
  2687     load_spreg( R_EDX, R_FPUL );
  2688     MEM_WRITE_LONG( R_EAX, R_EDX );
  2689     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2690     sh4_x86.tstate = TSTATE_NONE;
  2691 :}
  2692 STS MACH, Rn {:  
  2693     COUNT_INST(I_STS);
  2694     load_spreg( R_EAX, R_MACH );
  2695     store_reg( R_EAX, Rn );
  2696 :}
  2697 STS.L MACH, @-Rn {:  
  2698     COUNT_INST(I_STSM);
  2699     load_reg( R_EAX, Rn );
  2700     check_walign32( R_EAX );
  2701     ADD_imm8s_r32( -4, R_EAX );
  2702     load_spreg( R_EDX, R_MACH );
  2703     MEM_WRITE_LONG( R_EAX, R_EDX );
  2704     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2705     sh4_x86.tstate = TSTATE_NONE;
  2706 :}
  2707 STS MACL, Rn {:  
  2708     COUNT_INST(I_STS);
  2709     load_spreg( R_EAX, R_MACL );
  2710     store_reg( R_EAX, Rn );
  2711 :}
  2712 STS.L MACL, @-Rn {:  
  2713     COUNT_INST(I_STSM);
  2714     load_reg( R_EAX, Rn );
  2715     check_walign32( R_EAX );
  2716     ADD_imm8s_r32( -4, R_EAX );
  2717     load_spreg( R_EDX, R_MACL );
  2718     MEM_WRITE_LONG( R_EAX, R_EDX );
  2719     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2720     sh4_x86.tstate = TSTATE_NONE;
  2721 :}
  2722 STS PR, Rn {:  
  2723     COUNT_INST(I_STS);
  2724     load_spreg( R_EAX, R_PR );
  2725     store_reg( R_EAX, Rn );
  2726 :}
  2727 STS.L PR, @-Rn {:  
  2728     COUNT_INST(I_STSM);
  2729     load_reg( R_EAX, Rn );
  2730     check_walign32( R_EAX );
  2731     ADD_imm8s_r32( -4, R_EAX );
  2732     load_spreg( R_EDX, R_PR );
  2733     MEM_WRITE_LONG( R_EAX, R_EDX );
  2734     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2735     sh4_x86.tstate = TSTATE_NONE;
  2736 :}
  2738 NOP {: 
  2739     COUNT_INST(I_NOP);
  2740     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  2741 :}
  2742 %%
  2743     sh4_x86.in_delay_slot = DELAY_NONE;
  2744     return 0;