Search
lxdream.org :: lxdream/src/sh4/sh4x86.in
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 905:4c17ebd9ef5e
prev 904:5b92e51ac06b
next 908:a00debcf2600
author nkeynes
date Wed Oct 29 23:51:58 2008 +0000 (11 years ago)
permissions -rw-r--r--
last change Use regparam calling conventions for all functions called from translated code,
along with a few other high-use functions. Can probably extend this to all functions,
but as it is this is a nice performance boost
view annotate diff log raw
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "lxdream.h"
    29 #include "sh4/xltcache.h"
    30 #include "sh4/sh4core.h"
    31 #include "sh4/sh4trans.h"
    32 #include "sh4/sh4stat.h"
    33 #include "sh4/sh4mmio.h"
    34 #include "sh4/x86op.h"
    35 #include "clock.h"
    37 #define DEFAULT_BACKPATCH_SIZE 4096
/* One pending fixup: records where in the emitted block an exception-exit
 * branch must be patched in when the block is finalized. */
struct backpatch_record {
    uint32_t fixup_offset;  /* byte offset of the fixup within the block's code */
    uint32_t fixup_icount;  /* instruction count from block start ((pc - block_start_pc)>>1) */
    int32_t exc_code;       /* SH4 exception code to raise; -1 presumably means
                             * "exception already pending" (see MMU_TRANSLATE_*) -- confirm */
};
/* Values for sh4_x86.in_delay_slot */
#define DELAY_NONE 0   /* not currently translating a delay slot */
#define DELAY_PC 1     /* in a delay slot -- NOTE(review): exact PC/PR semantics
                        * of the two delay kinds not visible in this chunk */
#define DELAY_PC_PR 2
/** 
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;     /* one of DELAY_NONE / DELAY_PC / DELAY_PC_PR */
    gboolean priv_checked; /* true if we've already checked the cpu mode. */
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc; /* SH4 PC of the first instruction in this block */
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    int tstate;            /* x86 condition code currently mirroring the SH4 T
                            * bit in EFLAGS, or TSTATE_NONE if T is not live */

    /* mode flags */
    gboolean tlb_on; /* True if tlb translation is active */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;   /* number of entries currently in use */
    uint32_t backpatch_size;   /* allocated capacity, in records */
};
/* tstate values double as x86 condition-code nibbles: JT_rel8/JF_rel8 emit
 * opcode 0x70+tstate (Jcc rel8), and tstate^1 yields the inverted condition. */
#define TSTATE_NONE -1  /* T not cached in host flags */
#define TSTATE_O    0   /* JO  */
#define TSTATE_C    2   /* JB/JC */
#define TSTATE_E    4   /* JE/JZ */
#define TSTATE_NE   5   /* JNE */
#define TSTATE_G    0xF /* JG  */
#define TSTATE_GE   0xD /* JGE */
#define TSTATE_A    7   /* JA  */
#define TSTATE_AE   3   /* JAE */
/* Per-instruction statistics hook: when enabled, emits a call to
 * sh4_stats_add(id). The call clobbers host flags, hence tstate is reset. */
#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif
/** Branch if T is set (either in the current cflags, or in sh4r.t).
 * If T is not live in the host flags, compare sh4r.t against 1 first
 * (which makes TSTATE_E/JE the right condition); then emit Jcc rel8
 * (0x70+cc) with the displacement byte reserved for MARK_JMP8 patching. */
#define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)

/** Branch if T is clear (either in the current cflags or in sh4r.t).
 * Same as JT_rel8 but with the condition inverted via the low cc bit. */
#define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
static struct sh4_x86_state sh4_x86;

/* Constants referenced from emitted FP code (presumably the FTRC-style
 * saturation / x87 control-word handling later in the file -- not visible
 * in this chunk, so confirm against the FP templates). They need static
 * storage because generated code addresses them directly. */
static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
/**
 * Detect host SSE3 support via CPUID leaf 1: the feature flag is ECX bit 0.
 * 32-bit x86 only (pushl/popl); EBX is saved/restored manually because PIC
 * builds reserve it as the GOT pointer.
 */
gboolean is_sse3_supported()
{
    uint32_t features;

    // Note: Include the push/pop ebx sequence in case of PIC builds. This 
    // isn't exactly on a critical path anyway
    __asm__ __volatile__(
        "pushl %%ebx\n\t"
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t"
        "popl %%ebx" : "=c" (features) : : "eax", "edx");
    return (features & 1) ? TRUE : FALSE;
}
   122 void sh4_translate_init(void)
   123 {
   124     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   125     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   126     sh4_x86.sse3_enabled = is_sse3_supported();
   127 }
   130 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   131 {
   132     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   133 	sh4_x86.backpatch_size <<= 1;
   134 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   135 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   136 	assert( sh4_x86.backpatch_list != NULL );
   137     }
   138     if( sh4_x86.in_delay_slot ) {
   139 	fixup_pc -= 2;
   140     }
   141     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   142 	((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
   143     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   144     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   145     sh4_x86.backpatch_posn++;
   146 }
/**
 * Emit an instruction to load an SH4 reg into a real register
 */
static inline void load_reg( int x86reg, int sh4reg ) 
{
    /* mov [bp+n], reg : opcode 0x8B /r with a [ebp+disp8] ModRM (0x45 | reg<<3) */
    OP(0x8B);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}
/* Load the low 16 bits of an SH4 reg, sign-extended to 32 bits
 * (MOVSX r32, r/m16 : 0F BF). */
static inline void load_reg16s( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xBF);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}
/* Load the low 16 bits of an SH4 reg, zero-extended to 32 bits
 * (MOVZX r32, r/m16 : 0F B7). */
static inline void load_reg16u( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xB7);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));

}
/* Load/store a special register (identified by its byte offset into sh4r) */
#define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
#define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
/**
 * Emit an instruction to load an immediate value into a register
 */
static inline void load_imm32( int x86reg, uint32_t value ) {
    /* mov #value, reg : opcode B8+reg, imm32 */
    OP(0xB8 + x86reg);
    OP32(value);
}
/**
 * Load an immediate 64-bit quantity (note: x86-64 only)
 */
static inline void load_imm64( int x86reg, uint64_t value ) {
    /* mov #value, reg : REX.W prefix + B8+reg, imm64 */
    REXW();
    OP(0xB8 + x86reg);
    OP64(value);
}
   195 /**
   196  * Emit an instruction to store an SH4 reg (RN)
   197  */
   198 void static inline store_reg( int x86reg, int sh4reg ) {
   199     /* mov reg, [bp+n] */
   200     OP(0x89);
   201     OP(0x45 + (x86reg<<3));
   202     OP(REG_OFFSET(r[sh4reg]));
   203 }
/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
/* NOTE(review): the ^1 / |0x01 / &0x0E index arithmetic presumably maps the
 * SH4 register pairing onto the host layout of the fr[2][16] banks (fr[0] =
 * FR bank, fr[1] = XF bank) -- confirm against sh4core.h's definition. */
#define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
#define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register 
 */
#define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

/**
 * Store an FR register (single-precision floating point) from an integer x86+
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

/* x87 stack push/pop of SH4 FP values (single via FLDF/FSTPF, double via
 * FLDD/FSTPD) */
#define push_fpul()  FLDF_sh4r(R_FPUL)
#define pop_fpul()   FSTPF_sh4r(R_FPUL)
#define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
/* Exception checks - Note that all exception checks will clobber EAX */

/* Emit a privilege check (SR.MD set) once per block; raises the appropriate
 * illegal-instruction exception (slot variant inside a delay slot). */
#define check_priv( ) \
    if( !sh4_x86.priv_checked ) { \
	sh4_x86.priv_checked = TRUE;\
	load_spreg( R_EAX, R_SR );\
	AND_imm32_r32( SR_MD, R_EAX );\
	if( sh4_x86.in_delay_slot ) {\
	    JE_exc( EXC_SLOT_ILLEGAL );\
	} else {\
	    JE_exc( EXC_ILLEGAL );\
	}\
	sh4_x86.tstate = TSTATE_NONE; \
    }\

/* Emit an FPU-enabled check (SR.FD clear) once per block; raises the
 * FPU-disabled exception (slot variant inside a delay slot). */
#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
	sh4_x86.fpuen_checked = TRUE;\
	load_spreg( R_EAX, R_SR );\
	AND_imm32_r32( SR_FD, R_EAX );\
	if( sh4_x86.in_delay_slot ) {\
	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
	} else {\
	    JNE_exc(EXC_FPU_DISABLED);\
	}\
	sh4_x86.tstate = TSTATE_NONE; \
    }

/* Alignment checks: test the low address bits and raise a data address
 * error on a misaligned 16/32/64-bit access. */
#define check_ralign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TEST_imm32_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TEST_imm32_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);
#define UNDEF(ir)
/* Memory-access helpers emit calls into the sh4_read_*/sh4_write_* handlers;
 * call results arrive in EAX, so MEM_RESULT moves them to value_reg if needed. */
#define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
#define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
/**
 * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned 
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
/* The -1 exception code passed to JE_exc presumably means "exception already
 * raised by mmu_vma_to_phys_*" -- confirm against the backpatch consumer. */
#define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }

/* As above, but raise exc_code instead (used for the second translation of
 * a dual-access instruction such as MAC.L/MAC.W). */
#define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg) }
/**
 * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned 
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
#define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }

/* Emit an unconditional slot-illegal exception and terminate the current
 * instruction's translation (returns 1 = end of basic block). */
#define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
   318 /****** Import appropriate calling conventions ******/
   319 #if SIZEOF_VOID_P == 8
   320 #include "sh4/ia64abi.h"
   321 #else /* 32-bit system */
   322 #ifdef APPLE_BUILD
   323 #include "sh4/ia32mac.h"
   324 #else
   325 #include "sh4/ia32abi.h"
   326 #endif
   327 #endif
   329 void sh4_translate_begin_block( sh4addr_t pc ) 
   330 {
   331 	enter_block();
   332     sh4_x86.in_delay_slot = FALSE;
   333     sh4_x86.priv_checked = FALSE;
   334     sh4_x86.fpuen_checked = FALSE;
   335     sh4_x86.branch_taken = FALSE;
   336     sh4_x86.backpatch_posn = 0;
   337     sh4_x86.block_start_pc = pc;
   338     sh4_x86.tlb_on = IS_MMU_ENABLED();
   339     sh4_x86.tstate = TSTATE_NONE;
   340     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   341     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
   342 }
   345 uint32_t sh4_translate_end_block_size()
   346 {
   347     if( sh4_x86.backpatch_posn <= 3 ) {
   348         return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
   349     } else {
   350         return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
   351     }
   352 }
/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    load_imm32( R_EAX, pc );
    call_func1( sh4_translate_breakpoint_hit, R_EAX );
    /* the call clobbers host flags, so the cached T state is invalid */
    sh4_x86.tstate = TSTATE_NONE;
}
   366 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    /* PC += (endpc - block_start_pc) */
    load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
    ADD_r32_sh4r( R_ECX, R_PC );

    /* slice_cycle += cycles for the instructions covered so far (+1 for the
     * emulated instruction itself) */
    load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
    store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );

    call_func0( sh4_execute_instruction );    
    /* Chain to the translated code for the (possibly changed) PC */
    load_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
	call_func1(xlat_get_code_by_vma,R_EAX);
    } else {
	call_func1(xlat_get_code,R_EAX);
    }
    AND_imm8s_rptr( 0xFC, R_EAX );
    POP_r32(R_EBP);
    RET();
} 
   402 /**
   403  * Translate a single instruction. Delayed branches are handled specially
   404  * by translating both branch and delayed instruction as a single unit (as
   405  * 
   406  * The instruction MUST be in the icache (assert check)
   407  *
   408  * @return true if the instruction marks the end of a basic block
 409  * (eg a branch or an otherwise untranslatable instruction).
   410  */
   411 uint32_t sh4_translate_instruction( sh4vma_t pc )
   412 {
   413     uint32_t ir;
   414     /* Read instruction from icache */
   415     assert( IS_IN_ICACHE(pc) );
   416     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   418 	/* PC is not in the current icache - this usually means we're running
   419 	 * with MMU on, and we've gone past the end of the page. And since 
   420 	 * sh4_translate_block is pretty careful about this, it means we're
   421 	 * almost certainly in a delay slot.
   422 	 *
   423 	 * Since we can't assume the page is present (and we can't fault it in
   424 	 * at this point, inline a call to sh4_execute_instruction (with a few
   425 	 * small repairs to cope with the different environment).
   426 	 */
   428     if( !sh4_x86.in_delay_slot ) {
   429 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   430     }
   431 %%
   432 /* ALU operations */
/* ADD: Rn += Rm */
ADD Rm, Rn {:
    COUNT_INST(I_ADD);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* ADD immediate: Rn += sign-extended 8-bit imm */
ADD #imm, Rn {:  
    COUNT_INST(I_ADDI);
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( imm, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* ADDC: Rn += Rm + T; T = carry out. Skips reloading T into the host carry
 * flag when the carry is already live in EFLAGS from the previous op. */
ADDC Rm, Rn {:
    COUNT_INST(I_ADDC);
    if( sh4_x86.tstate != TSTATE_C ) {
	LDC_t();
    }
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADC_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
/* ADDV: Rn += Rm; T = signed overflow */
ADDV Rm, Rn {:
    COUNT_INST(I_ADDV);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
/* AND: Rn &= Rm */
AND Rm, Rn {:
    COUNT_INST(I_AND);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    AND_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* AND immediate: R0 &= zero-extended imm */
AND #imm, R0 {:  
    COUNT_INST(I_ANDI);
    load_reg( R_EAX, 0 );
    AND_imm32_r32(imm, R_EAX); 
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* AND.B: read-modify-write of the byte at R0+GBR. The translated address is
 * preserved across the read call via push/pop so the write reuses it. */
AND.B #imm, @(R0, GBR) {: 
    COUNT_INST(I_ANDB);
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE( R_EAX, R_EDX );
    POP_realigned_r32(R_EAX);
    AND_imm32_r32(imm, R_EDX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* CMP/EQ: T = (Rn == Rm) */
CMP/EQ Rm, Rn {:  
    COUNT_INST(I_CMPEQ);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
/* CMP/EQ immediate: T = (R0 == sign-extended imm) */
CMP/EQ #imm, R0 {:  
    COUNT_INST(I_CMPEQI);
    load_reg( R_EAX, 0 );
    CMP_imm8s_r32(imm, R_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
/* CMP/GE: T = (Rn >= Rm), signed */
CMP/GE Rm, Rn {:  
    COUNT_INST(I_CMPGE);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
/* CMP/GT: T = (Rn > Rm), signed */
CMP/GT Rm, Rn {: 
    COUNT_INST(I_CMPGT);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
/* CMP/HI: T = (Rn > Rm), unsigned */
CMP/HI Rm, Rn {:  
    COUNT_INST(I_CMPHI);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
/* CMP/HS: T = (Rn >= Rm), unsigned */
CMP/HS Rm, Rn {: 
    COUNT_INST(I_CMPHS);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
 :}
/* CMP/PL: T = (Rn > 0), signed */
CMP/PL Rn {: 
    COUNT_INST(I_CMPPL);
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
/* CMP/PZ: T = (Rn >= 0), signed */
CMP/PZ Rn {:  
    COUNT_INST(I_CMPPZ);
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
/* CMP/STR: T = 1 if any of the 4 bytes of Rm equals the corresponding byte
 * of Rn. XOR makes matching bytes zero; each byte is tested in turn and all
 * early exits converge on the final SETE (ZF set <=> some byte matched). */
CMP/STR Rm, Rn {:  
    COUNT_INST(I_CMPSTR);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_ECX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    JE_rel8(target1);
    TEST_r8_r8( R_AH, R_AH );
    JE_rel8(target2);
    SHR_imm8_r32( 16, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    JE_rel8(target3);
    TEST_r8_r8( R_AH, R_AH );
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
/* DIV0S: initialize a signed division -- M = sign(Rm), Q = sign(Rn),
 * T = M ^ Q (set if the operand signs differ). */
DIV0S Rm, Rn {:
    COUNT_INST(I_DIV0S);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHR_imm8_r32( 31, R_EAX );
    SHR_imm8_r32( 31, R_ECX );
    store_spreg( R_EAX, R_M );
    store_spreg( R_ECX, R_Q );
    CMP_r32_r32( R_EAX, R_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
/* DIV0U: initialize an unsigned division -- clear M, Q and T */
DIV0U {:  
    COUNT_INST(I_DIV0U);
    XOR_r32_r32( R_EAX, R_EAX );
    store_spreg( R_EAX, R_Q );
    store_spreg( R_EAX, R_M );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
/* DIV1: one step of the SH4 non-restoring division. Rotates T into Rn,
 * then adds or subtracts Rm depending on whether M == Q, and recomputes
 * Q and T from the carry out. */
DIV1 Rm, Rn {:
    COUNT_INST(I_DIV1);
    load_spreg( R_ECX, R_M );
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
	LDC_t();
    }
    RCL1_r32( R_EAX );
    SETC_r8( R_DL ); // Q'
    CMP_sh4r_r32( R_Q, R_ECX );
    JE_rel8(mqequal);
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_rel8(end);
    JMP_TARGET(mqequal);
    SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_TARGET(end);
    store_reg( R_EAX, Rn ); // Done with Rn now
    SETC_r8(R_AL); // tmp1
    XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
    XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
    store_spreg( R_ECX, R_Q );
    XOR_imm8s_r32( 1, R_AL );   // T = !Q'
    MOVZX_r8_r32( R_AL, R_EAX );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* DMULS.L: signed 32x32 -> 64-bit multiply into MACH:MACL */
DMULS.L Rm, Rn {:  
    COUNT_INST(I_DMULS);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    IMUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* DMULU.L: unsigned 32x32 -> 64-bit multiply into MACH:MACL */
DMULU.L Rm, Rn {:  
    COUNT_INST(I_DMULU);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );    
    sh4_x86.tstate = TSTATE_NONE;
:}
/* DT: Rn -= 1; T = (Rn == 0) */
DT Rn {:  
    COUNT_INST(I_DT);
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    store_reg( R_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
/* Sign/zero extensions. MOVSX/MOVZX do not modify host flags, so the
 * cached tstate remains valid and is deliberately left untouched. */
EXTS.B Rm, Rn {:  
    COUNT_INST(I_EXTSB);
    load_reg( R_EAX, Rm );
    MOVSX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTS.W Rm, Rn {:  
    COUNT_INST(I_EXTSW);
    load_reg( R_EAX, Rm );
    MOVSX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.B Rm, Rn {:  
    COUNT_INST(I_EXTUB);
    load_reg( R_EAX, Rm );
    MOVZX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.W Rm, Rn {:  
    COUNT_INST(I_EXTUW);
    load_reg( R_EAX, Rm );
    MOVZX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
/* MAC.L: multiply-accumulate of two post-incremented longword operands;
 * the 64-bit product is added into MACH:MACL, then saturated to 48 bits
 * (via signsat48) when the S flag is set. */
MAC.L @Rm+, @Rn+ {:
    COUNT_INST(I_MACL);
    if( Rm == Rn ) {
	/* Same register: both reads come from Rm, at +0 and +4 */
	load_reg( R_EAX, Rm );
	check_ralign32( R_EAX );
	MMU_TRANSLATE_READ( R_EAX );
	PUSH_realigned_r32( R_EAX );
	load_reg( R_EAX, Rn );
	ADD_imm8s_r32( 4, R_EAX );
	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
	// Note translate twice in case of page boundaries. Maybe worth
	// adding a page-boundary check to skip the second translation
    } else {
	load_reg( R_EAX, Rm );
	check_ralign32( R_EAX );
	MMU_TRANSLATE_READ( R_EAX );
	load_reg( R_ECX, Rn );
	check_ralign32( R_ECX );
	PUSH_realigned_r32( R_EAX );
	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
	MOV_r32_r32( R_ECX, R_EAX );
	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_LONG( R_EAX, R_EAX );
    POP_r32( R_ECX );
    PUSH_r32( R_EAX );
    MEM_READ_LONG( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );

    IMUL_r32( R_ECX );
    ADD_r32_sh4r( R_EAX, R_MACL );
    ADC_r32_sh4r( R_EDX, R_MACH );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32(R_ECX, R_ECX);
    JE_rel8( nosat );
    call_func0( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* MAC.W: multiply-accumulate of two post-incremented word operands.
 * With S set, the accumulation saturates to 32 bits in MACL (with MACH
 * set to flag the overflow); otherwise the full 64-bit accumulate goes
 * into MACH:MACL via ADD/ADC. */
MAC.W @Rm+, @Rn+ {:  
    COUNT_INST(I_MACW);
    if( Rm == Rn ) {
	/* Same register: both reads come from Rm, at +0 and +2 */
	load_reg( R_EAX, Rm );
	check_ralign16( R_EAX );
	MMU_TRANSLATE_READ( R_EAX );
	PUSH_realigned_r32( R_EAX );
	load_reg( R_EAX, Rn );
	ADD_imm8s_r32( 2, R_EAX );
	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
	// Note translate twice in case of page boundaries. Maybe worth
	// adding a page-boundary check to skip the second translation
    } else {
	load_reg( R_EAX, Rm );
	check_ralign16( R_EAX );
	MMU_TRANSLATE_READ( R_EAX );
	load_reg( R_ECX, Rn );
	check_ralign16( R_ECX );
	PUSH_realigned_r32( R_EAX );
	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
	MOV_r32_r32( R_ECX, R_EAX );
	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_WORD( R_EAX, R_EAX );
    POP_r32( R_ECX );
    PUSH_r32( R_EAX );
    MEM_READ_WORD( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );
    IMUL_r32( R_ECX );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32( R_ECX, R_ECX );
    JE_rel8( nosat );

    /* Saturating path: clamp MACL to 0x80000000/0x7FFFFFFF on overflow */
    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    JNO_rel8( end );            // 2
    load_imm32( R_EDX, 1 );         // 5
    store_spreg( R_EDX, R_MACH );   // 6
    JS_rel8( positive );        // 2
    load_imm32( R_EAX, 0x80000000 );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8(end2);           // 2

    JMP_TARGET(positive);
    load_imm32( R_EAX, 0x7FFFFFFF );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8(end3);            // 2

    JMP_TARGET(nosat);
    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    ADC_r32_sh4r( R_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* MOVT: Rn = T (0 or 1). Host flags untouched, so tstate stays valid. */
MOVT Rn {:  
    COUNT_INST(I_MOVT);
    load_spreg( R_EAX, R_T );
    store_reg( R_EAX, Rn );
:}
/* MUL.L: MACL = low 32 bits of Rm * Rn */
MUL.L Rm, Rn {:  
    COUNT_INST(I_MULL);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* MULS.W: MACL = sign-extended low words of Rm,Rn multiplied. The low 32
 * bits of the product are identical for MUL and IMUL, so unsigned MUL on
 * the sign-extended operands is sufficient. */
MULS.W Rm, Rn {:
    COUNT_INST(I_MULSW);
    load_reg16s( R_EAX, Rm );
    load_reg16s( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* MULU.W: MACL = zero-extended low words of Rm,Rn multiplied */
MULU.W Rm, Rn {:  
    COUNT_INST(I_MULUW);
    load_reg16u( R_EAX, Rm );
    load_reg16u( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* NEG: Rn = -Rm */
NEG Rm, Rn {:
    COUNT_INST(I_NEG);
    load_reg( R_EAX, Rm );
    NEG_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* NEGC: Rn = 0 - Rm - T; T = borrow out */
NEGC Rm, Rn {:  
    COUNT_INST(I_NEGC);
    load_reg( R_EAX, Rm );
    XOR_r32_r32( R_ECX, R_ECX );
    LDC_t();
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
/* NOT: Rn = ~Rm */
NOT Rm, Rn {:  
    COUNT_INST(I_NOT);
    load_reg( R_EAX, Rm );
    NOT_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* OR: Rn |= Rm */
OR Rm, Rn {:  
    COUNT_INST(I_OR);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* OR immediate: R0 |= zero-extended imm */
OR #imm, R0 {:
    COUNT_INST(I_ORI);
    load_reg( R_EAX, 0 );
    OR_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* OR.B: read-modify-write of the byte at R0+GBR; the translated address is
 * kept across the read call via push/pop (same shape as AND.B). */
OR.B #imm, @(R0, GBR) {:  
    COUNT_INST(I_ORB);
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE( R_EAX, R_EDX );
    POP_realigned_r32(R_EAX);
    OR_imm32_r32(imm, R_EDX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* ROTCL: rotate Rn left through T (T -> bit0, bit31 -> T). Skips reloading
 * T into the host carry flag when it is already live in EFLAGS. */
ROTCL Rn {:
    COUNT_INST(I_ROTCL);
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
	LDC_t();
    }
    RCL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
/* ROTCR: rotate Rn right through T (T -> bit31, bit0 -> T) */
ROTCR Rn {:  
    COUNT_INST(I_ROTCR);
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
	LDC_t();
    }
    RCR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
/* ROTL: rotate Rn left by one; T = the bit rotated out of bit31 */
ROTL Rn {:  
    COUNT_INST(I_ROTL);
    load_reg( R_EAX, Rn );
    ROL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
/* ROTR: rotate Rn right by one; T = the bit rotated out of bit0 */
ROTR Rn {:  
    COUNT_INST(I_ROTR);
    load_reg( R_EAX, Rn );
    ROR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
// SHAD Rm, Rn: dynamic arithmetic shift of Rn by the signed count in Rm.
//   Rm >= 0            -> Rn <<= (Rm & 0x1F)
//   Rm < 0, count != 0 -> Rn >>= ((-Rm) & 0x1F)   (arithmetic, SAR)
//   Rm < 0, count == 0 -> Rn >>= 31               (register fills with sign bit)
// x86 has no single equivalent, hence the three-way forward branch.  The
// trailing "// 2"/"// 3" annotations are the emitted x86 instruction lengths,
// kept so the rel8 branch displacements stay obviously in range.
   892 SHAD Rm, Rn {:
   893     COUNT_INST(I_SHAD);
   894     /* Annoyingly enough, not directly convertible */
   895     load_reg( R_EAX, Rn );
   896     load_reg( R_ECX, Rm );
   897     CMP_imm32_r32( 0, R_ECX );
   898     JGE_rel8(doshl);
   900     NEG_r32( R_ECX );      // 2
   901     AND_imm8_r8( 0x1F, R_CL ); // 3
   902     JE_rel8(emptysar);     // 2
   903     SAR_r32_CL( R_EAX );       // 2
   904     JMP_rel8(end);          // 2
   906     JMP_TARGET(emptysar);
   907     SAR_imm8_r32(31, R_EAX );  // 3
   908     JMP_rel8(end2);
   910     JMP_TARGET(doshl);
   911     AND_imm8_r8( 0x1F, R_CL ); // 3
   912     SHL_r32_CL( R_EAX );       // 2
   913     JMP_TARGET(end);
   914     JMP_TARGET(end2);
   915     store_reg( R_EAX, Rn );
   916     sh4_x86.tstate = TSTATE_NONE;   // flags clobbered; invalidate cached T-state
   917 :}
// SHLD Rm, Rn: dynamic logical shift of Rn by the signed count in Rm.
// Same three-way structure as SHAD above, but logical:
//   Rm >= 0            -> Rn <<= (Rm & 0x1F)
//   Rm < 0, count != 0 -> Rn >>= ((-Rm) & 0x1F)   (logical, SHR)
//   Rm < 0, count == 0 -> Rn = 0                  (XOR clears the register)
   918 SHLD Rm, Rn {:  
   919     COUNT_INST(I_SHLD);
   920     load_reg( R_EAX, Rn );
   921     load_reg( R_ECX, Rm );
   922     CMP_imm32_r32( 0, R_ECX );
   923     JGE_rel8(doshl);
   925     NEG_r32( R_ECX );      // 2
   926     AND_imm8_r8( 0x1F, R_CL ); // 3
   927     JE_rel8(emptyshr );
   928     SHR_r32_CL( R_EAX );       // 2
   929     JMP_rel8(end);          // 2
   931     JMP_TARGET(emptyshr);
   932     XOR_r32_r32( R_EAX, R_EAX );
   933     JMP_rel8(end2);
   935     JMP_TARGET(doshl);
   936     AND_imm8_r8( 0x1F, R_CL ); // 3
   937     SHL_r32_CL( R_EAX );       // 2
   938     JMP_TARGET(end);
   939     JMP_TARGET(end2);
   940     store_reg( R_EAX, Rn );
   941     sh4_x86.tstate = TSTATE_NONE;   // flags clobbered; invalidate cached T-state
   942 :}
   943 SHAL Rn {: 
   944     COUNT_INST(I_SHAL);
   945     load_reg( R_EAX, Rn );
   946     SHL1_r32( R_EAX );
   947     SETC_t();
   948     store_reg( R_EAX, Rn );
   949     sh4_x86.tstate = TSTATE_C;
   950 :}
   951 SHAR Rn {:  
   952     COUNT_INST(I_SHAR);
   953     load_reg( R_EAX, Rn );
   954     SAR1_r32( R_EAX );
   955     SETC_t();
   956     store_reg( R_EAX, Rn );
   957     sh4_x86.tstate = TSTATE_C;
   958 :}
   959 SHLL Rn {:  
   960     COUNT_INST(I_SHLL);
   961     load_reg( R_EAX, Rn );
   962     SHL1_r32( R_EAX );
   963     SETC_t();
   964     store_reg( R_EAX, Rn );
   965     sh4_x86.tstate = TSTATE_C;
   966 :}
   967 SHLL2 Rn {:
   968     COUNT_INST(I_SHLL);
   969     load_reg( R_EAX, Rn );
   970     SHL_imm8_r32( 2, R_EAX );
   971     store_reg( R_EAX, Rn );
   972     sh4_x86.tstate = TSTATE_NONE;
   973 :}
   974 SHLL8 Rn {:  
   975     COUNT_INST(I_SHLL);
   976     load_reg( R_EAX, Rn );
   977     SHL_imm8_r32( 8, R_EAX );
   978     store_reg( R_EAX, Rn );
   979     sh4_x86.tstate = TSTATE_NONE;
   980 :}
   981 SHLL16 Rn {:  
   982     COUNT_INST(I_SHLL);
   983     load_reg( R_EAX, Rn );
   984     SHL_imm8_r32( 16, R_EAX );
   985     store_reg( R_EAX, Rn );
   986     sh4_x86.tstate = TSTATE_NONE;
   987 :}
   988 SHLR Rn {:  
   989     COUNT_INST(I_SHLR);
   990     load_reg( R_EAX, Rn );
   991     SHR1_r32( R_EAX );
   992     SETC_t();
   993     store_reg( R_EAX, Rn );
   994     sh4_x86.tstate = TSTATE_C;
   995 :}
   996 SHLR2 Rn {:  
   997     COUNT_INST(I_SHLR);
   998     load_reg( R_EAX, Rn );
   999     SHR_imm8_r32( 2, R_EAX );
  1000     store_reg( R_EAX, Rn );
  1001     sh4_x86.tstate = TSTATE_NONE;
  1002 :}
  1003 SHLR8 Rn {:  
  1004     COUNT_INST(I_SHLR);
  1005     load_reg( R_EAX, Rn );
  1006     SHR_imm8_r32( 8, R_EAX );
  1007     store_reg( R_EAX, Rn );
  1008     sh4_x86.tstate = TSTATE_NONE;
  1009 :}
  1010 SHLR16 Rn {:  
  1011     COUNT_INST(I_SHLR);
  1012     load_reg( R_EAX, Rn );
  1013     SHR_imm8_r32( 16, R_EAX );
  1014     store_reg( R_EAX, Rn );
  1015     sh4_x86.tstate = TSTATE_NONE;
  1016 :}
  1017 SUB Rm, Rn {:  
  1018     COUNT_INST(I_SUB);
  1019     load_reg( R_EAX, Rm );
  1020     load_reg( R_ECX, Rn );
  1021     SUB_r32_r32( R_EAX, R_ECX );
  1022     store_reg( R_ECX, Rn );
  1023     sh4_x86.tstate = TSTATE_NONE;
  1024 :}
  1025 SUBC Rm, Rn {:  
  1026     COUNT_INST(I_SUBC);
  1027     load_reg( R_EAX, Rm );
  1028     load_reg( R_ECX, Rn );
  1029     if( sh4_x86.tstate != TSTATE_C ) {
  1030 	LDC_t();
  1032     SBB_r32_r32( R_EAX, R_ECX );
  1033     store_reg( R_ECX, Rn );
  1034     SETC_t();
  1035     sh4_x86.tstate = TSTATE_C;
  1036 :}
  1037 SUBV Rm, Rn {:  
  1038     COUNT_INST(I_SUBV);
  1039     load_reg( R_EAX, Rm );
  1040     load_reg( R_ECX, Rn );
  1041     SUB_r32_r32( R_EAX, R_ECX );
  1042     store_reg( R_ECX, Rn );
  1043     SETO_t();
  1044     sh4_x86.tstate = TSTATE_O;
  1045 :}
  1046 SWAP.B Rm, Rn {:  
  1047     COUNT_INST(I_SWAPB);
  1048     load_reg( R_EAX, Rm );
  1049     XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
  1050     store_reg( R_EAX, Rn );
  1051 :}
  1052 SWAP.W Rm, Rn {:  
  1053     COUNT_INST(I_SWAPB);
  1054     load_reg( R_EAX, Rm );
  1055     MOV_r32_r32( R_EAX, R_ECX );
  1056     SHL_imm8_r32( 16, R_ECX );
  1057     SHR_imm8_r32( 16, R_EAX );
  1058     OR_r32_r32( R_EAX, R_ECX );
  1059     store_reg( R_ECX, Rn );
  1060     sh4_x86.tstate = TSTATE_NONE;
  1061 :}
// TAS.B @Rn: test-and-set byte.  T = (mem[Rn] == 0), then mem[Rn] |= 0x80.
// The address is translated once for write access, then saved across the
// byte read (PUSH/POP) so the write-back reuses the same translated address.
// NOTE(review): no bus-lock is emitted here — presumably fine for
// single-threaded emulation, but worth confirming against the core's model.
  1062 TAS.B @Rn {:  
  1063     COUNT_INST(I_TASB);
  1064     load_reg( R_EAX, Rn );
  1065     MMU_TRANSLATE_WRITE( R_EAX );
  1066     PUSH_realigned_r32( R_EAX );
  1067     MEM_READ_BYTE( R_EAX, R_EDX );
  1068     TEST_r8_r8( R_DL, R_DL );
  1069     SETE_t();
  1070     OR_imm8_r8( 0x80, R_DL );
  1071     POP_realigned_r32( R_EAX );
  1072     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1073     sh4_x86.tstate = TSTATE_NONE;
  1074 :}
  1075 TST Rm, Rn {:  
  1076     COUNT_INST(I_TST);
  1077     load_reg( R_EAX, Rm );
  1078     load_reg( R_ECX, Rn );
  1079     TEST_r32_r32( R_EAX, R_ECX );
  1080     SETE_t();
  1081     sh4_x86.tstate = TSTATE_E;
  1082 :}
  1083 TST #imm, R0 {:  
  1084     COUNT_INST(I_TSTI);
  1085     load_reg( R_EAX, 0 );
  1086     TEST_imm32_r32( imm, R_EAX );
  1087     SETE_t();
  1088     sh4_x86.tstate = TSTATE_E;
  1089 :}
  1090 TST.B #imm, @(R0, GBR) {:  
  1091     COUNT_INST(I_TSTB);
  1092     load_reg( R_EAX, 0);
  1093     load_reg( R_ECX, R_GBR);
  1094     ADD_r32_r32( R_ECX, R_EAX );
  1095     MMU_TRANSLATE_READ( R_EAX );
  1096     MEM_READ_BYTE( R_EAX, R_EAX );
  1097     TEST_imm8_r8( imm, R_AL );
  1098     SETE_t();
  1099     sh4_x86.tstate = TSTATE_E;
  1100 :}
  1101 XOR Rm, Rn {:  
  1102     COUNT_INST(I_XOR);
  1103     load_reg( R_EAX, Rm );
  1104     load_reg( R_ECX, Rn );
  1105     XOR_r32_r32( R_EAX, R_ECX );
  1106     store_reg( R_ECX, Rn );
  1107     sh4_x86.tstate = TSTATE_NONE;
  1108 :}
  1109 XOR #imm, R0 {:  
  1110     COUNT_INST(I_XORI);
  1111     load_reg( R_EAX, 0 );
  1112     XOR_imm32_r32( imm, R_EAX );
  1113     store_reg( R_EAX, 0 );
  1114     sh4_x86.tstate = TSTATE_NONE;
  1115 :}
  1116 XOR.B #imm, @(R0, GBR) {:  
  1117     COUNT_INST(I_XORB);
  1118     load_reg( R_EAX, 0 );
  1119     load_spreg( R_ECX, R_GBR );
  1120     ADD_r32_r32( R_ECX, R_EAX );
  1121     MMU_TRANSLATE_WRITE( R_EAX );
  1122     PUSH_realigned_r32(R_EAX);
  1123     MEM_READ_BYTE(R_EAX, R_EDX);
  1124     POP_realigned_r32(R_EAX);
  1125     XOR_imm32_r32( imm, R_EDX );
  1126     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1127     sh4_x86.tstate = TSTATE_NONE;
  1128 :}
  1129 XTRCT Rm, Rn {:
  1130     COUNT_INST(I_XTRCT);
  1131     load_reg( R_EAX, Rm );
  1132     load_reg( R_ECX, Rn );
  1133     SHL_imm8_r32( 16, R_EAX );
  1134     SHR_imm8_r32( 16, R_ECX );
  1135     OR_r32_r32( R_EAX, R_ECX );
  1136     store_reg( R_ECX, Rn );
  1137     sh4_x86.tstate = TSTATE_NONE;
  1138 :}
  1140 /* Data move instructions */
  1141 MOV Rm, Rn {:  
  1142     COUNT_INST(I_MOV);
  1143     load_reg( R_EAX, Rm );
  1144     store_reg( R_EAX, Rn );
  1145 :}
  1146 MOV #imm, Rn {:  
  1147     COUNT_INST(I_MOVI);
  1148     load_imm32( R_EAX, imm );
  1149     store_reg( R_EAX, Rn );
  1150 :}
  1151 MOV.B Rm, @Rn {:  
  1152     COUNT_INST(I_MOVB);
  1153     load_reg( R_EAX, Rn );
  1154     MMU_TRANSLATE_WRITE( R_EAX );
  1155     load_reg( R_EDX, Rm );
  1156     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1157     sh4_x86.tstate = TSTATE_NONE;
  1158 :}
  1159 MOV.B Rm, @-Rn {:  
  1160     COUNT_INST(I_MOVB);
  1161     load_reg( R_EAX, Rn );
  1162     ADD_imm8s_r32( -1, R_EAX );
  1163     MMU_TRANSLATE_WRITE( R_EAX );
  1164     load_reg( R_EDX, Rm );
  1165     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
  1166     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1167     sh4_x86.tstate = TSTATE_NONE;
  1168 :}
  1169 MOV.B Rm, @(R0, Rn) {:  
  1170     COUNT_INST(I_MOVB);
  1171     load_reg( R_EAX, 0 );
  1172     load_reg( R_ECX, Rn );
  1173     ADD_r32_r32( R_ECX, R_EAX );
  1174     MMU_TRANSLATE_WRITE( R_EAX );
  1175     load_reg( R_EDX, Rm );
  1176     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1177     sh4_x86.tstate = TSTATE_NONE;
  1178 :}
  1179 MOV.B R0, @(disp, GBR) {:  
  1180     COUNT_INST(I_MOVB);
  1181     load_spreg( R_EAX, R_GBR );
  1182     ADD_imm32_r32( disp, R_EAX );
  1183     MMU_TRANSLATE_WRITE( R_EAX );
  1184     load_reg( R_EDX, 0 );
  1185     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1186     sh4_x86.tstate = TSTATE_NONE;
  1187 :}
  1188 MOV.B R0, @(disp, Rn) {:  
  1189     COUNT_INST(I_MOVB);
  1190     load_reg( R_EAX, Rn );
  1191     ADD_imm32_r32( disp, R_EAX );
  1192     MMU_TRANSLATE_WRITE( R_EAX );
  1193     load_reg( R_EDX, 0 );
  1194     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1195     sh4_x86.tstate = TSTATE_NONE;
  1196 :}
  1197 MOV.B @Rm, Rn {:  
  1198     COUNT_INST(I_MOVB);
  1199     load_reg( R_EAX, Rm );
  1200     MMU_TRANSLATE_READ( R_EAX );
  1201     MEM_READ_BYTE( R_EAX, R_EAX );
  1202     store_reg( R_EAX, Rn );
  1203     sh4_x86.tstate = TSTATE_NONE;
  1204 :}
  1205 MOV.B @Rm+, Rn {:  
  1206     COUNT_INST(I_MOVB);
  1207     load_reg( R_EAX, Rm );
  1208     MMU_TRANSLATE_READ( R_EAX );
  1209     ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
  1210     MEM_READ_BYTE( R_EAX, R_EAX );
  1211     store_reg( R_EAX, Rn );
  1212     sh4_x86.tstate = TSTATE_NONE;
  1213 :}
  1214 MOV.B @(R0, Rm), Rn {:  
  1215     COUNT_INST(I_MOVB);
  1216     load_reg( R_EAX, 0 );
  1217     load_reg( R_ECX, Rm );
  1218     ADD_r32_r32( R_ECX, R_EAX );
  1219     MMU_TRANSLATE_READ( R_EAX )
  1220     MEM_READ_BYTE( R_EAX, R_EAX );
  1221     store_reg( R_EAX, Rn );
  1222     sh4_x86.tstate = TSTATE_NONE;
  1223 :}
  1224 MOV.B @(disp, GBR), R0 {:  
  1225     COUNT_INST(I_MOVB);
  1226     load_spreg( R_EAX, R_GBR );
  1227     ADD_imm32_r32( disp, R_EAX );
  1228     MMU_TRANSLATE_READ( R_EAX );
  1229     MEM_READ_BYTE( R_EAX, R_EAX );
  1230     store_reg( R_EAX, 0 );
  1231     sh4_x86.tstate = TSTATE_NONE;
  1232 :}
  1233 MOV.B @(disp, Rm), R0 {:  
  1234     COUNT_INST(I_MOVB);
  1235     load_reg( R_EAX, Rm );
  1236     ADD_imm32_r32( disp, R_EAX );
  1237     MMU_TRANSLATE_READ( R_EAX );
  1238     MEM_READ_BYTE( R_EAX, R_EAX );
  1239     store_reg( R_EAX, 0 );
  1240     sh4_x86.tstate = TSTATE_NONE;
  1241 :}
  1242 MOV.L Rm, @Rn {:
  1243     COUNT_INST(I_MOVL);
  1244     load_reg( R_EAX, Rn );
  1245     check_walign32(R_EAX);
  1246     MMU_TRANSLATE_WRITE( R_EAX );
  1247     load_reg( R_EDX, Rm );
  1248     MEM_WRITE_LONG( R_EAX, R_EDX );
  1249     sh4_x86.tstate = TSTATE_NONE;
  1250 :}
  1251 MOV.L Rm, @-Rn {:  
  1252     COUNT_INST(I_MOVL);
  1253     load_reg( R_EAX, Rn );
  1254     ADD_imm8s_r32( -4, R_EAX );
  1255     check_walign32( R_EAX );
  1256     MMU_TRANSLATE_WRITE( R_EAX );
  1257     load_reg( R_EDX, Rm );
  1258     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  1259     MEM_WRITE_LONG( R_EAX, R_EDX );
  1260     sh4_x86.tstate = TSTATE_NONE;
  1261 :}
  1262 MOV.L Rm, @(R0, Rn) {:  
  1263     COUNT_INST(I_MOVL);
  1264     load_reg( R_EAX, 0 );
  1265     load_reg( R_ECX, Rn );
  1266     ADD_r32_r32( R_ECX, R_EAX );
  1267     check_walign32( R_EAX );
  1268     MMU_TRANSLATE_WRITE( R_EAX );
  1269     load_reg( R_EDX, Rm );
  1270     MEM_WRITE_LONG( R_EAX, R_EDX );
  1271     sh4_x86.tstate = TSTATE_NONE;
  1272 :}
  1273 MOV.L R0, @(disp, GBR) {:  
  1274     COUNT_INST(I_MOVL);
  1275     load_spreg( R_EAX, R_GBR );
  1276     ADD_imm32_r32( disp, R_EAX );
  1277     check_walign32( R_EAX );
  1278     MMU_TRANSLATE_WRITE( R_EAX );
  1279     load_reg( R_EDX, 0 );
  1280     MEM_WRITE_LONG( R_EAX, R_EDX );
  1281     sh4_x86.tstate = TSTATE_NONE;
  1282 :}
  1283 MOV.L Rm, @(disp, Rn) {:  
  1284     COUNT_INST(I_MOVL);
  1285     load_reg( R_EAX, Rn );
  1286     ADD_imm32_r32( disp, R_EAX );
  1287     check_walign32( R_EAX );
  1288     MMU_TRANSLATE_WRITE( R_EAX );
  1289     load_reg( R_EDX, Rm );
  1290     MEM_WRITE_LONG( R_EAX, R_EDX );
  1291     sh4_x86.tstate = TSTATE_NONE;
  1292 :}
  1293 MOV.L @Rm, Rn {:  
  1294     COUNT_INST(I_MOVL);
  1295     load_reg( R_EAX, Rm );
  1296     check_ralign32( R_EAX );
  1297     MMU_TRANSLATE_READ( R_EAX );
  1298     MEM_READ_LONG( R_EAX, R_EAX );
  1299     store_reg( R_EAX, Rn );
  1300     sh4_x86.tstate = TSTATE_NONE;
  1301 :}
  1302 MOV.L @Rm+, Rn {:  
  1303     COUNT_INST(I_MOVL);
  1304     load_reg( R_EAX, Rm );
  1305     check_ralign32( R_EAX );
  1306     MMU_TRANSLATE_READ( R_EAX );
  1307     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1308     MEM_READ_LONG( R_EAX, R_EAX );
  1309     store_reg( R_EAX, Rn );
  1310     sh4_x86.tstate = TSTATE_NONE;
  1311 :}
  1312 MOV.L @(R0, Rm), Rn {:  
  1313     COUNT_INST(I_MOVL);
  1314     load_reg( R_EAX, 0 );
  1315     load_reg( R_ECX, Rm );
  1316     ADD_r32_r32( R_ECX, R_EAX );
  1317     check_ralign32( R_EAX );
  1318     MMU_TRANSLATE_READ( R_EAX );
  1319     MEM_READ_LONG( R_EAX, R_EAX );
  1320     store_reg( R_EAX, Rn );
  1321     sh4_x86.tstate = TSTATE_NONE;
  1322 :}
  1323 MOV.L @(disp, GBR), R0 {:
  1324     COUNT_INST(I_MOVL);
  1325     load_spreg( R_EAX, R_GBR );
  1326     ADD_imm32_r32( disp, R_EAX );
  1327     check_ralign32( R_EAX );
  1328     MMU_TRANSLATE_READ( R_EAX );
  1329     MEM_READ_LONG( R_EAX, R_EAX );
  1330     store_reg( R_EAX, 0 );
  1331     sh4_x86.tstate = TSTATE_NONE;
  1332 :}
  1333 MOV.L @(disp, PC), Rn {:  
  1334     COUNT_INST(I_MOVLPC);
  1335     if( sh4_x86.in_delay_slot ) {
  1336 	SLOTILLEGAL();
  1337     } else {
  1338 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1339 	if( IS_IN_ICACHE(target) ) {
  1340 	    // If the target address is in the same page as the code, it's
  1341 	    // pretty safe to just ref it directly and circumvent the whole
  1342 	    // memory subsystem. (this is a big performance win)
  1344 	    // FIXME: There's a corner-case that's not handled here when
  1345 	    // the current code-page is in the ITLB but not in the UTLB.
  1346 	    // (should generate a TLB miss although need to test SH4 
  1347 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1348 	    // behaviour though.
  1349 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1350 	    MOV_moff32_EAX( ptr );
  1351 	} else {
  1352 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1353 	    // different virtual address than the translation was done with,
  1354 	    // but we can safely assume that the low bits are the same.
  1355 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1356 	    ADD_sh4r_r32( R_PC, R_EAX );
  1357 	    MMU_TRANSLATE_READ( R_EAX );
  1358 	    MEM_READ_LONG( R_EAX, R_EAX );
  1359 	    sh4_x86.tstate = TSTATE_NONE;
  1361 	store_reg( R_EAX, Rn );
  1363 :}
  1364 MOV.L @(disp, Rm), Rn {:  
  1365     COUNT_INST(I_MOVL);
  1366     load_reg( R_EAX, Rm );
  1367     ADD_imm8s_r32( disp, R_EAX );
  1368     check_ralign32( R_EAX );
  1369     MMU_TRANSLATE_READ( R_EAX );
  1370     MEM_READ_LONG( R_EAX, R_EAX );
  1371     store_reg( R_EAX, Rn );
  1372     sh4_x86.tstate = TSTATE_NONE;
  1373 :}
  1374 MOV.W Rm, @Rn {:  
  1375     COUNT_INST(I_MOVW);
  1376     load_reg( R_EAX, Rn );
  1377     check_walign16( R_EAX );
  1378     MMU_TRANSLATE_WRITE( R_EAX )
  1379     load_reg( R_EDX, Rm );
  1380     MEM_WRITE_WORD( R_EAX, R_EDX );
  1381     sh4_x86.tstate = TSTATE_NONE;
  1382 :}
  1383 MOV.W Rm, @-Rn {:  
  1384     COUNT_INST(I_MOVW);
  1385     load_reg( R_EAX, Rn );
  1386     ADD_imm8s_r32( -2, R_EAX );
  1387     check_walign16( R_EAX );
  1388     MMU_TRANSLATE_WRITE( R_EAX );
  1389     load_reg( R_EDX, Rm );
  1390     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
  1391     MEM_WRITE_WORD( R_EAX, R_EDX );
  1392     sh4_x86.tstate = TSTATE_NONE;
  1393 :}
  1394 MOV.W Rm, @(R0, Rn) {:  
  1395     COUNT_INST(I_MOVW);
  1396     load_reg( R_EAX, 0 );
  1397     load_reg( R_ECX, Rn );
  1398     ADD_r32_r32( R_ECX, R_EAX );
  1399     check_walign16( R_EAX );
  1400     MMU_TRANSLATE_WRITE( R_EAX );
  1401     load_reg( R_EDX, Rm );
  1402     MEM_WRITE_WORD( R_EAX, R_EDX );
  1403     sh4_x86.tstate = TSTATE_NONE;
  1404 :}
  1405 MOV.W R0, @(disp, GBR) {:  
  1406     COUNT_INST(I_MOVW);
  1407     load_spreg( R_EAX, R_GBR );
  1408     ADD_imm32_r32( disp, R_EAX );
  1409     check_walign16( R_EAX );
  1410     MMU_TRANSLATE_WRITE( R_EAX );
  1411     load_reg( R_EDX, 0 );
  1412     MEM_WRITE_WORD( R_EAX, R_EDX );
  1413     sh4_x86.tstate = TSTATE_NONE;
  1414 :}
  1415 MOV.W R0, @(disp, Rn) {:  
  1416     COUNT_INST(I_MOVW);
  1417     load_reg( R_EAX, Rn );
  1418     ADD_imm32_r32( disp, R_EAX );
  1419     check_walign16( R_EAX );
  1420     MMU_TRANSLATE_WRITE( R_EAX );
  1421     load_reg( R_EDX, 0 );
  1422     MEM_WRITE_WORD( R_EAX, R_EDX );
  1423     sh4_x86.tstate = TSTATE_NONE;
  1424 :}
  1425 MOV.W @Rm, Rn {:  
  1426     COUNT_INST(I_MOVW);
  1427     load_reg( R_EAX, Rm );
  1428     check_ralign16( R_EAX );
  1429     MMU_TRANSLATE_READ( R_EAX );
  1430     MEM_READ_WORD( R_EAX, R_EAX );
  1431     store_reg( R_EAX, Rn );
  1432     sh4_x86.tstate = TSTATE_NONE;
  1433 :}
  1434 MOV.W @Rm+, Rn {:  
  1435     COUNT_INST(I_MOVW);
  1436     load_reg( R_EAX, Rm );
  1437     check_ralign16( R_EAX );
  1438     MMU_TRANSLATE_READ( R_EAX );
  1439     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
  1440     MEM_READ_WORD( R_EAX, R_EAX );
  1441     store_reg( R_EAX, Rn );
  1442     sh4_x86.tstate = TSTATE_NONE;
  1443 :}
  1444 MOV.W @(R0, Rm), Rn {:  
  1445     COUNT_INST(I_MOVW);
  1446     load_reg( R_EAX, 0 );
  1447     load_reg( R_ECX, Rm );
  1448     ADD_r32_r32( R_ECX, R_EAX );
  1449     check_ralign16( R_EAX );
  1450     MMU_TRANSLATE_READ( R_EAX );
  1451     MEM_READ_WORD( R_EAX, R_EAX );
  1452     store_reg( R_EAX, Rn );
  1453     sh4_x86.tstate = TSTATE_NONE;
  1454 :}
  1455 MOV.W @(disp, GBR), R0 {:  
  1456     COUNT_INST(I_MOVW);
  1457     load_spreg( R_EAX, R_GBR );
  1458     ADD_imm32_r32( disp, R_EAX );
  1459     check_ralign16( R_EAX );
  1460     MMU_TRANSLATE_READ( R_EAX );
  1461     MEM_READ_WORD( R_EAX, R_EAX );
  1462     store_reg( R_EAX, 0 );
  1463     sh4_x86.tstate = TSTATE_NONE;
  1464 :}
  1465 MOV.W @(disp, PC), Rn {:  
  1466     COUNT_INST(I_MOVW);
  1467     if( sh4_x86.in_delay_slot ) {
  1468 	SLOTILLEGAL();
  1469     } else {
  1470 	// See comments for MOV.L @(disp, PC), Rn
  1471 	uint32_t target = pc + disp + 4;
  1472 	if( IS_IN_ICACHE(target) ) {
  1473 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1474 	    MOV_moff32_EAX( ptr );
  1475 	    MOVSX_r16_r32( R_EAX, R_EAX );
  1476 	} else {
  1477 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1478 	    ADD_sh4r_r32( R_PC, R_EAX );
  1479 	    MMU_TRANSLATE_READ( R_EAX );
  1480 	    MEM_READ_WORD( R_EAX, R_EAX );
  1481 	    sh4_x86.tstate = TSTATE_NONE;
  1483 	store_reg( R_EAX, Rn );
  1485 :}
  1486 MOV.W @(disp, Rm), R0 {:  
  1487     COUNT_INST(I_MOVW);
  1488     load_reg( R_EAX, Rm );
  1489     ADD_imm32_r32( disp, R_EAX );
  1490     check_ralign16( R_EAX );
  1491     MMU_TRANSLATE_READ( R_EAX );
  1492     MEM_READ_WORD( R_EAX, R_EAX );
  1493     store_reg( R_EAX, 0 );
  1494     sh4_x86.tstate = TSTATE_NONE;
  1495 :}
  1496 MOVA @(disp, PC), R0 {:  
  1497     COUNT_INST(I_MOVA);
  1498     if( sh4_x86.in_delay_slot ) {
  1499 	SLOTILLEGAL();
  1500     } else {
  1501 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1502 	ADD_sh4r_r32( R_PC, R_ECX );
  1503 	store_reg( R_ECX, 0 );
  1504 	sh4_x86.tstate = TSTATE_NONE;
  1506 :}
  1507 MOVCA.L R0, @Rn {:  
  1508     COUNT_INST(I_MOVCA);
  1509     load_reg( R_EAX, Rn );
  1510     check_walign32( R_EAX );
  1511     MMU_TRANSLATE_WRITE( R_EAX );
  1512     load_reg( R_EDX, 0 );
  1513     MEM_WRITE_LONG( R_EAX, R_EDX );
  1514     sh4_x86.tstate = TSTATE_NONE;
  1515 :}
  1517 /* Control transfer instructions */
  1518 BF disp {:
  1519     COUNT_INST(I_BF);
  1520     if( sh4_x86.in_delay_slot ) {
  1521 	SLOTILLEGAL();
  1522     } else {
  1523 	sh4vma_t target = disp + pc + 4;
  1524 	JT_rel8( nottaken );
  1525 	exit_block_rel(target, pc+2 );
  1526 	JMP_TARGET(nottaken);
  1527 	return 2;
  1529 :}
// BF/S disp: branch if T==0, with delay slot.
// Untranslatable delay slot: compute new PC at runtime (taken/not-taken merge
// via the rel8 skip over the displacement add) and bail to the emulator core.
// Translatable: emit a Jcc rel32 whose condition code is derived from the
// cached tstate (0x80+tstate encodes the matching x86 condition), translate
// the delay-slot instruction TWICE — once on the taken path, once on the
// not-taken path — restoring tstate in between, and backpatch the rel32
// once the not-taken address is known.
  1530 BF/S disp {:
  1531     COUNT_INST(I_BFS);
  1532     if( sh4_x86.in_delay_slot ) {
  1533 	SLOTILLEGAL();
  1534     } else {
  1535 	sh4_x86.in_delay_slot = DELAY_PC;
  1536 	if( UNTRANSLATABLE(pc+2) ) {
  1537 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1538 	    JT_rel8(nottaken);
  1539 	    ADD_imm32_r32( disp, R_EAX );
  1540 	    JMP_TARGET(nottaken);
  1541 	    ADD_sh4r_r32( R_PC, R_EAX );
  1542 	    store_spreg( R_EAX, R_NEW_PC );
  1543 	    exit_block_emu(pc+2);
  1544 	    sh4_x86.branch_taken = TRUE;
  1545 	    return 2;
  1546 	} else {
  1547 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1548 		CMP_imm8s_sh4r( 1, R_T );
  1549 		sh4_x86.tstate = TSTATE_E;
  1551 	    sh4vma_t target = disp + pc + 4;
  1552 	    OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
  1553 	    int save_tstate = sh4_x86.tstate;
  1554 	    sh4_translate_instruction(pc+2);
  1555 	    exit_block_rel( target, pc+4 );
  1557 	    // not taken
  1558 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1559 	    sh4_x86.tstate = save_tstate;
  1560 	    sh4_translate_instruction(pc+2);
  1561 	    return 4;
  1564 :}
  1565 BRA disp {:  
  1566     COUNT_INST(I_BRA);
  1567     if( sh4_x86.in_delay_slot ) {
  1568 	SLOTILLEGAL();
  1569     } else {
  1570 	sh4_x86.in_delay_slot = DELAY_PC;
  1571 	sh4_x86.branch_taken = TRUE;
  1572 	if( UNTRANSLATABLE(pc+2) ) {
  1573 	    load_spreg( R_EAX, R_PC );
  1574 	    ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
  1575 	    store_spreg( R_EAX, R_NEW_PC );
  1576 	    exit_block_emu(pc+2);
  1577 	    return 2;
  1578 	} else {
  1579 	    sh4_translate_instruction( pc + 2 );
  1580 	    exit_block_rel( disp + pc + 4, pc+4 );
  1581 	    return 4;
  1584 :}
  1585 BRAF Rn {:  
  1586     COUNT_INST(I_BRAF);
  1587     if( sh4_x86.in_delay_slot ) {
  1588 	SLOTILLEGAL();
  1589     } else {
  1590 	load_spreg( R_EAX, R_PC );
  1591 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1592 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1593 	store_spreg( R_EAX, R_NEW_PC );
  1594 	sh4_x86.in_delay_slot = DELAY_PC;
  1595 	sh4_x86.tstate = TSTATE_NONE;
  1596 	sh4_x86.branch_taken = TRUE;
  1597 	if( UNTRANSLATABLE(pc+2) ) {
  1598 	    exit_block_emu(pc+2);
  1599 	    return 2;
  1600 	} else {
  1601 	    sh4_translate_instruction( pc + 2 );
  1602 	    exit_block_newpcset(pc+2);
  1603 	    return 4;
  1606 :}
  1607 BSR disp {:  
  1608     COUNT_INST(I_BSR);
  1609     if( sh4_x86.in_delay_slot ) {
  1610 	SLOTILLEGAL();
  1611     } else {
  1612 	load_spreg( R_EAX, R_PC );
  1613 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1614 	store_spreg( R_EAX, R_PR );
  1615 	sh4_x86.in_delay_slot = DELAY_PC;
  1616 	sh4_x86.branch_taken = TRUE;
  1617 	sh4_x86.tstate = TSTATE_NONE;
  1618 	if( UNTRANSLATABLE(pc+2) ) {
  1619 	    ADD_imm32_r32( disp, R_EAX );
  1620 	    store_spreg( R_EAX, R_NEW_PC );
  1621 	    exit_block_emu(pc+2);
  1622 	    return 2;
  1623 	} else {
  1624 	    sh4_translate_instruction( pc + 2 );
  1625 	    exit_block_rel( disp + pc + 4, pc+4 );
  1626 	    return 4;
  1629 :}
  1630 BSRF Rn {:  
  1631     COUNT_INST(I_BSRF);
  1632     if( sh4_x86.in_delay_slot ) {
  1633 	SLOTILLEGAL();
  1634     } else {
  1635 	load_spreg( R_EAX, R_PC );
  1636 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1637 	store_spreg( R_EAX, R_PR );
  1638 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1639 	store_spreg( R_EAX, R_NEW_PC );
  1641 	sh4_x86.in_delay_slot = DELAY_PC;
  1642 	sh4_x86.tstate = TSTATE_NONE;
  1643 	sh4_x86.branch_taken = TRUE;
  1644 	if( UNTRANSLATABLE(pc+2) ) {
  1645 	    exit_block_emu(pc+2);
  1646 	    return 2;
  1647 	} else {
  1648 	    sh4_translate_instruction( pc + 2 );
  1649 	    exit_block_newpcset(pc+2);
  1650 	    return 4;
  1653 :}
  1654 BT disp {:
  1655     COUNT_INST(I_BT);
  1656     if( sh4_x86.in_delay_slot ) {
  1657 	SLOTILLEGAL();
  1658     } else {
  1659 	sh4vma_t target = disp + pc + 4;
  1660 	JF_rel8( nottaken );
  1661 	exit_block_rel(target, pc+2 );
  1662 	JMP_TARGET(nottaken);
  1663 	return 2;
  1665 :}
  1666 BT/S disp {:
  1667     COUNT_INST(I_BTS);
  1668     if( sh4_x86.in_delay_slot ) {
  1669 	SLOTILLEGAL();
  1670     } else {
  1671 	sh4_x86.in_delay_slot = DELAY_PC;
  1672 	if( UNTRANSLATABLE(pc+2) ) {
  1673 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1674 	    JF_rel8(nottaken);
  1675 	    ADD_imm32_r32( disp, R_EAX );
  1676 	    JMP_TARGET(nottaken);
  1677 	    ADD_sh4r_r32( R_PC, R_EAX );
  1678 	    store_spreg( R_EAX, R_NEW_PC );
  1679 	    exit_block_emu(pc+2);
  1680 	    sh4_x86.branch_taken = TRUE;
  1681 	    return 2;
  1682 	} else {
  1683 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1684 		CMP_imm8s_sh4r( 1, R_T );
  1685 		sh4_x86.tstate = TSTATE_E;
  1687 	    OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
  1688 	    int save_tstate = sh4_x86.tstate;
  1689 	    sh4_translate_instruction(pc+2);
  1690 	    exit_block_rel( disp + pc + 4, pc+4 );
  1691 	    // not taken
  1692 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1693 	    sh4_x86.tstate = save_tstate;
  1694 	    sh4_translate_instruction(pc+2);
  1695 	    return 4;
  1698 :}
  1699 JMP @Rn {:  
  1700     COUNT_INST(I_JMP);
  1701     if( sh4_x86.in_delay_slot ) {
  1702 	SLOTILLEGAL();
  1703     } else {
  1704 	load_reg( R_ECX, Rn );
  1705 	store_spreg( R_ECX, R_NEW_PC );
  1706 	sh4_x86.in_delay_slot = DELAY_PC;
  1707 	sh4_x86.branch_taken = TRUE;
  1708 	if( UNTRANSLATABLE(pc+2) ) {
  1709 	    exit_block_emu(pc+2);
  1710 	    return 2;
  1711 	} else {
  1712 	    sh4_translate_instruction(pc+2);
  1713 	    exit_block_newpcset(pc+2);
  1714 	    return 4;
  1717 :}
  1718 JSR @Rn {:  
  1719     COUNT_INST(I_JSR);
  1720     if( sh4_x86.in_delay_slot ) {
  1721 	SLOTILLEGAL();
  1722     } else {
  1723 	load_spreg( R_EAX, R_PC );
  1724 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1725 	store_spreg( R_EAX, R_PR );
  1726 	load_reg( R_ECX, Rn );
  1727 	store_spreg( R_ECX, R_NEW_PC );
  1728 	sh4_x86.in_delay_slot = DELAY_PC;
  1729 	sh4_x86.branch_taken = TRUE;
  1730 	sh4_x86.tstate = TSTATE_NONE;
  1731 	if( UNTRANSLATABLE(pc+2) ) {
  1732 	    exit_block_emu(pc+2);
  1733 	    return 2;
  1734 	} else {
  1735 	    sh4_translate_instruction(pc+2);
  1736 	    exit_block_newpcset(pc+2);
  1737 	    return 4;
  1740 :}
  1741 RTE {:  
  1742     COUNT_INST(I_RTE);
  1743     if( sh4_x86.in_delay_slot ) {
  1744 	SLOTILLEGAL();
  1745     } else {
  1746 	check_priv();
  1747 	load_spreg( R_ECX, R_SPC );
  1748 	store_spreg( R_ECX, R_NEW_PC );
  1749 	load_spreg( R_EAX, R_SSR );
  1750 	call_func1( sh4_write_sr, R_EAX );
  1751 	sh4_x86.in_delay_slot = DELAY_PC;
  1752 	sh4_x86.priv_checked = FALSE;
  1753 	sh4_x86.fpuen_checked = FALSE;
  1754 	sh4_x86.tstate = TSTATE_NONE;
  1755 	sh4_x86.branch_taken = TRUE;
  1756 	if( UNTRANSLATABLE(pc+2) ) {
  1757 	    exit_block_emu(pc+2);
  1758 	    return 2;
  1759 	} else {
  1760 	    sh4_translate_instruction(pc+2);
  1761 	    exit_block_newpcset(pc+2);
  1762 	    return 4;
  1765 :}
  1766 RTS {:  
  1767     COUNT_INST(I_RTS);
  1768     if( sh4_x86.in_delay_slot ) {
  1769 	SLOTILLEGAL();
  1770     } else {
  1771 	load_spreg( R_ECX, R_PR );
  1772 	store_spreg( R_ECX, R_NEW_PC );
  1773 	sh4_x86.in_delay_slot = DELAY_PC;
  1774 	sh4_x86.branch_taken = TRUE;
  1775 	if( UNTRANSLATABLE(pc+2) ) {
  1776 	    exit_block_emu(pc+2);
  1777 	    return 2;
  1778 	} else {
  1779 	    sh4_translate_instruction(pc+2);
  1780 	    exit_block_newpcset(pc+2);
  1781 	    return 4;
  1784 :}
  1785 TRAPA #imm {:  
  1786     COUNT_INST(I_TRAPA);
  1787     if( sh4_x86.in_delay_slot ) {
  1788 	SLOTILLEGAL();
  1789     } else {
  1790 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1791 	ADD_r32_sh4r( R_ECX, R_PC );
  1792 	load_imm32( R_EAX, imm );
  1793 	call_func1( sh4_raise_trap, R_EAX );
  1794 	sh4_x86.tstate = TSTATE_NONE;
  1795 	exit_block_pcset(pc);
  1796 	sh4_x86.branch_taken = TRUE;
  1797 	return 2;
  1799 :}
  1800 UNDEF {:  
  1801     COUNT_INST(I_UNDEF);
  1802     if( sh4_x86.in_delay_slot ) {
  1803 	SLOTILLEGAL();
  1804     } else {
  1805 	JMP_exc(EXC_ILLEGAL);
  1806 	return 2;
  1808 :}
  1810 CLRMAC {:  
  1811     COUNT_INST(I_CLRMAC);
  1812     XOR_r32_r32(R_EAX, R_EAX);
  1813     store_spreg( R_EAX, R_MACL );
  1814     store_spreg( R_EAX, R_MACH );
  1815     sh4_x86.tstate = TSTATE_NONE;
  1816 :}
  1817 CLRS {:
  1818     COUNT_INST(I_CLRS);
  1819     CLC();
  1820     SETC_sh4r(R_S);
  1821     sh4_x86.tstate = TSTATE_NONE;
  1822 :}
  1823 CLRT {:  
  1824     COUNT_INST(I_CLRT);
  1825     CLC();
  1826     SETC_t();
  1827     sh4_x86.tstate = TSTATE_C;
  1828 :}
  1829 SETS {:  
  1830     COUNT_INST(I_SETS);
  1831     STC();
  1832     SETC_sh4r(R_S);
  1833     sh4_x86.tstate = TSTATE_NONE;
  1834 :}
  1835 SETT {:  
  1836     COUNT_INST(I_SETT);
  1837     STC();
  1838     SETC_t();
  1839     sh4_x86.tstate = TSTATE_C;
  1840 :}
  1842 /* Floating point moves */
  1843 FMOV FRm, FRn {:  
  1844     COUNT_INST(I_FMOV1);
  1845     check_fpuen();
  1846     if( sh4_x86.double_size ) {
  1847         load_dr0( R_EAX, FRm );
  1848         load_dr1( R_ECX, FRm );
  1849         store_dr0( R_EAX, FRn );
  1850         store_dr1( R_ECX, FRn );
  1851     } else {
  1852         load_fr( R_EAX, FRm ); // SZ=0 branch
  1853         store_fr( R_EAX, FRn );
  1855 :}
  1856 FMOV FRm, @Rn {: 
  1857     COUNT_INST(I_FMOV2);
  1858     check_fpuen();
  1859     load_reg( R_EAX, Rn );
  1860     if( sh4_x86.double_size ) {
  1861         check_walign64( R_EAX );
  1862         MMU_TRANSLATE_WRITE( R_EAX );
  1863         load_dr0( R_EDX, FRm );
  1864         load_dr1( R_ECX, FRm );
  1865         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1866     } else {
  1867         check_walign32( R_EAX );
  1868         MMU_TRANSLATE_WRITE( R_EAX );
  1869         load_fr( R_EDX, FRm );
  1870         MEM_WRITE_LONG( R_EAX, R_EDX );
  1872     sh4_x86.tstate = TSTATE_NONE;
  1873 :}
  1874 FMOV @Rm, FRn {:  
  1875     COUNT_INST(I_FMOV5);
  1876     check_fpuen();
  1877     load_reg( R_EAX, Rm );
  1878     if( sh4_x86.double_size ) {
  1879         check_ralign64( R_EAX );
  1880         MMU_TRANSLATE_READ( R_EAX );
  1881         MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
  1882         store_dr0( R_EDX, FRn );
  1883         store_dr1( R_EAX, FRn );    
  1884     } else {
  1885         check_ralign32( R_EAX );
  1886         MMU_TRANSLATE_READ( R_EAX );
  1887         MEM_READ_LONG( R_EAX, R_EAX );
  1888         store_fr( R_EAX, FRn );
  1890     sh4_x86.tstate = TSTATE_NONE;
  1891 :}
  1892 FMOV FRm, @-Rn {:  
           /* Pre-decrement store. The decremented address is translated first;
            * the in-memory Rn (sh4r.r[Rn]) is only decremented after
            * MMU_TRANSLATE_WRITE succeeds, so a TLB exception leaves Rn
            * unmodified and the instruction can restart cleanly. */
  1893     COUNT_INST(I_FMOV3);
  1894     check_fpuen();
  1895     load_reg( R_EAX, Rn );
  1896     if( sh4_x86.double_size ) {
  1897         check_walign64( R_EAX );
  1898         ADD_imm8s_r32(-8,R_EAX);
  1899         MMU_TRANSLATE_WRITE( R_EAX );
  1900         load_dr0( R_EDX, FRm );
  1901         load_dr1( R_ECX, FRm );
  1902         ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1903         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1904     } else {
  1905         check_walign32( R_EAX );
  1906         ADD_imm8s_r32( -4, R_EAX );
  1907         MMU_TRANSLATE_WRITE( R_EAX );
  1908         load_fr( R_EDX, FRm );
  1909         ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1910         MEM_WRITE_LONG( R_EAX, R_EDX );
  1912     sh4_x86.tstate = TSTATE_NONE;
  1913 :}
  1914 FMOV @Rm+, FRn {:
           /* Post-increment load: Rm is bumped (by 8 or 4) only after address
            * translation succeeds, same restart rationale as @-Rn above. */
  1915     COUNT_INST(I_FMOV6);
  1916     check_fpuen();
  1917     load_reg( R_EAX, Rm );
  1918     if( sh4_x86.double_size ) {
  1919         check_ralign64( R_EAX );
  1920         MMU_TRANSLATE_READ( R_EAX );
  1921         ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1922         MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
  1923         store_dr0( R_EDX, FRn );
  1924         store_dr1( R_EAX, FRn );
  1925     } else {
  1926         check_ralign32( R_EAX );
  1927         MMU_TRANSLATE_READ( R_EAX );
  1928         ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1929         MEM_READ_LONG( R_EAX, R_EAX );
  1930         store_fr( R_EAX, FRn );
  1932     sh4_x86.tstate = TSTATE_NONE;
  1933 :}
  1934 FMOV FRm, @(R0, Rn) {:  
           /* Indexed store to [R0 + Rn]. */
  1935     COUNT_INST(I_FMOV4);
  1936     check_fpuen();
  1937     load_reg( R_EAX, Rn );
  1938     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1939     if( sh4_x86.double_size ) {
  1940         check_walign64( R_EAX );
  1941         MMU_TRANSLATE_WRITE( R_EAX );
  1942         load_dr0( R_EDX, FRm );
  1943         load_dr1( R_ECX, FRm );
  1944         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1945     } else {
  1946         check_walign32( R_EAX );
  1947         MMU_TRANSLATE_WRITE( R_EAX );
  1948         load_fr( R_EDX, FRm );
  1949         MEM_WRITE_LONG( R_EAX, R_EDX ); // 12
  1951     sh4_x86.tstate = TSTATE_NONE;
  1952 :}
  1953 FMOV @(R0, Rm), FRn {:  
           /* Indexed load from [R0 + Rm]. */
  1954     COUNT_INST(I_FMOV7);
  1955     check_fpuen();
  1956     load_reg( R_EAX, Rm );
  1957     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1958     if( sh4_x86.double_size ) {
  1959         check_ralign64( R_EAX );
  1960         MMU_TRANSLATE_READ( R_EAX );
  1961         MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1962         store_dr0( R_ECX, FRn );
  1963         store_dr1( R_EAX, FRn );
  1964     } else {
  1965         check_ralign32( R_EAX );
  1966         MMU_TRANSLATE_READ( R_EAX );
  1967         MEM_READ_LONG( R_EAX, R_EAX );
  1968         store_fr( R_EAX, FRn );
  1970     sh4_x86.tstate = TSTATE_NONE;
  1971 :}
  1972 FLDI0 FRn {:  /* IFF PR=0 */
           /* Load constant 0.0f into FRn; emitted only for single-precision
            * mode (PR=0), otherwise the instruction is a no-op here. */
  1973     COUNT_INST(I_FLDI0);
  1974     check_fpuen();
  1975     if( sh4_x86.double_prec == 0 ) {
  1976         XOR_r32_r32( R_EAX, R_EAX );
  1977         store_fr( R_EAX, FRn );
  1979     sh4_x86.tstate = TSTATE_NONE;
  1980 :}
  1981 FLDI1 FRn {:  /* IFF PR=0 */
           /* Load constant 1.0f (IEEE-754 bit pattern 0x3F800000) into FRn. */
  1982     COUNT_INST(I_FLDI1);
  1983     check_fpuen();
  1984     if( sh4_x86.double_prec == 0 ) {
  1985         load_imm32(R_EAX, 0x3F800000);
  1986         store_fr( R_EAX, FRn );
  1988 :}
  1990 FLOAT FPUL, FRn {:  
           /* Convert signed 32-bit FPUL to FP: x87 FILD of FPUL, then pop to a
            * double (PR=1) or single (PR=0) destination register. */
  1991     COUNT_INST(I_FLOAT);
  1992     check_fpuen();
  1993     FILD_sh4r(R_FPUL);
  1994     if( sh4_x86.double_prec ) {
  1995         pop_dr( FRn );
  1996     } else {
  1997         pop_fr( FRn );
  1999 :}
  2000 FTRC FRm, FPUL {:  
           /* Truncate FRm/DRm to a signed 32-bit integer in FPUL, saturating
            * at INT_MAX/INT_MIN. The value is compared against max_int and
            * min_int on the x87 stack; in-range values are converted with
            * FISTP under a temporarily-installed truncating control word
            * (trunc_fcw), restoring the saved FCW afterwards. On saturation,
            * R_ECX still points at the clamp constant (max_int or min_int),
            * which is stored to FPUL directly and the x87 stack popped. */
  2001     COUNT_INST(I_FTRC);
  2002     check_fpuen();
  2003     if( sh4_x86.double_prec ) {
  2004         push_dr( FRm );
  2005     } else {
  2006         push_fr( FRm );
  2008     load_ptr( R_ECX, &max_int );
  2009     FILD_r32ind( R_ECX );
  2010     FCOMIP_st(1);
  2011     JNA_rel8( sat );
  2012     load_ptr( R_ECX, &min_int );  // 5
  2013     FILD_r32ind( R_ECX );           // 2
  2014     FCOMIP_st(1);                   // 2
  2015     JAE_rel8( sat2 );            // 2
  2016     load_ptr( R_EAX, &save_fcw );
  2017     FNSTCW_r32ind( R_EAX );
  2018     load_ptr( R_EDX, &trunc_fcw );
  2019     FLDCW_r32ind( R_EDX );
  2020     FISTP_sh4r(R_FPUL);             // 3
  2021     FLDCW_r32ind( R_EAX );
  2022     JMP_rel8(end);             // 2
  2024     JMP_TARGET(sat);
  2025     JMP_TARGET(sat2);
  2026     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  2027     store_spreg( R_ECX, R_FPUL );
  2028     FPOP_st();
  2029     JMP_TARGET(end);
  2030     sh4_x86.tstate = TSTATE_NONE;
  2031 :}
  2032 FLDS FRm, FPUL {:  
           /* Raw 32-bit move FRm -> FPUL (no conversion). */
  2033     COUNT_INST(I_FLDS);
  2034     check_fpuen();
  2035     load_fr( R_EAX, FRm );
  2036     store_spreg( R_EAX, R_FPUL );
  2037 :}
  2038 FSTS FPUL, FRn {:  
           /* Raw 32-bit move FPUL -> FRn (no conversion). */
  2039     COUNT_INST(I_FSTS);
  2040     check_fpuen();
  2041     load_spreg( R_EAX, R_FPUL );
  2042     store_fr( R_EAX, FRn );
  2043 :}
  2044 FCNVDS FRm, FPUL {:  
           /* Narrow DRm (double) to single in FPUL; only meaningful when
            * PR=1, otherwise nothing is emitted. */
  2045     COUNT_INST(I_FCNVDS);
  2046     check_fpuen();
  2047     if( sh4_x86.double_prec ) {
  2048         push_dr( FRm );
  2049         pop_fpul();
  2051 :}
  2052 FCNVSD FPUL, FRn {:  
           /* Widen single in FPUL to double DRn; PR=1 only. */
  2053     COUNT_INST(I_FCNVSD);
  2054     check_fpuen();
  2055     if( sh4_x86.double_prec ) {
  2056         push_fpul();
  2057         pop_dr( FRn );
  2059 :}
  2061 /* Floating point instructions */
  2062 FABS FRn {:  
           /* All arithmetic ops below follow the same x87 pattern: push the
            * operand(s) on the FPU stack, apply the operation, pop the result
            * back into FRn/DRn, selecting single vs double by double_prec. */
  2063     COUNT_INST(I_FABS);
  2064     check_fpuen();
  2065     if( sh4_x86.double_prec ) {
  2066         push_dr(FRn);
  2067         FABS_st0();
  2068         pop_dr(FRn);
  2069     } else {
  2070         push_fr(FRn);
  2071         FABS_st0();
  2072         pop_fr(FRn);
  2074 :}
  2075 FADD FRm, FRn {:  
           /* FRn += FRm */
  2076     COUNT_INST(I_FADD);
  2077     check_fpuen();
  2078     if( sh4_x86.double_prec ) {
  2079         push_dr(FRm);
  2080         push_dr(FRn);
  2081         FADDP_st(1);
  2082         pop_dr(FRn);
  2083     } else {
  2084         push_fr(FRm);
  2085         push_fr(FRn);
  2086         FADDP_st(1);
  2087         pop_fr(FRn);
  2089 :}
  2090 FDIV FRm, FRn {:  
           /* FRn /= FRm. FRn is pushed first so FDIVP st(1),st(0) computes
            * st1 (FRn) / st0 (FRm). */
  2091     COUNT_INST(I_FDIV);
  2092     check_fpuen();
  2093     if( sh4_x86.double_prec ) {
  2094         push_dr(FRn);
  2095         push_dr(FRm);
  2096         FDIVP_st(1);
  2097         pop_dr(FRn);
  2098     } else {
  2099         push_fr(FRn);
  2100         push_fr(FRm);
  2101         FDIVP_st(1);
  2102         pop_fr(FRn);
  2104 :}
  2105 FMAC FR0, FRm, FRn {:  
           /* FRn += FR0 * FRm (multiply first, then accumulate). */
  2106     COUNT_INST(I_FMAC);
  2107     check_fpuen();
  2108     if( sh4_x86.double_prec ) {
  2109         push_dr( 0 );
  2110         push_dr( FRm );
  2111         FMULP_st(1);
  2112         push_dr( FRn );
  2113         FADDP_st(1);
  2114         pop_dr( FRn );
  2115     } else {
  2116         push_fr( 0 );
  2117         push_fr( FRm );
  2118         FMULP_st(1);
  2119         push_fr( FRn );
  2120         FADDP_st(1);
  2121         pop_fr( FRn );
  2123 :}
  2125 FMUL FRm, FRn {:  
           /* FRn *= FRm */
  2126     COUNT_INST(I_FMUL);
  2127     check_fpuen();
  2128     if( sh4_x86.double_prec ) {
  2129         push_dr(FRm);
  2130         push_dr(FRn);
  2131         FMULP_st(1);
  2132         pop_dr(FRn);
  2133     } else {
  2134         push_fr(FRm);
  2135         push_fr(FRn);
  2136         FMULP_st(1);
  2137         pop_fr(FRn);
  2139 :}
  2140 FNEG FRn {:  
           /* FRn = -FRn */
  2141     COUNT_INST(I_FNEG);
  2142     check_fpuen();
  2143     if( sh4_x86.double_prec ) {
  2144         push_dr(FRn);
  2145         FCHS_st0();
  2146         pop_dr(FRn);
  2147     } else {
  2148         push_fr(FRn);
  2149         FCHS_st0();
  2150         pop_fr(FRn);
  2152 :}
  2153 FSRRA FRn {:  
           /* FRn = 1.0 / sqrt(FRn); defined for single precision only. */
  2154     COUNT_INST(I_FSRRA);
  2155     check_fpuen();
  2156     if( sh4_x86.double_prec == 0 ) {
  2157         FLD1_st0();
  2158         push_fr(FRn);
  2159         FSQRT_st0();
  2160         FDIVP_st(1);
  2161         pop_fr(FRn);
  2163 :}
  2164 FSQRT FRn {:  
           /* FRn = sqrt(FRn) */
  2165     COUNT_INST(I_FSQRT);
  2166     check_fpuen();
  2167     if( sh4_x86.double_prec ) {
  2168         push_dr(FRn);
  2169         FSQRT_st0();
  2170         pop_dr(FRn);
  2171     } else {
  2172         push_fr(FRn);
  2173         FSQRT_st0();
  2174         pop_fr(FRn);
  2176 :}
  2177 FSUB FRm, FRn {:  
           /* FRn -= FRm; FRn pushed first so FSUBP computes st1 - st0. */
  2178     COUNT_INST(I_FSUB);
  2179     check_fpuen();
  2180     if( sh4_x86.double_prec ) {
  2181         push_dr(FRn);
  2182         push_dr(FRm);
  2183         FSUBP_st(1);
  2184         pop_dr(FRn);
  2185     } else {
  2186         push_fr(FRn);
  2187         push_fr(FRm);
  2188         FSUBP_st(1);
  2189         pop_fr(FRn);
  2191 :}
  2193 FCMP/EQ FRm, FRn {:  
           /* T = (FRn == FRm). FRm is pushed first, so FCOMIP compares
            * st0 (FRn) against st1 (FRm) and sets EFLAGS.
            * NOTE(review): FCOMIP sets ZF=PF=CF=1 for unordered operands, so
            * SETE would report NaN == NaN as true here — confirm intended. */
  2194     COUNT_INST(I_FCMPEQ);
  2195     check_fpuen();
  2196     if( sh4_x86.double_prec ) {
  2197         push_dr(FRm);
  2198         push_dr(FRn);
  2199     } else {
  2200         push_fr(FRm);
  2201         push_fr(FRn);
  2203     FCOMIP_st(1);
  2204     SETE_t();
  2205     FPOP_st();
  2206     sh4_x86.tstate = TSTATE_E;
  2207 :}
  2208 FCMP/GT FRm, FRn {:  
           /* T = (FRn > FRm); SETA (CF=0 && ZF=0) correctly yields false for
            * unordered comparisons. */
  2209     COUNT_INST(I_FCMPGT);
  2210     check_fpuen();
  2211     if( sh4_x86.double_prec ) {
  2212         push_dr(FRm);
  2213         push_dr(FRn);
  2214     } else {
  2215         push_fr(FRm);
  2216         push_fr(FRn);
  2218     FCOMIP_st(1);
  2219     SETA_t();
  2220     FPOP_st();
  2221     sh4_x86.tstate = TSTATE_A;
  2222 :}
  2224 FSCA FPUL, FRn {:  
           /* Sine/cosine approximation: calls the sh4_fsca helper with the
            * FPUL angle and a pointer to the destination register pair.
            * FRn & 0x0E forces an even (pair-aligned) destination. PR=0 only. */
  2225     COUNT_INST(I_FSCA);
  2226     check_fpuen();
  2227     if( sh4_x86.double_prec == 0 ) {
  2228         LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
  2229         load_spreg( R_EAX, R_FPUL );
  2230         call_func2( sh4_fsca, R_EAX, R_EDX );
  2232     sh4_x86.tstate = TSTATE_NONE;
  2233 :}
  2234 FIPR FVm, FVn {:  
           /* 4-component inner (dot) product FVn[3] = FVm . FVn; PR=0 only.
            * SSE3 path: multiply the two vectors and reduce with two HADDPS.
            * Fallback path: accumulate the four products on the x87 stack.
            * NOTE(review): the SSE path stores to raw offset (FVn<<2)+2 while
            * the x87 path pops to register (FVn<<2)+3 — presumably the fr[]
            * array stores each 32-bit pair word-swapped in host memory (see
            * the M1 M0 M3 M2 comments in FTRV), making these the same
            * architectural register; confirm against load_fr/store_fr. */
  2235     COUNT_INST(I_FIPR);
  2236     check_fpuen();
  2237     if( sh4_x86.double_prec == 0 ) {
  2238         if( sh4_x86.sse3_enabled ) {
  2239             MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2240             MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2241             HADDPS_xmm_xmm( 4, 4 ); 
  2242             HADDPS_xmm_xmm( 4, 4 );
  2243             MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2244         } else {
  2245             push_fr( FVm<<2 );
  2246             push_fr( FVn<<2 );
  2247             FMULP_st(1);
  2248             push_fr( (FVm<<2)+1);
  2249             push_fr( (FVn<<2)+1);
  2250             FMULP_st(1);
  2251             FADDP_st(1);
  2252             push_fr( (FVm<<2)+2);
  2253             push_fr( (FVn<<2)+2);
  2254             FMULP_st(1);
  2255             FADDP_st(1);
  2256             push_fr( (FVm<<2)+3);
  2257             push_fr( (FVn<<2)+3);
  2258             FMULP_st(1);
  2259             FADDP_st(1);
  2260             pop_fr( (FVn<<2)+3);
  2263 :}
  2264 FTRV XMTRX, FVn {:  
           /* FVn = XMTRX * FVn (4x4 matrix from the back bank fr[1] times a
            * 4-vector); PR=0 only. SSE3 path broadcasts each vector element
            * across an xmm register, multiplies by the matching matrix column
            * set, and sums; otherwise falls back to the sh4_ftrv C helper. */
  2265     COUNT_INST(I_FTRV);
  2266     check_fpuen();
  2267     if( sh4_x86.double_prec == 0 ) {
  2268         if( sh4_x86.sse3_enabled ) {
  2269             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2270             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2271             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2272             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2274             MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2275             MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2276             MOVAPS_xmm_xmm( 4, 6 );
  2277             MOVAPS_xmm_xmm( 5, 7 );
  2278             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2279             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2280             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2281             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2282             MULPS_xmm_xmm( 0, 4 );
  2283             MULPS_xmm_xmm( 1, 5 );
  2284             MULPS_xmm_xmm( 2, 6 );
  2285             MULPS_xmm_xmm( 3, 7 );
  2286             ADDPS_xmm_xmm( 5, 4 );
  2287             ADDPS_xmm_xmm( 7, 6 );
  2288             ADDPS_xmm_xmm( 6, 4 );
  2289             MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2290         } else {
  2291             LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
  2292             call_func1( sh4_ftrv, R_EAX );
  2295     sh4_x86.tstate = TSTATE_NONE;
  2296 :}
  2298 FRCHG {:  
           /* Toggle FPSCR.FR (front/back FP register bank) and swap the bank
            * contents via sh4_switch_fr_banks(). */
  2299     COUNT_INST(I_FRCHG);
  2300     check_fpuen();
  2301     load_spreg( R_ECX, R_FPSCR );
  2302     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2303     store_spreg( R_ECX, R_FPSCR );
  2304     call_func0( sh4_switch_fr_banks );
  2305     sh4_x86.tstate = TSTATE_NONE;
  2306 :}
  2307 FSCHG {:  
           /* Toggle FPSCR.SZ; the translator's own double_size flag is flipped
            * as well so subsequent FMOVs in this block decode correctly. */
  2308     COUNT_INST(I_FSCHG);
  2309     check_fpuen();
  2310     load_spreg( R_ECX, R_FPSCR );
  2311     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2312     store_spreg( R_ECX, R_FPSCR );
  2313     sh4_x86.tstate = TSTATE_NONE;
  2314     sh4_x86.double_size = !sh4_x86.double_size;
  2315 :}
  2317 /* Processor control instructions */
  2318 LDC Rm, SR {:
           /* Load SR from Rm. Illegal in a delay slot; otherwise privileged.
            * The write goes through sh4_write_sr (SR side effects), and the
            * cached priv/fpuen checks are invalidated because the new SR may
            * change MD or FPU-disable state. */
  2319     COUNT_INST(I_LDCSR);
  2320     if( sh4_x86.in_delay_slot ) {
  2321 	SLOTILLEGAL();
  2322     } else {
  2323 	check_priv();
  2324 	load_reg( R_EAX, Rm );
  2325 	call_func1( sh4_write_sr, R_EAX );
  2326 	sh4_x86.priv_checked = FALSE;
  2327 	sh4_x86.fpuen_checked = FALSE;
  2328 	sh4_x86.tstate = TSTATE_NONE;
  2330 :}
  2331 LDC Rm, GBR {: 
           /* GBR is accessible in user mode — no check_priv here. */
  2332     COUNT_INST(I_LDC);
  2333     load_reg( R_EAX, Rm );
  2334     store_spreg( R_EAX, R_GBR );
  2335 :}
  2336 LDC Rm, VBR {:  
           /* Privileged control-register loads: VBR/SSR/SGR/SPC/DBR/Rn_BANK
            * all follow the same check_priv + store pattern. */
  2337     COUNT_INST(I_LDC);
  2338     check_priv();
  2339     load_reg( R_EAX, Rm );
  2340     store_spreg( R_EAX, R_VBR );
  2341     sh4_x86.tstate = TSTATE_NONE;
  2342 :}
  2343 LDC Rm, SSR {:  
  2344     COUNT_INST(I_LDC);
  2345     check_priv();
  2346     load_reg( R_EAX, Rm );
  2347     store_spreg( R_EAX, R_SSR );
  2348     sh4_x86.tstate = TSTATE_NONE;
  2349 :}
  2350 LDC Rm, SGR {:  
  2351     COUNT_INST(I_LDC);
  2352     check_priv();
  2353     load_reg( R_EAX, Rm );
  2354     store_spreg( R_EAX, R_SGR );
  2355     sh4_x86.tstate = TSTATE_NONE;
  2356 :}
  2357 LDC Rm, SPC {:  
  2358     COUNT_INST(I_LDC);
  2359     check_priv();
  2360     load_reg( R_EAX, Rm );
  2361     store_spreg( R_EAX, R_SPC );
  2362     sh4_x86.tstate = TSTATE_NONE;
  2363 :}
  2364 LDC Rm, DBR {:  
  2365     COUNT_INST(I_LDC);
  2366     check_priv();
  2367     load_reg( R_EAX, Rm );
  2368     store_spreg( R_EAX, R_DBR );
  2369     sh4_x86.tstate = TSTATE_NONE;
  2370 :}
  2371 LDC Rm, Rn_BANK {:  
  2372     COUNT_INST(I_LDC);
  2373     check_priv();
  2374     load_reg( R_EAX, Rm );
  2375     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2376     sh4_x86.tstate = TSTATE_NONE;
  2377 :}
  2378 LDC.L @Rm+, GBR {:  
           /* Memory-to-control-register loads: translate the address, bump Rm
            * only after translation succeeds (restartable on MMU exception),
            * then read and store. GBR form is not privileged. */
  2379     COUNT_INST(I_LDCM);
  2380     load_reg( R_EAX, Rm );
  2381     check_ralign32( R_EAX );
  2382     MMU_TRANSLATE_READ( R_EAX );
  2383     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2384     MEM_READ_LONG( R_EAX, R_EAX );
  2385     store_spreg( R_EAX, R_GBR );
  2386     sh4_x86.tstate = TSTATE_NONE;
  2387 :}
  2388 LDC.L @Rm+, SR {:
           /* As LDC Rm,SR but from memory: slot-illegal, privileged, and
            * invalidates the cached priv/fpuen state after sh4_write_sr. */
  2389     COUNT_INST(I_LDCSRM);
  2390     if( sh4_x86.in_delay_slot ) {
  2391 	SLOTILLEGAL();
  2392     } else {
  2393 	check_priv();
  2394 	load_reg( R_EAX, Rm );
  2395 	check_ralign32( R_EAX );
  2396 	MMU_TRANSLATE_READ( R_EAX );
  2397 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2398 	MEM_READ_LONG( R_EAX, R_EAX );
  2399 	call_func1( sh4_write_sr, R_EAX );
  2400 	sh4_x86.priv_checked = FALSE;
  2401 	sh4_x86.fpuen_checked = FALSE;
  2402 	sh4_x86.tstate = TSTATE_NONE;
  2404 :}
  2405 LDC.L @Rm+, VBR {:  
  2406     COUNT_INST(I_LDCM);
  2407     check_priv();
  2408     load_reg( R_EAX, Rm );
  2409     check_ralign32( R_EAX );
  2410     MMU_TRANSLATE_READ( R_EAX );
  2411     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2412     MEM_READ_LONG( R_EAX, R_EAX );
  2413     store_spreg( R_EAX, R_VBR );
  2414     sh4_x86.tstate = TSTATE_NONE;
  2415 :}
  2416 LDC.L @Rm+, SSR {:
  2417     COUNT_INST(I_LDCM);
  2418     check_priv();
  2419     load_reg( R_EAX, Rm );
  2420     check_ralign32( R_EAX );
  2421     MMU_TRANSLATE_READ( R_EAX );
  2422     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2423     MEM_READ_LONG( R_EAX, R_EAX );
  2424     store_spreg( R_EAX, R_SSR );
  2425     sh4_x86.tstate = TSTATE_NONE;
  2426 :}
  2427 LDC.L @Rm+, SGR {:  
  2428     COUNT_INST(I_LDCM);
  2429     check_priv();
  2430     load_reg( R_EAX, Rm );
  2431     check_ralign32( R_EAX );
  2432     MMU_TRANSLATE_READ( R_EAX );
  2433     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2434     MEM_READ_LONG( R_EAX, R_EAX );
  2435     store_spreg( R_EAX, R_SGR );
  2436     sh4_x86.tstate = TSTATE_NONE;
  2437 :}
  2438 LDC.L @Rm+, SPC {:  
  2439     COUNT_INST(I_LDCM);
  2440     check_priv();
  2441     load_reg( R_EAX, Rm );
  2442     check_ralign32( R_EAX );
  2443     MMU_TRANSLATE_READ( R_EAX );
  2444     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2445     MEM_READ_LONG( R_EAX, R_EAX );
  2446     store_spreg( R_EAX, R_SPC );
  2447     sh4_x86.tstate = TSTATE_NONE;
  2448 :}
  2449 LDC.L @Rm+, DBR {:  
  2450     COUNT_INST(I_LDCM);
  2451     check_priv();
  2452     load_reg( R_EAX, Rm );
  2453     check_ralign32( R_EAX );
  2454     MMU_TRANSLATE_READ( R_EAX );
  2455     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2456     MEM_READ_LONG( R_EAX, R_EAX );
  2457     store_spreg( R_EAX, R_DBR );
  2458     sh4_x86.tstate = TSTATE_NONE;
  2459 :}
  2460 LDC.L @Rm+, Rn_BANK {:  
  2461     COUNT_INST(I_LDCM);
  2462     check_priv();
  2463     load_reg( R_EAX, Rm );
  2464     check_ralign32( R_EAX );
  2465     MMU_TRANSLATE_READ( R_EAX );
  2466     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2467     MEM_READ_LONG( R_EAX, R_EAX );
  2468     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2469     sh4_x86.tstate = TSTATE_NONE;
  2470 :}
  2471 LDS Rm, FPSCR {:
           /* Write FPSCR through sh4_write_fpscr (handles FR/SZ/PR side
            * effects). Returns 2 — presumably signals the caller to end the
            * current translation block, since FPSCR changes how later FP
            * instructions must be decoded; confirm against the %% driver. */
  2472     COUNT_INST(I_LDSFPSCR);
  2473     check_fpuen();
  2474     load_reg( R_EAX, Rm );
  2475     call_func1( sh4_write_fpscr, R_EAX );
  2476     sh4_x86.tstate = TSTATE_NONE;
  2477     return 2;
  2478 :}
  2479 LDS.L @Rm+, FPSCR {:  
           /* Memory form of the above; same restartable post-increment
            * pattern (Rm bumped only after translation) and same return 2. */
  2480     COUNT_INST(I_LDSFPSCRM);
  2481     check_fpuen();
  2482     load_reg( R_EAX, Rm );
  2483     check_ralign32( R_EAX );
  2484     MMU_TRANSLATE_READ( R_EAX );
  2485     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2486     MEM_READ_LONG( R_EAX, R_EAX );
  2487     call_func1( sh4_write_fpscr, R_EAX );
  2488     sh4_x86.tstate = TSTATE_NONE;
  2489     return 2;
  2490 :}
  2491 LDS Rm, FPUL {:  
           /* FPUL/MACH/MACL/PR loads: plain 32-bit moves into sh4r; only the
            * FPU registers need check_fpuen. */
  2492     COUNT_INST(I_LDS);
  2493     check_fpuen();
  2494     load_reg( R_EAX, Rm );
  2495     store_spreg( R_EAX, R_FPUL );
  2496 :}
  2497 LDS.L @Rm+, FPUL {:  
  2498     COUNT_INST(I_LDSM);
  2499     check_fpuen();
  2500     load_reg( R_EAX, Rm );
  2501     check_ralign32( R_EAX );
  2502     MMU_TRANSLATE_READ( R_EAX );
  2503     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2504     MEM_READ_LONG( R_EAX, R_EAX );
  2505     store_spreg( R_EAX, R_FPUL );
  2506     sh4_x86.tstate = TSTATE_NONE;
  2507 :}
  2508 LDS Rm, MACH {: 
  2509     COUNT_INST(I_LDS);
  2510     load_reg( R_EAX, Rm );
  2511     store_spreg( R_EAX, R_MACH );
  2512 :}
  2513 LDS.L @Rm+, MACH {:  
  2514     COUNT_INST(I_LDSM);
  2515     load_reg( R_EAX, Rm );
  2516     check_ralign32( R_EAX );
  2517     MMU_TRANSLATE_READ( R_EAX );
  2518     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2519     MEM_READ_LONG( R_EAX, R_EAX );
  2520     store_spreg( R_EAX, R_MACH );
  2521     sh4_x86.tstate = TSTATE_NONE;
  2522 :}
  2523 LDS Rm, MACL {:  
  2524     COUNT_INST(I_LDS);
  2525     load_reg( R_EAX, Rm );
  2526     store_spreg( R_EAX, R_MACL );
  2527 :}
  2528 LDS.L @Rm+, MACL {:  
  2529     COUNT_INST(I_LDSM);
  2530     load_reg( R_EAX, Rm );
  2531     check_ralign32( R_EAX );
  2532     MMU_TRANSLATE_READ( R_EAX );
  2533     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2534     MEM_READ_LONG( R_EAX, R_EAX );
  2535     store_spreg( R_EAX, R_MACL );
  2536     sh4_x86.tstate = TSTATE_NONE;
  2537 :}
  2538 LDS Rm, PR {:  
  2539     COUNT_INST(I_LDS);
  2540     load_reg( R_EAX, Rm );
  2541     store_spreg( R_EAX, R_PR );
  2542 :}
  2543 LDS.L @Rm+, PR {:  
  2544     COUNT_INST(I_LDSM);
  2545     load_reg( R_EAX, Rm );
  2546     check_ralign32( R_EAX );
  2547     MMU_TRANSLATE_READ( R_EAX );
  2548     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2549     MEM_READ_LONG( R_EAX, R_EAX );
  2550     store_spreg( R_EAX, R_PR );
  2551     sh4_x86.tstate = TSTATE_NONE;
  2552 :}
  2553 LDTLB {:  
           /* Load the UTLB entry selected by MMUCR.URC — delegated entirely
            * to the MMU_ldtlb C helper. */
  2554     COUNT_INST(I_LDTLB);
  2555     call_func0( MMU_ldtlb );
  2556     sh4_x86.tstate = TSTATE_NONE;
  2557 :}
  2558 OCBI @Rn {:
           /* Cache-block invalidate/purge/write-back ops: no-ops here since
            * the operand cache itself is not emulated. */
  2559     COUNT_INST(I_OCBI);
  2560 :}
  2561 OCBP @Rn {:
  2562     COUNT_INST(I_OCBP);
  2563 :}
  2564 OCBWB @Rn {:
  2565     COUNT_INST(I_OCBWB);
  2566 :}
  2567 PREF @Rn {:
           /* PREF only has an emulated effect for the store-queue region:
            * addresses with the top bits 0xE0000000 trigger
            * sh4_flush_store_queue(addr); a zero return (failure) raises an
            * exception via JE_exc(-1). All other addresses fall through. */
  2568     COUNT_INST(I_PREF);
  2569     load_reg( R_EAX, Rn );
  2570     MOV_r32_r32( R_EAX, R_ECX );
  2571     AND_imm32_r32( 0xFC000000, R_ECX );
  2572     CMP_imm32_r32( 0xE0000000, R_ECX );
  2573     JNE_rel8(end);
  2574     call_func1( sh4_flush_store_queue, R_EAX );
  2575     TEST_r32_r32( R_EAX, R_EAX );
  2576     JE_exc(-1);
  2577     JMP_TARGET(end);
  2578     sh4_x86.tstate = TSTATE_NONE;
  2579 :}
  2580 SLEEP {: 
           /* Privileged. Delegates to sh4_sleep and ends the translation
            * block (return 2), clearing any delay-slot state. */
  2581     COUNT_INST(I_SLEEP);
  2582     check_priv();
  2583     call_func0( sh4_sleep );
  2584     sh4_x86.tstate = TSTATE_NONE;
  2585     sh4_x86.in_delay_slot = DELAY_NONE;
  2586     return 2;
  2587 :}
  2588 STC SR, Rn {:
           /* Store control registers to Rn. SR is assembled by the
            * sh4_read_sr helper; all STC forms are privileged except GBR. */
  2589     COUNT_INST(I_STCSR);
  2590     check_priv();
  2591     call_func0(sh4_read_sr);
  2592     store_reg( R_EAX, Rn );
  2593     sh4_x86.tstate = TSTATE_NONE;
  2594 :}
  2595 STC GBR, Rn {:  
  2596     COUNT_INST(I_STC);
  2597     load_spreg( R_EAX, R_GBR );
  2598     store_reg( R_EAX, Rn );
  2599 :}
  2600 STC VBR, Rn {:  
  2601     COUNT_INST(I_STC);
  2602     check_priv();
  2603     load_spreg( R_EAX, R_VBR );
  2604     store_reg( R_EAX, Rn );
  2605     sh4_x86.tstate = TSTATE_NONE;
  2606 :}
  2607 STC SSR, Rn {:  
  2608     COUNT_INST(I_STC);
  2609     check_priv();
  2610     load_spreg( R_EAX, R_SSR );
  2611     store_reg( R_EAX, Rn );
  2612     sh4_x86.tstate = TSTATE_NONE;
  2613 :}
  2614 STC SPC, Rn {:  
  2615     COUNT_INST(I_STC);
  2616     check_priv();
  2617     load_spreg( R_EAX, R_SPC );
  2618     store_reg( R_EAX, Rn );
  2619     sh4_x86.tstate = TSTATE_NONE;
  2620 :}
  2621 STC SGR, Rn {:  
  2622     COUNT_INST(I_STC);
  2623     check_priv();
  2624     load_spreg( R_EAX, R_SGR );
  2625     store_reg( R_EAX, Rn );
  2626     sh4_x86.tstate = TSTATE_NONE;
  2627 :}
  2628 STC DBR, Rn {:  
  2629     COUNT_INST(I_STC);
  2630     check_priv();
  2631     load_spreg( R_EAX, R_DBR );
  2632     store_reg( R_EAX, Rn );
  2633     sh4_x86.tstate = TSTATE_NONE;
  2634 :}
  2635 STC Rm_BANK, Rn {:
  2636     COUNT_INST(I_STC);
  2637     check_priv();
  2638     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2639     store_reg( R_EAX, Rn );
  2640     sh4_x86.tstate = TSTATE_NONE;
  2641 :}
  2642 STC.L SR, @-Rn {:
           /* Pre-decrement store of SR. The translated address is preserved
            * across the sh4_read_sr call with PUSH/POP; as with the other
            * pre-decrement forms, sh4r.r[Rn] is only decremented after
            * translation succeeds so the instruction is restartable. */
  2643     COUNT_INST(I_STCSRM);
  2644     check_priv();
  2645     load_reg( R_EAX, Rn );
  2646     check_walign32( R_EAX );
  2647     ADD_imm8s_r32( -4, R_EAX );
  2648     MMU_TRANSLATE_WRITE( R_EAX );
  2649     PUSH_realigned_r32( R_EAX );
  2650     call_func0( sh4_read_sr );
  2651     POP_realigned_r32( R_ECX );
  2652     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2653     MEM_WRITE_LONG( R_ECX, R_EAX );
  2654     sh4_x86.tstate = TSTATE_NONE;
  2655 :}
  2656 STC.L VBR, @-Rn {:  
           /* Remaining STC.L forms share one pattern: translate Rn-4,
            * load the control register, commit the decrement, write. */
  2657     COUNT_INST(I_STCM);
  2658     check_priv();
  2659     load_reg( R_EAX, Rn );
  2660     check_walign32( R_EAX );
  2661     ADD_imm8s_r32( -4, R_EAX );
  2662     MMU_TRANSLATE_WRITE( R_EAX );
  2663     load_spreg( R_EDX, R_VBR );
  2664     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2665     MEM_WRITE_LONG( R_EAX, R_EDX );
  2666     sh4_x86.tstate = TSTATE_NONE;
  2667 :}
  2668 STC.L SSR, @-Rn {:  
  2669     COUNT_INST(I_STCM);
  2670     check_priv();
  2671     load_reg( R_EAX, Rn );
  2672     check_walign32( R_EAX );
  2673     ADD_imm8s_r32( -4, R_EAX );
  2674     MMU_TRANSLATE_WRITE( R_EAX );
  2675     load_spreg( R_EDX, R_SSR );
  2676     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2677     MEM_WRITE_LONG( R_EAX, R_EDX );
  2678     sh4_x86.tstate = TSTATE_NONE;
  2679 :}
  2680 STC.L SPC, @-Rn {:
  2681     COUNT_INST(I_STCM);
  2682     check_priv();
  2683     load_reg( R_EAX, Rn );
  2684     check_walign32( R_EAX );
  2685     ADD_imm8s_r32( -4, R_EAX );
  2686     MMU_TRANSLATE_WRITE( R_EAX );
  2687     load_spreg( R_EDX, R_SPC );
  2688     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2689     MEM_WRITE_LONG( R_EAX, R_EDX );
  2690     sh4_x86.tstate = TSTATE_NONE;
  2691 :}
  2692 STC.L SGR, @-Rn {:  
  2693     COUNT_INST(I_STCM);
  2694     check_priv();
  2695     load_reg( R_EAX, Rn );
  2696     check_walign32( R_EAX );
  2697     ADD_imm8s_r32( -4, R_EAX );
  2698     MMU_TRANSLATE_WRITE( R_EAX );
  2699     load_spreg( R_EDX, R_SGR );
  2700     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2701     MEM_WRITE_LONG( R_EAX, R_EDX );
  2702     sh4_x86.tstate = TSTATE_NONE;
  2703 :}
  2704 STC.L DBR, @-Rn {:  
  2705     COUNT_INST(I_STCM);
  2706     check_priv();
  2707     load_reg( R_EAX, Rn );
  2708     check_walign32( R_EAX );
  2709     ADD_imm8s_r32( -4, R_EAX );
  2710     MMU_TRANSLATE_WRITE( R_EAX );
  2711     load_spreg( R_EDX, R_DBR );
  2712     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2713     MEM_WRITE_LONG( R_EAX, R_EDX );
  2714     sh4_x86.tstate = TSTATE_NONE;
  2715 :}
  2716 STC.L Rm_BANK, @-Rn {:  
  2717     COUNT_INST(I_STCM);
  2718     check_priv();
  2719     load_reg( R_EAX, Rn );
  2720     check_walign32( R_EAX );
  2721     ADD_imm8s_r32( -4, R_EAX );
  2722     MMU_TRANSLATE_WRITE( R_EAX );
  2723     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2724     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2725     MEM_WRITE_LONG( R_EAX, R_EDX );
  2726     sh4_x86.tstate = TSTATE_NONE;
  2727 :}
  2728 STC.L GBR, @-Rn {:  
           /* GBR form: user-accessible, so no check_priv. */
  2729     COUNT_INST(I_STCM);
  2730     load_reg( R_EAX, Rn );
  2731     check_walign32( R_EAX );
  2732     ADD_imm8s_r32( -4, R_EAX );
  2733     MMU_TRANSLATE_WRITE( R_EAX );
  2734     load_spreg( R_EDX, R_GBR );
  2735     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2736     MEM_WRITE_LONG( R_EAX, R_EDX );
  2737     sh4_x86.tstate = TSTATE_NONE;
  2738 :}
  2739 STS FPSCR, Rn {:  
           /* Store system registers to Rn / push to @-Rn. FPSCR and FPUL
            * forms require the FPU to be enabled (check_fpuen); MACH/MACL/PR
            * forms have no access check. The STS.L forms use the same
            * restartable pre-decrement pattern as STC.L above. */
  2740     COUNT_INST(I_STSFPSCR);
  2741     check_fpuen();
  2742     load_spreg( R_EAX, R_FPSCR );
  2743     store_reg( R_EAX, Rn );
  2744 :}
  2745 STS.L FPSCR, @-Rn {:  
  2746     COUNT_INST(I_STSFPSCRM);
  2747     check_fpuen();
  2748     load_reg( R_EAX, Rn );
  2749     check_walign32( R_EAX );
  2750     ADD_imm8s_r32( -4, R_EAX );
  2751     MMU_TRANSLATE_WRITE( R_EAX );
  2752     load_spreg( R_EDX, R_FPSCR );
  2753     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2754     MEM_WRITE_LONG( R_EAX, R_EDX );
  2755     sh4_x86.tstate = TSTATE_NONE;
  2756 :}
  2757 STS FPUL, Rn {:  
  2758     COUNT_INST(I_STS);
  2759     check_fpuen();
  2760     load_spreg( R_EAX, R_FPUL );
  2761     store_reg( R_EAX, Rn );
  2762 :}
  2763 STS.L FPUL, @-Rn {:  
  2764     COUNT_INST(I_STSM);
  2765     check_fpuen();
  2766     load_reg( R_EAX, Rn );
  2767     check_walign32( R_EAX );
  2768     ADD_imm8s_r32( -4, R_EAX );
  2769     MMU_TRANSLATE_WRITE( R_EAX );
  2770     load_spreg( R_EDX, R_FPUL );
  2771     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2772     MEM_WRITE_LONG( R_EAX, R_EDX );
  2773     sh4_x86.tstate = TSTATE_NONE;
  2774 :}
  2775 STS MACH, Rn {:  
  2776     COUNT_INST(I_STS);
  2777     load_spreg( R_EAX, R_MACH );
  2778     store_reg( R_EAX, Rn );
  2779 :}
  2780 STS.L MACH, @-Rn {:  
  2781     COUNT_INST(I_STSM);
  2782     load_reg( R_EAX, Rn );
  2783     check_walign32( R_EAX );
  2784     ADD_imm8s_r32( -4, R_EAX );
  2785     MMU_TRANSLATE_WRITE( R_EAX );
  2786     load_spreg( R_EDX, R_MACH );
  2787     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2788     MEM_WRITE_LONG( R_EAX, R_EDX );
  2789     sh4_x86.tstate = TSTATE_NONE;
  2790 :}
  2791 STS MACL, Rn {:  
  2792     COUNT_INST(I_STS);
  2793     load_spreg( R_EAX, R_MACL );
  2794     store_reg( R_EAX, Rn );
  2795 :}
  2796 STS.L MACL, @-Rn {:  
  2797     COUNT_INST(I_STSM);
  2798     load_reg( R_EAX, Rn );
  2799     check_walign32( R_EAX );
  2800     ADD_imm8s_r32( -4, R_EAX );
  2801     MMU_TRANSLATE_WRITE( R_EAX );
  2802     load_spreg( R_EDX, R_MACL );
  2803     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2804     MEM_WRITE_LONG( R_EAX, R_EDX );
  2805     sh4_x86.tstate = TSTATE_NONE;
  2806 :}
  2807 STS PR, Rn {:  
  2808     COUNT_INST(I_STS);
  2809     load_spreg( R_EAX, R_PR );
  2810     store_reg( R_EAX, Rn );
  2811 :}
  2812 STS.L PR, @-Rn {:  
  2813     COUNT_INST(I_STSM);
  2814     load_reg( R_EAX, Rn );
  2815     check_walign32( R_EAX );
  2816     ADD_imm8s_r32( -4, R_EAX );
  2817     MMU_TRANSLATE_WRITE( R_EAX );
  2818     load_spreg( R_EDX, R_PR );
  2819     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2820     MEM_WRITE_LONG( R_EAX, R_EDX );
  2821     sh4_x86.tstate = TSTATE_NONE;
  2822 :}
  2824 NOP {: 
  2825     COUNT_INST(I_NOP);
  2826     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  2827 :}
  2828 %%
  2829     sh4_x86.in_delay_slot = DELAY_NONE;
  2830     return 0;
.