filename src/sh4/sh4x86.in
changeset 908:a00debcf2600
prev 905:4c17ebd9ef5e
next 911:2f6ba75b84d1
author nkeynes
date Thu Oct 30 05:50:21 2008 +0000 (11 years ago)
permissions -rw-r--r--
last change Fix x86-64 build (typos et al)
Remove Push/pop ebx - don't really need it and saves adding more target-specific asm
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "lxdream.h"
    29 #include "sh4/xltcache.h"
    30 #include "sh4/sh4core.h"
    31 #include "sh4/sh4trans.h"
    32 #include "sh4/sh4stat.h"
    33 #include "sh4/sh4mmio.h"
    34 #include "sh4/x86op.h"
    35 #include "clock.h"
    37 #define DEFAULT_BACKPATCH_SIZE 4096
    39 struct backpatch_record {
    40     uint32_t fixup_offset;
    41     uint32_t fixup_icount;
    42     int32_t exc_code;
    43 };
    45 #define DELAY_NONE 0
    46 #define DELAY_PC 1
    47 #define DELAY_PC_PR 2
    49 /** 
    50  * Struct to manage internal translation state. This state is not saved -
    51  * it is only valid between calls to sh4_translate_begin_block() and
    52  * sh4_translate_end_block()
    53  */
    54 struct sh4_x86_state {
    55     int in_delay_slot;
    56     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    57     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    58     gboolean branch_taken; /* true if we branched unconditionally */
    59     gboolean double_prec; /* true if FPU is in double-precision mode */
    60     gboolean double_size; /* true if FPU is in double-size mode */
    61     gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    62     uint32_t block_start_pc;
    63     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    64     int tstate;
    66     /* mode flags */
    67     gboolean tlb_on; /* True if tlb translation is active */
    69     /* Allocated memory for the (block-wide) back-patch list */
    70     struct backpatch_record *backpatch_list;
    71     uint32_t backpatch_posn;
    72     uint32_t backpatch_size;
    73 };
    75 #define TSTATE_NONE -1
    76 #define TSTATE_O    0
    77 #define TSTATE_C    2
    78 #define TSTATE_E    4
    79 #define TSTATE_NE   5
    80 #define TSTATE_G    0xF
    81 #define TSTATE_GE   0xD
    82 #define TSTATE_A    7
    83 #define TSTATE_AE   3
    85 #ifdef ENABLE_SH4STATS
    86 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
    87 #else
    88 #define COUNT_INST(id)
    89 #endif
    91 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    92 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    93 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    94     OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
    96 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    97 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    98 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    99     OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
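/* sh4_x86.tstate tracks which x86 condition code (if any) currently
 * mirrors sh4r.t, letting the macros above branch on host flags without
 * re-reading T. The TSTATE_* values are the x86 condition-code numbers
 * themselves, so OP(0x70+tstate) emits the matching short Jcc - e.g.
 * TSTATE_E (4) gives 0x74 (JE) - and XOR-ing with 1 flips the low bit,
 * which inverts the condition (JE <-> JNE); that is how JF_rel8 negates
 * JT_rel8. */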
   101 static struct sh4_x86_state sh4_x86;
   103 static uint32_t max_int = 0x7FFFFFFF;
   104 static uint32_t min_int = 0x80000000;
   105 static uint32_t save_fcw; /* save value for fpu control word */
   106 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
   108 gboolean is_sse3_supported()
   109 {
   110     uint32_t features;
   112     __asm__ __volatile__(
   113         "mov $0x01, %%eax\n\t"
   114         "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
   115     return (features & 1) ? TRUE : FALSE;
   116 }
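/* CPUID with EAX=1 returns feature flags in ECX/EDX; bit 0 of ECX is the
 * SSE3 (PNI) flag, which is all the mask above tests. */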
   118 void sh4_translate_init(void)
   119 {
   120     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   121     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   122     sh4_x86.sse3_enabled = is_sse3_supported();
   123 }
   126 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   127 {
   128     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   129 	sh4_x86.backpatch_size <<= 1;
   130 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   131 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   132 	assert( sh4_x86.backpatch_list != NULL );
   133     }
   134     if( sh4_x86.in_delay_slot ) {
   135 	fixup_pc -= 2;
   136     }
   137     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   138 	((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
   139     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   140     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   141     sh4_x86.backpatch_posn++;
   142 }
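/* Worked example (hypothetical numbers): a 32-bit fixup hole written at
 * code+0x42 while translating the second instruction of the block
 * (fixup_pc = block_start_pc + 2) is recorded as fixup_offset = 0x42,
 * fixup_icount = 1 - enough to locate the hole and to reconstruct the
 * faulting SH4 PC when the exception stubs are emitted. */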
   144 /**
   145  * Emit an instruction to load an SH4 reg into a real register
   146  */
   147 static inline void load_reg( int x86reg, int sh4reg ) 
   148 {
   149     /* mov [bp+n], reg */
   150     OP(0x8B);
   151     OP(0x45 + (x86reg<<3));
   152     OP(REG_OFFSET(r[sh4reg]));
   153 }
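/* Encoding note: 0x45 + (x86reg<<3) is the ModRM byte mod=01, r/m=101
 * ([EBP+disp8]) with the destination register in bits 3-5; the ABI
 * headers keep a pointer into sh4r in EBP, so the register accesses here
 * are single-byte-displacement loads and stores. */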
   155 static inline void load_reg16s( int x86reg, int sh4reg )
   156 {
   157     OP(0x0F);
   158     OP(0xBF);
   159     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   160 }
   162 static inline void load_reg16u( int x86reg, int sh4reg )
   163 {
   164     OP(0x0F);
   165     OP(0xB7);
   166     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   168 }
   170 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   171 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   172 /**
   173  * Emit an instruction to load an immediate value into a register
   174  */
   175 static inline void load_imm32( int x86reg, uint32_t value ) {
   176     /* mov #value, reg */
   177     OP(0xB8 + x86reg);
   178     OP32(value);
   179 }
   181 /**
   182  * Load an immediate 64-bit quantity (note: x86-64 only)
   183  */
   184 static inline void load_imm64( int x86reg, uint64_t value ) {
   185     /* mov #value, reg */
   186     REXW();
   187     OP(0xB8 + x86reg);
   188     OP64(value);
   189 }
   191 /**
   192  * Emit an instruction to store an SH4 reg (RN)
   193  */
    194 static inline void store_reg( int x86reg, int sh4reg ) {
   195     /* mov reg, [bp+n] */
   196     OP(0x89);
   197     OP(0x45 + (x86reg<<3));
   198     OP(REG_OFFSET(r[sh4reg]));
   199 }
   201 /**
   202  * Load an FR register (single-precision floating point) into an integer x86
   203  * register (eg for register-to-register moves)
   204  */
   205 #define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
   206 #define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
   208 /**
   209  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   210  */
   211 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   212 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   214 /**
    215  * Store an FR register (single-precision floating point) from an integer x86
   216  * register (eg for register-to-register moves)
   217  */
   218 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
   219 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
   221 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   222 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   225 #define push_fpul()  FLDF_sh4r(R_FPUL)
   226 #define pop_fpul()   FSTPF_sh4r(R_FPUL)
   227 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   228 #define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   229 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   230 #define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   231 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   232 #define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   233 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   234 #define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
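/* Layout note: the two singles of each FR pair are stored word-swapped
 * (hence the (frm)^1 indexing above) so that the pair also reads as one
 * little-endian 64-bit double; the double-size forms then address the
 * pair base with (frm)&0x0E and its other half with frm|0x01, with the
 * low bit of frm selecting the bank (FR vs XF) in the DR/XD cases. */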
   238 /* Exception checks - Note that all exception checks will clobber EAX */
   240 #define check_priv( ) \
   241     if( !sh4_x86.priv_checked ) { \
   242 	sh4_x86.priv_checked = TRUE;\
   243 	load_spreg( R_EAX, R_SR );\
   244 	AND_imm32_r32( SR_MD, R_EAX );\
   245 	if( sh4_x86.in_delay_slot ) {\
   246 	    JE_exc( EXC_SLOT_ILLEGAL );\
   247 	} else {\
   248 	    JE_exc( EXC_ILLEGAL );\
   249 	}\
   250 	sh4_x86.tstate = TSTATE_NONE; \
    251     }
   253 #define check_fpuen( ) \
   254     if( !sh4_x86.fpuen_checked ) {\
   255 	sh4_x86.fpuen_checked = TRUE;\
   256 	load_spreg( R_EAX, R_SR );\
   257 	AND_imm32_r32( SR_FD, R_EAX );\
   258 	if( sh4_x86.in_delay_slot ) {\
   259 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   260 	} else {\
   261 	    JNE_exc(EXC_FPU_DISABLED);\
   262 	}\
   263 	sh4_x86.tstate = TSTATE_NONE; \
   264     }
   266 #define check_ralign16( x86reg ) \
   267     TEST_imm32_r32( 0x00000001, x86reg ); \
   268     JNE_exc(EXC_DATA_ADDR_READ)
   270 #define check_walign16( x86reg ) \
   271     TEST_imm32_r32( 0x00000001, x86reg ); \
   272     JNE_exc(EXC_DATA_ADDR_WRITE);
   274 #define check_ralign32( x86reg ) \
   275     TEST_imm32_r32( 0x00000003, x86reg ); \
   276     JNE_exc(EXC_DATA_ADDR_READ)
   278 #define check_walign32( x86reg ) \
   279     TEST_imm32_r32( 0x00000003, x86reg ); \
   280     JNE_exc(EXC_DATA_ADDR_WRITE);
   282 #define check_ralign64( x86reg ) \
   283     TEST_imm32_r32( 0x00000007, x86reg ); \
   284     JNE_exc(EXC_DATA_ADDR_READ)
   286 #define check_walign64( x86reg ) \
   287     TEST_imm32_r32( 0x00000007, x86reg ); \
   288     JNE_exc(EXC_DATA_ADDR_WRITE);
   290 #define UNDEF(ir)
   291 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   292 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   293 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   294 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   295 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   296 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   297 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
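/* These are thin wrappers over the memory subsystem: each read emits a
 * call such as sh4_read_long(addr_reg) with the result arriving in EAX,
 * and MEM_RESULT copies EAX into value_reg only when the two differ. */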
   299 /**
   300  * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned 
   301  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   302  */
   303 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   305 #define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg) }
   306 /**
   307  * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned 
   308  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   309  */
   310 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
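/* As a C sketch (not part of the build), the sequence these macros emit
 * behaves like:
 *
 *     if( sh4_x86.tlb_on ) {
 *         addr_reg = mmu_vma_to_phys_read( addr_reg );   // or ..._write
 *         if( addr_reg == MMU_VMA_ERROR )
 *             goto exception_stub;   // JE_exc: recorded in the backpatch
 *                                    // list, emitted at end of block
 *     }
 */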
   312 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
   314 /****** Import appropriate calling conventions ******/
   315 #if SIZEOF_VOID_P == 8
   316 #include "sh4/ia64abi.h"
   317 #else /* 32-bit system */
   318 #ifdef APPLE_BUILD
   319 #include "sh4/ia32mac.h"
   320 #else
   321 #include "sh4/ia32abi.h"
   322 #endif
   323 #endif
   325 void sh4_translate_begin_block( sh4addr_t pc ) 
   326 {
    327     enter_block();
   328     sh4_x86.in_delay_slot = FALSE;
   329     sh4_x86.priv_checked = FALSE;
   330     sh4_x86.fpuen_checked = FALSE;
   331     sh4_x86.branch_taken = FALSE;
   332     sh4_x86.backpatch_posn = 0;
   333     sh4_x86.block_start_pc = pc;
   334     sh4_x86.tlb_on = IS_MMU_ENABLED();
   335     sh4_x86.tstate = TSTATE_NONE;
   336     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   337     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
   338 }
   341 uint32_t sh4_translate_end_block_size()
   342 {
   343     if( sh4_x86.backpatch_posn <= 3 ) {
   344         return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
   345     } else {
   346         return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
   347     }
   348 }
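/* Worked example: the first three backpatch stubs are costed at 12 bytes
 * each, later ones at 15, so backpatch_posn == 5 reserves
 * EPILOGUE_SIZE + 48 + 2*15 bytes of stub space. */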
   351 /**
   352  * Embed a breakpoint into the generated code
   353  */
   354 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   355 {
   356     load_imm32( R_EAX, pc );
   357     call_func1( sh4_translate_breakpoint_hit, R_EAX );
   358     sh4_x86.tstate = TSTATE_NONE;
   359 }
   362 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
   364 /**
   365  * Embed a call to sh4_execute_instruction for situations that we
   366  * can't translate (just page-crossing delay slots at the moment).
   367  * Caller is responsible for setting new_pc before calling this function.
   368  *
   369  * Performs:
   370  *   Set PC = endpc
   371  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   372  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   373  *   Call sh4_execute_instruction
   374  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   375  */
   376 void exit_block_emu( sh4vma_t endpc )
   377 {
   378     load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
   379     ADD_r32_sh4r( R_ECX, R_PC );
   381     load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
   382     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   383     load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
   384     store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
   386     call_func0( sh4_execute_instruction );    
   387     load_spreg( R_EAX, R_PC );
   388     if( sh4_x86.tlb_on ) {
   389 	call_func1(xlat_get_code_by_vma,R_EAX);
   390     } else {
   391 	call_func1(xlat_get_code,R_EAX);
   392     }
   393     AND_imm8s_rptr( 0xFC, R_EAX );
   394     POP_r32(R_EBP);
   395     RET();
   396 } 
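/* Cycle accounting, spelled out: ((endpc - block_start_pc)>>1) + 1 is the
 * number of 2-byte instructions up to and including the one at endpc, so
 * the block charges sh4_cpu_period for each of them here - the emulation
 * core itself doesn't touch slice_cycle for the single-stepped one. */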
   398 /**
    399  * Translate a single instruction. Delayed branches are handled specially
    400  * by translating both branch and delayed instruction as a single unit (as
    401  * the delay-slot instruction must execute before the branch takes effect).
    402  * The instruction MUST be in the icache (assert check)
    403  *
    404  * @return true if the instruction marks the end of a basic block
    405  * (eg a branch or an instruction that otherwise ends the block).
   406  */
   407 uint32_t sh4_translate_instruction( sh4vma_t pc )
   408 {
   409     uint32_t ir;
   410     /* Read instruction from icache */
   411     assert( IS_IN_ICACHE(pc) );
   412     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   414 	/* PC is not in the current icache - this usually means we're running
   415 	 * with MMU on, and we've gone past the end of the page. And since 
   416 	 * sh4_translate_block is pretty careful about this, it means we're
   417 	 * almost certainly in a delay slot.
   418 	 *
   419 	 * Since we can't assume the page is present (and we can't fault it in
	    420 	 * at this point), inline a call to sh4_execute_instruction (with a few
   421 	 * small repairs to cope with the different environment).
   422 	 */
   424     if( !sh4_x86.in_delay_slot ) {
   425 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   426     }
   427 %%
   428 /* ALU operations */
   429 ADD Rm, Rn {:
   430     COUNT_INST(I_ADD);
   431     load_reg( R_EAX, Rm );
   432     load_reg( R_ECX, Rn );
   433     ADD_r32_r32( R_EAX, R_ECX );
   434     store_reg( R_ECX, Rn );
   435     sh4_x86.tstate = TSTATE_NONE;
   436 :}
   437 ADD #imm, Rn {:  
   438     COUNT_INST(I_ADDI);
   439     load_reg( R_EAX, Rn );
   440     ADD_imm8s_r32( imm, R_EAX );
   441     store_reg( R_EAX, Rn );
   442     sh4_x86.tstate = TSTATE_NONE;
   443 :}
   444 ADDC Rm, Rn {:
   445     COUNT_INST(I_ADDC);
   446     if( sh4_x86.tstate != TSTATE_C ) {
   447 	LDC_t();
   448     }
   449     load_reg( R_EAX, Rm );
   450     load_reg( R_ECX, Rn );
   451     ADC_r32_r32( R_EAX, R_ECX );
   452     store_reg( R_ECX, Rn );
   453     SETC_t();
   454     sh4_x86.tstate = TSTATE_C;
   455 :}
   456 ADDV Rm, Rn {:
   457     COUNT_INST(I_ADDV);
   458     load_reg( R_EAX, Rm );
   459     load_reg( R_ECX, Rn );
   460     ADD_r32_r32( R_EAX, R_ECX );
   461     store_reg( R_ECX, Rn );
   462     SETO_t();
   463     sh4_x86.tstate = TSTATE_O;
   464 :}
   465 AND Rm, Rn {:
   466     COUNT_INST(I_AND);
   467     load_reg( R_EAX, Rm );
   468     load_reg( R_ECX, Rn );
   469     AND_r32_r32( R_EAX, R_ECX );
   470     store_reg( R_ECX, Rn );
   471     sh4_x86.tstate = TSTATE_NONE;
   472 :}
   473 AND #imm, R0 {:  
   474     COUNT_INST(I_ANDI);
   475     load_reg( R_EAX, 0 );
   476     AND_imm32_r32(imm, R_EAX); 
   477     store_reg( R_EAX, 0 );
   478     sh4_x86.tstate = TSTATE_NONE;
   479 :}
   480 AND.B #imm, @(R0, GBR) {: 
   481     COUNT_INST(I_ANDB);
   482     load_reg( R_EAX, 0 );
   483     load_spreg( R_ECX, R_GBR );
   484     ADD_r32_r32( R_ECX, R_EAX );
   485     MMU_TRANSLATE_WRITE( R_EAX );
   486     PUSH_realigned_r32(R_EAX);
   487     MEM_READ_BYTE( R_EAX, R_EDX );
   488     POP_realigned_r32(R_EAX);
   489     AND_imm32_r32(imm, R_EDX );
   490     MEM_WRITE_BYTE( R_EAX, R_EDX );
   491     sh4_x86.tstate = TSTATE_NONE;
   492 :}
   493 CMP/EQ Rm, Rn {:  
   494     COUNT_INST(I_CMPEQ);
   495     load_reg( R_EAX, Rm );
   496     load_reg( R_ECX, Rn );
   497     CMP_r32_r32( R_EAX, R_ECX );
   498     SETE_t();
   499     sh4_x86.tstate = TSTATE_E;
   500 :}
   501 CMP/EQ #imm, R0 {:  
   502     COUNT_INST(I_CMPEQI);
   503     load_reg( R_EAX, 0 );
   504     CMP_imm8s_r32(imm, R_EAX);
   505     SETE_t();
   506     sh4_x86.tstate = TSTATE_E;
   507 :}
   508 CMP/GE Rm, Rn {:  
   509     COUNT_INST(I_CMPGE);
   510     load_reg( R_EAX, Rm );
   511     load_reg( R_ECX, Rn );
   512     CMP_r32_r32( R_EAX, R_ECX );
   513     SETGE_t();
   514     sh4_x86.tstate = TSTATE_GE;
   515 :}
   516 CMP/GT Rm, Rn {: 
   517     COUNT_INST(I_CMPGT);
   518     load_reg( R_EAX, Rm );
   519     load_reg( R_ECX, Rn );
   520     CMP_r32_r32( R_EAX, R_ECX );
   521     SETG_t();
   522     sh4_x86.tstate = TSTATE_G;
   523 :}
   524 CMP/HI Rm, Rn {:  
   525     COUNT_INST(I_CMPHI);
   526     load_reg( R_EAX, Rm );
   527     load_reg( R_ECX, Rn );
   528     CMP_r32_r32( R_EAX, R_ECX );
   529     SETA_t();
   530     sh4_x86.tstate = TSTATE_A;
   531 :}
   532 CMP/HS Rm, Rn {: 
   533     COUNT_INST(I_CMPHS);
   534     load_reg( R_EAX, Rm );
   535     load_reg( R_ECX, Rn );
   536     CMP_r32_r32( R_EAX, R_ECX );
   537     SETAE_t();
   538     sh4_x86.tstate = TSTATE_AE;
   539  :}
   540 CMP/PL Rn {: 
   541     COUNT_INST(I_CMPPL);
   542     load_reg( R_EAX, Rn );
   543     CMP_imm8s_r32( 0, R_EAX );
   544     SETG_t();
   545     sh4_x86.tstate = TSTATE_G;
   546 :}
   547 CMP/PZ Rn {:  
   548     COUNT_INST(I_CMPPZ);
   549     load_reg( R_EAX, Rn );
   550     CMP_imm8s_r32( 0, R_EAX );
   551     SETGE_t();
   552     sh4_x86.tstate = TSTATE_GE;
   553 :}
   554 CMP/STR Rm, Rn {:  
   555     COUNT_INST(I_CMPSTR);
   556     load_reg( R_EAX, Rm );
   557     load_reg( R_ECX, Rn );
   558     XOR_r32_r32( R_ECX, R_EAX );
   559     TEST_r8_r8( R_AL, R_AL );
   560     JE_rel8(target1);
   561     TEST_r8_r8( R_AH, R_AH );
   562     JE_rel8(target2);
   563     SHR_imm8_r32( 16, R_EAX );
   564     TEST_r8_r8( R_AL, R_AL );
   565     JE_rel8(target3);
   566     TEST_r8_r8( R_AH, R_AH );
   567     JMP_TARGET(target1);
   568     JMP_TARGET(target2);
   569     JMP_TARGET(target3);
   570     SETE_t();
   571     sh4_x86.tstate = TSTATE_E;
   572 :}
   573 DIV0S Rm, Rn {:
   574     COUNT_INST(I_DIV0S);
   575     load_reg( R_EAX, Rm );
   576     load_reg( R_ECX, Rn );
   577     SHR_imm8_r32( 31, R_EAX );
   578     SHR_imm8_r32( 31, R_ECX );
   579     store_spreg( R_EAX, R_M );
   580     store_spreg( R_ECX, R_Q );
   581     CMP_r32_r32( R_EAX, R_ECX );
   582     SETNE_t();
   583     sh4_x86.tstate = TSTATE_NE;
   584 :}
   585 DIV0U {:  
   586     COUNT_INST(I_DIV0U);
   587     XOR_r32_r32( R_EAX, R_EAX );
   588     store_spreg( R_EAX, R_Q );
   589     store_spreg( R_EAX, R_M );
   590     store_spreg( R_EAX, R_T );
   591     sh4_x86.tstate = TSTATE_C; // works for DIV1
   592 :}
   593 DIV1 Rm, Rn {:
   594     COUNT_INST(I_DIV1);
   595     load_spreg( R_ECX, R_M );
   596     load_reg( R_EAX, Rn );
   597     if( sh4_x86.tstate != TSTATE_C ) {
   598 	LDC_t();
   599     }
   600     RCL1_r32( R_EAX );
   601     SETC_r8( R_DL ); // Q'
   602     CMP_sh4r_r32( R_Q, R_ECX );
   603     JE_rel8(mqequal);
   604     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   605     JMP_rel8(end);
   606     JMP_TARGET(mqequal);
   607     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   608     JMP_TARGET(end);
   609     store_reg( R_EAX, Rn ); // Done with Rn now
   610     SETC_r8(R_AL); // tmp1
   611     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   612     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   613     store_spreg( R_ECX, R_Q );
   614     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   615     MOVZX_r8_r32( R_AL, R_EAX );
   616     store_spreg( R_EAX, R_T );
   617     sh4_x86.tstate = TSTATE_NONE;
   618 :}
   619 DMULS.L Rm, Rn {:  
   620     COUNT_INST(I_DMULS);
   621     load_reg( R_EAX, Rm );
   622     load_reg( R_ECX, Rn );
   623     IMUL_r32(R_ECX);
   624     store_spreg( R_EDX, R_MACH );
   625     store_spreg( R_EAX, R_MACL );
   626     sh4_x86.tstate = TSTATE_NONE;
   627 :}
   628 DMULU.L Rm, Rn {:  
   629     COUNT_INST(I_DMULU);
   630     load_reg( R_EAX, Rm );
   631     load_reg( R_ECX, Rn );
   632     MUL_r32(R_ECX);
   633     store_spreg( R_EDX, R_MACH );
   634     store_spreg( R_EAX, R_MACL );    
   635     sh4_x86.tstate = TSTATE_NONE;
   636 :}
   637 DT Rn {:  
   638     COUNT_INST(I_DT);
   639     load_reg( R_EAX, Rn );
   640     ADD_imm8s_r32( -1, R_EAX );
   641     store_reg( R_EAX, Rn );
   642     SETE_t();
   643     sh4_x86.tstate = TSTATE_E;
   644 :}
   645 EXTS.B Rm, Rn {:  
   646     COUNT_INST(I_EXTSB);
   647     load_reg( R_EAX, Rm );
   648     MOVSX_r8_r32( R_EAX, R_EAX );
   649     store_reg( R_EAX, Rn );
   650 :}
   651 EXTS.W Rm, Rn {:  
   652     COUNT_INST(I_EXTSW);
   653     load_reg( R_EAX, Rm );
   654     MOVSX_r16_r32( R_EAX, R_EAX );
   655     store_reg( R_EAX, Rn );
   656 :}
   657 EXTU.B Rm, Rn {:  
   658     COUNT_INST(I_EXTUB);
   659     load_reg( R_EAX, Rm );
   660     MOVZX_r8_r32( R_EAX, R_EAX );
   661     store_reg( R_EAX, Rn );
   662 :}
   663 EXTU.W Rm, Rn {:  
   664     COUNT_INST(I_EXTUW);
   665     load_reg( R_EAX, Rm );
   666     MOVZX_r16_r32( R_EAX, R_EAX );
   667     store_reg( R_EAX, Rn );
   668 :}
   669 MAC.L @Rm+, @Rn+ {:
   670     COUNT_INST(I_MACL);
   671     if( Rm == Rn ) {
   672 	load_reg( R_EAX, Rm );
   673 	check_ralign32( R_EAX );
   674 	MMU_TRANSLATE_READ( R_EAX );
   675 	PUSH_realigned_r32( R_EAX );
   676 	load_reg( R_EAX, Rn );
   677 	ADD_imm8s_r32( 4, R_EAX );
   678 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   679 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
   680 	// Note translate twice in case of page boundaries. Maybe worth
   681 	// adding a page-boundary check to skip the second translation
   682     } else {
   683 	load_reg( R_EAX, Rm );
   684 	check_ralign32( R_EAX );
   685 	MMU_TRANSLATE_READ( R_EAX );
   686 	load_reg( R_ECX, Rn );
   687 	check_ralign32( R_ECX );
   688 	PUSH_realigned_r32( R_EAX );
   689 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   690 	MOV_r32_r32( R_ECX, R_EAX );
   691 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   692 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   693     }
   694     MEM_READ_LONG( R_EAX, R_EAX );
   695     POP_r32( R_ECX );
   696     PUSH_r32( R_EAX );
   697     MEM_READ_LONG( R_ECX, R_EAX );
   698     POP_realigned_r32( R_ECX );
   700     IMUL_r32( R_ECX );
   701     ADD_r32_sh4r( R_EAX, R_MACL );
   702     ADC_r32_sh4r( R_EDX, R_MACH );
   704     load_spreg( R_ECX, R_S );
   705     TEST_r32_r32(R_ECX, R_ECX);
   706     JE_rel8( nosat );
   707     call_func0( signsat48 );
   708     JMP_TARGET( nosat );
   709     sh4_x86.tstate = TSTATE_NONE;
   710 :}
   711 MAC.W @Rm+, @Rn+ {:  
   712     COUNT_INST(I_MACW);
   713     if( Rm == Rn ) {
   714 	load_reg( R_EAX, Rm );
   715 	check_ralign16( R_EAX );
   716 	MMU_TRANSLATE_READ( R_EAX );
   717 	PUSH_realigned_r32( R_EAX );
   718 	load_reg( R_EAX, Rn );
   719 	ADD_imm8s_r32( 2, R_EAX );
   720 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   721 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   722 	// Note translate twice in case of page boundaries. Maybe worth
   723 	// adding a page-boundary check to skip the second translation
   724     } else {
   725 	load_reg( R_EAX, Rm );
   726 	check_ralign16( R_EAX );
   727 	MMU_TRANSLATE_READ( R_EAX );
   728 	load_reg( R_ECX, Rn );
   729 	check_ralign16( R_ECX );
   730 	PUSH_realigned_r32( R_EAX );
   731 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   732 	MOV_r32_r32( R_ECX, R_EAX );
   733 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   734 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   735     }
   736     MEM_READ_WORD( R_EAX, R_EAX );
   737     POP_r32( R_ECX );
   738     PUSH_r32( R_EAX );
   739     MEM_READ_WORD( R_ECX, R_EAX );
   740     POP_realigned_r32( R_ECX );
   741     IMUL_r32( R_ECX );
   743     load_spreg( R_ECX, R_S );
   744     TEST_r32_r32( R_ECX, R_ECX );
   745     JE_rel8( nosat );
   747     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   748     JNO_rel8( end );            // 2
   749     load_imm32( R_EDX, 1 );         // 5
   750     store_spreg( R_EDX, R_MACH );   // 6
   751     JS_rel8( positive );        // 2
   752     load_imm32( R_EAX, 0x80000000 );// 5
   753     store_spreg( R_EAX, R_MACL );   // 6
   754     JMP_rel8(end2);           // 2
   756     JMP_TARGET(positive);
   757     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   758     store_spreg( R_EAX, R_MACL );   // 6
   759     JMP_rel8(end3);            // 2
   761     JMP_TARGET(nosat);
   762     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   763     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   764     JMP_TARGET(end);
   765     JMP_TARGET(end2);
   766     JMP_TARGET(end3);
   767     sh4_x86.tstate = TSTATE_NONE;
   768 :}
   769 MOVT Rn {:  
   770     COUNT_INST(I_MOVT);
   771     load_spreg( R_EAX, R_T );
   772     store_reg( R_EAX, Rn );
   773 :}
   774 MUL.L Rm, Rn {:  
   775     COUNT_INST(I_MULL);
   776     load_reg( R_EAX, Rm );
   777     load_reg( R_ECX, Rn );
   778     MUL_r32( R_ECX );
   779     store_spreg( R_EAX, R_MACL );
   780     sh4_x86.tstate = TSTATE_NONE;
   781 :}
   782 MULS.W Rm, Rn {:
   783     COUNT_INST(I_MULSW);
   784     load_reg16s( R_EAX, Rm );
   785     load_reg16s( R_ECX, Rn );
   786     MUL_r32( R_ECX );
   787     store_spreg( R_EAX, R_MACL );
   788     sh4_x86.tstate = TSTATE_NONE;
   789 :}
   790 MULU.W Rm, Rn {:  
   791     COUNT_INST(I_MULUW);
   792     load_reg16u( R_EAX, Rm );
   793     load_reg16u( R_ECX, Rn );
   794     MUL_r32( R_ECX );
   795     store_spreg( R_EAX, R_MACL );
   796     sh4_x86.tstate = TSTATE_NONE;
   797 :}
   798 NEG Rm, Rn {:
   799     COUNT_INST(I_NEG);
   800     load_reg( R_EAX, Rm );
   801     NEG_r32( R_EAX );
   802     store_reg( R_EAX, Rn );
   803     sh4_x86.tstate = TSTATE_NONE;
   804 :}
   805 NEGC Rm, Rn {:  
   806     COUNT_INST(I_NEGC);
   807     load_reg( R_EAX, Rm );
   808     XOR_r32_r32( R_ECX, R_ECX );
   809     LDC_t();
   810     SBB_r32_r32( R_EAX, R_ECX );
   811     store_reg( R_ECX, Rn );
   812     SETC_t();
   813     sh4_x86.tstate = TSTATE_C;
   814 :}
   815 NOT Rm, Rn {:  
   816     COUNT_INST(I_NOT);
   817     load_reg( R_EAX, Rm );
   818     NOT_r32( R_EAX );
   819     store_reg( R_EAX, Rn );
   820     sh4_x86.tstate = TSTATE_NONE;
   821 :}
   822 OR Rm, Rn {:  
   823     COUNT_INST(I_OR);
   824     load_reg( R_EAX, Rm );
   825     load_reg( R_ECX, Rn );
   826     OR_r32_r32( R_EAX, R_ECX );
   827     store_reg( R_ECX, Rn );
   828     sh4_x86.tstate = TSTATE_NONE;
   829 :}
   830 OR #imm, R0 {:
   831     COUNT_INST(I_ORI);
   832     load_reg( R_EAX, 0 );
   833     OR_imm32_r32(imm, R_EAX);
   834     store_reg( R_EAX, 0 );
   835     sh4_x86.tstate = TSTATE_NONE;
   836 :}
   837 OR.B #imm, @(R0, GBR) {:  
   838     COUNT_INST(I_ORB);
   839     load_reg( R_EAX, 0 );
   840     load_spreg( R_ECX, R_GBR );
   841     ADD_r32_r32( R_ECX, R_EAX );
   842     MMU_TRANSLATE_WRITE( R_EAX );
   843     PUSH_realigned_r32(R_EAX);
   844     MEM_READ_BYTE( R_EAX, R_EDX );
   845     POP_realigned_r32(R_EAX);
   846     OR_imm32_r32(imm, R_EDX );
   847     MEM_WRITE_BYTE( R_EAX, R_EDX );
   848     sh4_x86.tstate = TSTATE_NONE;
   849 :}
   850 ROTCL Rn {:
   851     COUNT_INST(I_ROTCL);
   852     load_reg( R_EAX, Rn );
   853     if( sh4_x86.tstate != TSTATE_C ) {
   854 	LDC_t();
   855     }
   856     RCL1_r32( R_EAX );
   857     store_reg( R_EAX, Rn );
   858     SETC_t();
   859     sh4_x86.tstate = TSTATE_C;
   860 :}
   861 ROTCR Rn {:  
   862     COUNT_INST(I_ROTCR);
   863     load_reg( R_EAX, Rn );
   864     if( sh4_x86.tstate != TSTATE_C ) {
   865 	LDC_t();
   866     }
   867     RCR1_r32( R_EAX );
   868     store_reg( R_EAX, Rn );
   869     SETC_t();
   870     sh4_x86.tstate = TSTATE_C;
   871 :}
   872 ROTL Rn {:  
   873     COUNT_INST(I_ROTL);
   874     load_reg( R_EAX, Rn );
   875     ROL1_r32( R_EAX );
   876     store_reg( R_EAX, Rn );
   877     SETC_t();
   878     sh4_x86.tstate = TSTATE_C;
   879 :}
   880 ROTR Rn {:  
   881     COUNT_INST(I_ROTR);
   882     load_reg( R_EAX, Rn );
   883     ROR1_r32( R_EAX );
   884     store_reg( R_EAX, Rn );
   885     SETC_t();
   886     sh4_x86.tstate = TSTATE_C;
   887 :}
   888 SHAD Rm, Rn {:
   889     COUNT_INST(I_SHAD);
   890     /* Annoyingly enough, not directly convertible */
   891     load_reg( R_EAX, Rn );
   892     load_reg( R_ECX, Rm );
   893     CMP_imm32_r32( 0, R_ECX );
   894     JGE_rel8(doshl);
   896     NEG_r32( R_ECX );      // 2
   897     AND_imm8_r8( 0x1F, R_CL ); // 3
   898     JE_rel8(emptysar);     // 2
   899     SAR_r32_CL( R_EAX );       // 2
   900     JMP_rel8(end);          // 2
   902     JMP_TARGET(emptysar);
   903     SAR_imm8_r32(31, R_EAX );  // 3
   904     JMP_rel8(end2);
   906     JMP_TARGET(doshl);
   907     AND_imm8_r8( 0x1F, R_CL ); // 3
   908     SHL_r32_CL( R_EAX );       // 2
   909     JMP_TARGET(end);
   910     JMP_TARGET(end2);
   911     store_reg( R_EAX, Rn );
   912     sh4_x86.tstate = TSTATE_NONE;
   913 :}
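/* SHAD semantics: positive Rm shifts Rn left by Rm&0x1F; negative Rm
 * shifts arithmetic-right by (-Rm)&0x1F, where a count of 0 in the
 * negative case means a full 32-bit shift. x86 masks shift counts to
 * 5 bits, so that case is emulated with SAR 31 (emptysar above); SHLD
 * below does the same with a logical shift, where shift-by-32 yields 0. */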
   914 SHLD Rm, Rn {:  
   915     COUNT_INST(I_SHLD);
   916     load_reg( R_EAX, Rn );
   917     load_reg( R_ECX, Rm );
   918     CMP_imm32_r32( 0, R_ECX );
   919     JGE_rel8(doshl);
   921     NEG_r32( R_ECX );      // 2
   922     AND_imm8_r8( 0x1F, R_CL ); // 3
   923     JE_rel8(emptyshr );
   924     SHR_r32_CL( R_EAX );       // 2
   925     JMP_rel8(end);          // 2
   927     JMP_TARGET(emptyshr);
   928     XOR_r32_r32( R_EAX, R_EAX );
   929     JMP_rel8(end2);
   931     JMP_TARGET(doshl);
   932     AND_imm8_r8( 0x1F, R_CL ); // 3
   933     SHL_r32_CL( R_EAX );       // 2
   934     JMP_TARGET(end);
   935     JMP_TARGET(end2);
   936     store_reg( R_EAX, Rn );
   937     sh4_x86.tstate = TSTATE_NONE;
   938 :}
   939 SHAL Rn {: 
   940     COUNT_INST(I_SHAL);
   941     load_reg( R_EAX, Rn );
   942     SHL1_r32( R_EAX );
   943     SETC_t();
   944     store_reg( R_EAX, Rn );
   945     sh4_x86.tstate = TSTATE_C;
   946 :}
   947 SHAR Rn {:  
   948     COUNT_INST(I_SHAR);
   949     load_reg( R_EAX, Rn );
   950     SAR1_r32( R_EAX );
   951     SETC_t();
   952     store_reg( R_EAX, Rn );
   953     sh4_x86.tstate = TSTATE_C;
   954 :}
   955 SHLL Rn {:  
   956     COUNT_INST(I_SHLL);
   957     load_reg( R_EAX, Rn );
   958     SHL1_r32( R_EAX );
   959     SETC_t();
   960     store_reg( R_EAX, Rn );
   961     sh4_x86.tstate = TSTATE_C;
   962 :}
   963 SHLL2 Rn {:
   964     COUNT_INST(I_SHLL);
   965     load_reg( R_EAX, Rn );
   966     SHL_imm8_r32( 2, R_EAX );
   967     store_reg( R_EAX, Rn );
   968     sh4_x86.tstate = TSTATE_NONE;
   969 :}
   970 SHLL8 Rn {:  
   971     COUNT_INST(I_SHLL);
   972     load_reg( R_EAX, Rn );
   973     SHL_imm8_r32( 8, R_EAX );
   974     store_reg( R_EAX, Rn );
   975     sh4_x86.tstate = TSTATE_NONE;
   976 :}
   977 SHLL16 Rn {:  
   978     COUNT_INST(I_SHLL);
   979     load_reg( R_EAX, Rn );
   980     SHL_imm8_r32( 16, R_EAX );
   981     store_reg( R_EAX, Rn );
   982     sh4_x86.tstate = TSTATE_NONE;
   983 :}
   984 SHLR Rn {:  
   985     COUNT_INST(I_SHLR);
   986     load_reg( R_EAX, Rn );
   987     SHR1_r32( R_EAX );
   988     SETC_t();
   989     store_reg( R_EAX, Rn );
   990     sh4_x86.tstate = TSTATE_C;
   991 :}
   992 SHLR2 Rn {:  
   993     COUNT_INST(I_SHLR);
   994     load_reg( R_EAX, Rn );
   995     SHR_imm8_r32( 2, R_EAX );
   996     store_reg( R_EAX, Rn );
   997     sh4_x86.tstate = TSTATE_NONE;
   998 :}
   999 SHLR8 Rn {:  
  1000     COUNT_INST(I_SHLR);
  1001     load_reg( R_EAX, Rn );
  1002     SHR_imm8_r32( 8, R_EAX );
  1003     store_reg( R_EAX, Rn );
  1004     sh4_x86.tstate = TSTATE_NONE;
  1005 :}
  1006 SHLR16 Rn {:  
  1007     COUNT_INST(I_SHLR);
  1008     load_reg( R_EAX, Rn );
  1009     SHR_imm8_r32( 16, R_EAX );
  1010     store_reg( R_EAX, Rn );
  1011     sh4_x86.tstate = TSTATE_NONE;
  1012 :}
  1013 SUB Rm, Rn {:  
  1014     COUNT_INST(I_SUB);
  1015     load_reg( R_EAX, Rm );
  1016     load_reg( R_ECX, Rn );
  1017     SUB_r32_r32( R_EAX, R_ECX );
  1018     store_reg( R_ECX, Rn );
  1019     sh4_x86.tstate = TSTATE_NONE;
  1020 :}
  1021 SUBC Rm, Rn {:  
  1022     COUNT_INST(I_SUBC);
  1023     load_reg( R_EAX, Rm );
  1024     load_reg( R_ECX, Rn );
  1025     if( sh4_x86.tstate != TSTATE_C ) {
   1026 	LDC_t();
   1027     }
  1028     SBB_r32_r32( R_EAX, R_ECX );
  1029     store_reg( R_ECX, Rn );
  1030     SETC_t();
  1031     sh4_x86.tstate = TSTATE_C;
  1032 :}
  1033 SUBV Rm, Rn {:  
  1034     COUNT_INST(I_SUBV);
  1035     load_reg( R_EAX, Rm );
  1036     load_reg( R_ECX, Rn );
  1037     SUB_r32_r32( R_EAX, R_ECX );
  1038     store_reg( R_ECX, Rn );
  1039     SETO_t();
  1040     sh4_x86.tstate = TSTATE_O;
  1041 :}
  1042 SWAP.B Rm, Rn {:  
  1043     COUNT_INST(I_SWAPB);
  1044     load_reg( R_EAX, Rm );
  1045     XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
  1046     store_reg( R_EAX, Rn );
  1047 :}
  1048 SWAP.W Rm, Rn {:  
   1049     COUNT_INST(I_SWAPW);
  1050     load_reg( R_EAX, Rm );
  1051     MOV_r32_r32( R_EAX, R_ECX );
  1052     SHL_imm8_r32( 16, R_ECX );
  1053     SHR_imm8_r32( 16, R_EAX );
  1054     OR_r32_r32( R_EAX, R_ECX );
  1055     store_reg( R_ECX, Rn );
  1056     sh4_x86.tstate = TSTATE_NONE;
  1057 :}
  1058 TAS.B @Rn {:  
  1059     COUNT_INST(I_TASB);
  1060     load_reg( R_EAX, Rn );
  1061     MMU_TRANSLATE_WRITE( R_EAX );
  1062     PUSH_realigned_r32( R_EAX );
  1063     MEM_READ_BYTE( R_EAX, R_EDX );
  1064     TEST_r8_r8( R_DL, R_DL );
  1065     SETE_t();
  1066     OR_imm8_r8( 0x80, R_DL );
  1067     POP_realigned_r32( R_EAX );
  1068     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1069     sh4_x86.tstate = TSTATE_NONE;
  1070 :}
  1071 TST Rm, Rn {:  
  1072     COUNT_INST(I_TST);
  1073     load_reg( R_EAX, Rm );
  1074     load_reg( R_ECX, Rn );
  1075     TEST_r32_r32( R_EAX, R_ECX );
  1076     SETE_t();
  1077     sh4_x86.tstate = TSTATE_E;
  1078 :}
  1079 TST #imm, R0 {:  
  1080     COUNT_INST(I_TSTI);
  1081     load_reg( R_EAX, 0 );
  1082     TEST_imm32_r32( imm, R_EAX );
  1083     SETE_t();
  1084     sh4_x86.tstate = TSTATE_E;
  1085 :}
  1086 TST.B #imm, @(R0, GBR) {:  
  1087     COUNT_INST(I_TSTB);
  1088     load_reg( R_EAX, 0);
   1089     load_spreg( R_ECX, R_GBR );
  1090     ADD_r32_r32( R_ECX, R_EAX );
  1091     MMU_TRANSLATE_READ( R_EAX );
  1092     MEM_READ_BYTE( R_EAX, R_EAX );
  1093     TEST_imm8_r8( imm, R_AL );
  1094     SETE_t();
  1095     sh4_x86.tstate = TSTATE_E;
  1096 :}
  1097 XOR Rm, Rn {:  
  1098     COUNT_INST(I_XOR);
  1099     load_reg( R_EAX, Rm );
  1100     load_reg( R_ECX, Rn );
  1101     XOR_r32_r32( R_EAX, R_ECX );
  1102     store_reg( R_ECX, Rn );
  1103     sh4_x86.tstate = TSTATE_NONE;
  1104 :}
  1105 XOR #imm, R0 {:  
  1106     COUNT_INST(I_XORI);
  1107     load_reg( R_EAX, 0 );
  1108     XOR_imm32_r32( imm, R_EAX );
  1109     store_reg( R_EAX, 0 );
  1110     sh4_x86.tstate = TSTATE_NONE;
  1111 :}
  1112 XOR.B #imm, @(R0, GBR) {:  
  1113     COUNT_INST(I_XORB);
  1114     load_reg( R_EAX, 0 );
  1115     load_spreg( R_ECX, R_GBR );
  1116     ADD_r32_r32( R_ECX, R_EAX );
  1117     MMU_TRANSLATE_WRITE( R_EAX );
  1118     PUSH_realigned_r32(R_EAX);
  1119     MEM_READ_BYTE(R_EAX, R_EDX);
  1120     POP_realigned_r32(R_EAX);
  1121     XOR_imm32_r32( imm, R_EDX );
  1122     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1123     sh4_x86.tstate = TSTATE_NONE;
  1124 :}
  1125 XTRCT Rm, Rn {:
  1126     COUNT_INST(I_XTRCT);
  1127     load_reg( R_EAX, Rm );
  1128     load_reg( R_ECX, Rn );
  1129     SHL_imm8_r32( 16, R_EAX );
  1130     SHR_imm8_r32( 16, R_ECX );
  1131     OR_r32_r32( R_EAX, R_ECX );
  1132     store_reg( R_ECX, Rn );
  1133     sh4_x86.tstate = TSTATE_NONE;
  1134 :}
  1136 /* Data move instructions */
  1137 MOV Rm, Rn {:  
  1138     COUNT_INST(I_MOV);
  1139     load_reg( R_EAX, Rm );
  1140     store_reg( R_EAX, Rn );
  1141 :}
  1142 MOV #imm, Rn {:  
  1143     COUNT_INST(I_MOVI);
  1144     load_imm32( R_EAX, imm );
  1145     store_reg( R_EAX, Rn );
  1146 :}
  1147 MOV.B Rm, @Rn {:  
  1148     COUNT_INST(I_MOVB);
  1149     load_reg( R_EAX, Rn );
  1150     MMU_TRANSLATE_WRITE( R_EAX );
  1151     load_reg( R_EDX, Rm );
  1152     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1153     sh4_x86.tstate = TSTATE_NONE;
  1154 :}
  1155 MOV.B Rm, @-Rn {:  
  1156     COUNT_INST(I_MOVB);
  1157     load_reg( R_EAX, Rn );
  1158     ADD_imm8s_r32( -1, R_EAX );
  1159     MMU_TRANSLATE_WRITE( R_EAX );
  1160     load_reg( R_EDX, Rm );
  1161     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
  1162     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1163     sh4_x86.tstate = TSTATE_NONE;
  1164 :}
  1165 MOV.B Rm, @(R0, Rn) {:  
  1166     COUNT_INST(I_MOVB);
  1167     load_reg( R_EAX, 0 );
  1168     load_reg( R_ECX, Rn );
  1169     ADD_r32_r32( R_ECX, R_EAX );
  1170     MMU_TRANSLATE_WRITE( R_EAX );
  1171     load_reg( R_EDX, Rm );
  1172     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1173     sh4_x86.tstate = TSTATE_NONE;
  1174 :}
  1175 MOV.B R0, @(disp, GBR) {:  
  1176     COUNT_INST(I_MOVB);
  1177     load_spreg( R_EAX, R_GBR );
  1178     ADD_imm32_r32( disp, R_EAX );
  1179     MMU_TRANSLATE_WRITE( R_EAX );
  1180     load_reg( R_EDX, 0 );
  1181     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1182     sh4_x86.tstate = TSTATE_NONE;
  1183 :}
  1184 MOV.B R0, @(disp, Rn) {:  
  1185     COUNT_INST(I_MOVB);
  1186     load_reg( R_EAX, Rn );
  1187     ADD_imm32_r32( disp, R_EAX );
  1188     MMU_TRANSLATE_WRITE( R_EAX );
  1189     load_reg( R_EDX, 0 );
  1190     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1191     sh4_x86.tstate = TSTATE_NONE;
  1192 :}
  1193 MOV.B @Rm, Rn {:  
  1194     COUNT_INST(I_MOVB);
  1195     load_reg( R_EAX, Rm );
  1196     MMU_TRANSLATE_READ( R_EAX );
  1197     MEM_READ_BYTE( R_EAX, R_EAX );
  1198     store_reg( R_EAX, Rn );
  1199     sh4_x86.tstate = TSTATE_NONE;
  1200 :}
  1201 MOV.B @Rm+, Rn {:  
  1202     COUNT_INST(I_MOVB);
  1203     load_reg( R_EAX, Rm );
  1204     MMU_TRANSLATE_READ( R_EAX );
  1205     ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
  1206     MEM_READ_BYTE( R_EAX, R_EAX );
  1207     store_reg( R_EAX, Rn );
  1208     sh4_x86.tstate = TSTATE_NONE;
  1209 :}
  1210 MOV.B @(R0, Rm), Rn {:  
  1211     COUNT_INST(I_MOVB);
  1212     load_reg( R_EAX, 0 );
  1213     load_reg( R_ECX, Rm );
  1214     ADD_r32_r32( R_ECX, R_EAX );
   1215     MMU_TRANSLATE_READ( R_EAX );
  1216     MEM_READ_BYTE( R_EAX, R_EAX );
  1217     store_reg( R_EAX, Rn );
  1218     sh4_x86.tstate = TSTATE_NONE;
  1219 :}
  1220 MOV.B @(disp, GBR), R0 {:  
  1221     COUNT_INST(I_MOVB);
  1222     load_spreg( R_EAX, R_GBR );
  1223     ADD_imm32_r32( disp, R_EAX );
  1224     MMU_TRANSLATE_READ( R_EAX );
  1225     MEM_READ_BYTE( R_EAX, R_EAX );
  1226     store_reg( R_EAX, 0 );
  1227     sh4_x86.tstate = TSTATE_NONE;
  1228 :}
  1229 MOV.B @(disp, Rm), R0 {:  
  1230     COUNT_INST(I_MOVB);
  1231     load_reg( R_EAX, Rm );
  1232     ADD_imm32_r32( disp, R_EAX );
  1233     MMU_TRANSLATE_READ( R_EAX );
  1234     MEM_READ_BYTE( R_EAX, R_EAX );
  1235     store_reg( R_EAX, 0 );
  1236     sh4_x86.tstate = TSTATE_NONE;
  1237 :}
  1238 MOV.L Rm, @Rn {:
  1239     COUNT_INST(I_MOVL);
  1240     load_reg( R_EAX, Rn );
  1241     check_walign32(R_EAX);
  1242     MMU_TRANSLATE_WRITE( R_EAX );
  1243     load_reg( R_EDX, Rm );
  1244     MEM_WRITE_LONG( R_EAX, R_EDX );
  1245     sh4_x86.tstate = TSTATE_NONE;
  1246 :}
  1247 MOV.L Rm, @-Rn {:  
  1248     COUNT_INST(I_MOVL);
  1249     load_reg( R_EAX, Rn );
  1250     ADD_imm8s_r32( -4, R_EAX );
  1251     check_walign32( R_EAX );
  1252     MMU_TRANSLATE_WRITE( R_EAX );
  1253     load_reg( R_EDX, Rm );
  1254     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  1255     MEM_WRITE_LONG( R_EAX, R_EDX );
  1256     sh4_x86.tstate = TSTATE_NONE;
  1257 :}
  1258 MOV.L Rm, @(R0, Rn) {:  
  1259     COUNT_INST(I_MOVL);
  1260     load_reg( R_EAX, 0 );
  1261     load_reg( R_ECX, Rn );
  1262     ADD_r32_r32( R_ECX, R_EAX );
  1263     check_walign32( R_EAX );
  1264     MMU_TRANSLATE_WRITE( R_EAX );
  1265     load_reg( R_EDX, Rm );
  1266     MEM_WRITE_LONG( R_EAX, R_EDX );
  1267     sh4_x86.tstate = TSTATE_NONE;
  1268 :}
  1269 MOV.L R0, @(disp, GBR) {:  
  1270     COUNT_INST(I_MOVL);
  1271     load_spreg( R_EAX, R_GBR );
  1272     ADD_imm32_r32( disp, R_EAX );
  1273     check_walign32( R_EAX );
  1274     MMU_TRANSLATE_WRITE( R_EAX );
  1275     load_reg( R_EDX, 0 );
  1276     MEM_WRITE_LONG( R_EAX, R_EDX );
  1277     sh4_x86.tstate = TSTATE_NONE;
  1278 :}
  1279 MOV.L Rm, @(disp, Rn) {:  
  1280     COUNT_INST(I_MOVL);
  1281     load_reg( R_EAX, Rn );
  1282     ADD_imm32_r32( disp, R_EAX );
  1283     check_walign32( R_EAX );
  1284     MMU_TRANSLATE_WRITE( R_EAX );
  1285     load_reg( R_EDX, Rm );
  1286     MEM_WRITE_LONG( R_EAX, R_EDX );
  1287     sh4_x86.tstate = TSTATE_NONE;
  1288 :}
  1289 MOV.L @Rm, Rn {:  
  1290     COUNT_INST(I_MOVL);
  1291     load_reg( R_EAX, Rm );
  1292     check_ralign32( R_EAX );
  1293     MMU_TRANSLATE_READ( R_EAX );
  1294     MEM_READ_LONG( R_EAX, R_EAX );
  1295     store_reg( R_EAX, Rn );
  1296     sh4_x86.tstate = TSTATE_NONE;
  1297 :}
  1298 MOV.L @Rm+, Rn {:  
  1299     COUNT_INST(I_MOVL);
  1300     load_reg( R_EAX, Rm );
  1301     check_ralign32( R_EAX );
  1302     MMU_TRANSLATE_READ( R_EAX );
  1303     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1304     MEM_READ_LONG( R_EAX, R_EAX );
  1305     store_reg( R_EAX, Rn );
  1306     sh4_x86.tstate = TSTATE_NONE;
  1307 :}
  1308 MOV.L @(R0, Rm), Rn {:  
  1309     COUNT_INST(I_MOVL);
  1310     load_reg( R_EAX, 0 );
  1311     load_reg( R_ECX, Rm );
  1312     ADD_r32_r32( R_ECX, R_EAX );
  1313     check_ralign32( R_EAX );
  1314     MMU_TRANSLATE_READ( R_EAX );
  1315     MEM_READ_LONG( R_EAX, R_EAX );
  1316     store_reg( R_EAX, Rn );
  1317     sh4_x86.tstate = TSTATE_NONE;
  1318 :}
  1319 MOV.L @(disp, GBR), R0 {:
  1320     COUNT_INST(I_MOVL);
  1321     load_spreg( R_EAX, R_GBR );
  1322     ADD_imm32_r32( disp, R_EAX );
  1323     check_ralign32( R_EAX );
  1324     MMU_TRANSLATE_READ( R_EAX );
  1325     MEM_READ_LONG( R_EAX, R_EAX );
  1326     store_reg( R_EAX, 0 );
  1327     sh4_x86.tstate = TSTATE_NONE;
  1328 :}
  1329 MOV.L @(disp, PC), Rn {:  
  1330     COUNT_INST(I_MOVLPC);
  1331     if( sh4_x86.in_delay_slot ) {
  1332 	SLOTILLEGAL();
  1333     } else {
  1334 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1335 	if( IS_IN_ICACHE(target) ) {
  1336 	    // If the target address is in the same page as the code, it's
  1337 	    // pretty safe to just ref it directly and circumvent the whole
  1338 	    // memory subsystem. (this is a big performance win)
  1340 	    // FIXME: There's a corner-case that's not handled here when
  1341 	    // the current code-page is in the ITLB but not in the UTLB.
  1342 	    // (should generate a TLB miss although need to test SH4 
  1343 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1344 	    // behaviour though.
  1345 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1346 	    MOV_moff32_EAX( ptr );
  1347 	} else {
  1348 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1349 	    // different virtual address than the translation was done with,
  1350 	    // but we can safely assume that the low bits are the same.
  1351 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1352 	    ADD_sh4r_r32( R_PC, R_EAX );
  1353 	    MMU_TRANSLATE_READ( R_EAX );
  1354 	    MEM_READ_LONG( R_EAX, R_EAX );
   1355 	    sh4_x86.tstate = TSTATE_NONE;
   1356 	}
   1357 	store_reg( R_EAX, Rn );
   1358     }
   1359 :}
  1360 MOV.L @(disp, Rm), Rn {:  
  1361     COUNT_INST(I_MOVL);
  1362     load_reg( R_EAX, Rm );
  1363     ADD_imm8s_r32( disp, R_EAX );
  1364     check_ralign32( R_EAX );
  1365     MMU_TRANSLATE_READ( R_EAX );
  1366     MEM_READ_LONG( R_EAX, R_EAX );
  1367     store_reg( R_EAX, Rn );
  1368     sh4_x86.tstate = TSTATE_NONE;
  1369 :}
  1370 MOV.W Rm, @Rn {:  
  1371     COUNT_INST(I_MOVW);
  1372     load_reg( R_EAX, Rn );
  1373     check_walign16( R_EAX );
   1374     MMU_TRANSLATE_WRITE( R_EAX );
  1375     load_reg( R_EDX, Rm );
  1376     MEM_WRITE_WORD( R_EAX, R_EDX );
  1377     sh4_x86.tstate = TSTATE_NONE;
  1378 :}
  1379 MOV.W Rm, @-Rn {:  
  1380     COUNT_INST(I_MOVW);
  1381     load_reg( R_EAX, Rn );
  1382     ADD_imm8s_r32( -2, R_EAX );
  1383     check_walign16( R_EAX );
  1384     MMU_TRANSLATE_WRITE( R_EAX );
  1385     load_reg( R_EDX, Rm );
  1386     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
  1387     MEM_WRITE_WORD( R_EAX, R_EDX );
  1388     sh4_x86.tstate = TSTATE_NONE;
  1389 :}
  1390 MOV.W Rm, @(R0, Rn) {:  
  1391     COUNT_INST(I_MOVW);
  1392     load_reg( R_EAX, 0 );
  1393     load_reg( R_ECX, Rn );
  1394     ADD_r32_r32( R_ECX, R_EAX );
  1395     check_walign16( R_EAX );
  1396     MMU_TRANSLATE_WRITE( R_EAX );
  1397     load_reg( R_EDX, Rm );
  1398     MEM_WRITE_WORD( R_EAX, R_EDX );
  1399     sh4_x86.tstate = TSTATE_NONE;
  1400 :}
  1401 MOV.W R0, @(disp, GBR) {:  
  1402     COUNT_INST(I_MOVW);
  1403     load_spreg( R_EAX, R_GBR );
  1404     ADD_imm32_r32( disp, R_EAX );
  1405     check_walign16( R_EAX );
  1406     MMU_TRANSLATE_WRITE( R_EAX );
  1407     load_reg( R_EDX, 0 );
  1408     MEM_WRITE_WORD( R_EAX, R_EDX );
  1409     sh4_x86.tstate = TSTATE_NONE;
  1410 :}
  1411 MOV.W R0, @(disp, Rn) {:  
  1412     COUNT_INST(I_MOVW);
  1413     load_reg( R_EAX, Rn );
  1414     ADD_imm32_r32( disp, R_EAX );
  1415     check_walign16( R_EAX );
  1416     MMU_TRANSLATE_WRITE( R_EAX );
  1417     load_reg( R_EDX, 0 );
  1418     MEM_WRITE_WORD( R_EAX, R_EDX );
  1419     sh4_x86.tstate = TSTATE_NONE;
  1420 :}
  1421 MOV.W @Rm, Rn {:  
  1422     COUNT_INST(I_MOVW);
  1423     load_reg( R_EAX, Rm );
  1424     check_ralign16( R_EAX );
  1425     MMU_TRANSLATE_READ( R_EAX );
  1426     MEM_READ_WORD( R_EAX, R_EAX );
  1427     store_reg( R_EAX, Rn );
  1428     sh4_x86.tstate = TSTATE_NONE;
  1429 :}
  1430 MOV.W @Rm+, Rn {:  
  1431     COUNT_INST(I_MOVW);
  1432     load_reg( R_EAX, Rm );
  1433     check_ralign16( R_EAX );
  1434     MMU_TRANSLATE_READ( R_EAX );
  1435     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
  1436     MEM_READ_WORD( R_EAX, R_EAX );
  1437     store_reg( R_EAX, Rn );
  1438     sh4_x86.tstate = TSTATE_NONE;
  1439 :}
  1440 MOV.W @(R0, Rm), Rn {:  
  1441     COUNT_INST(I_MOVW);
  1442     load_reg( R_EAX, 0 );
  1443     load_reg( R_ECX, Rm );
  1444     ADD_r32_r32( R_ECX, R_EAX );
  1445     check_ralign16( R_EAX );
  1446     MMU_TRANSLATE_READ( R_EAX );
  1447     MEM_READ_WORD( R_EAX, R_EAX );
  1448     store_reg( R_EAX, Rn );
  1449     sh4_x86.tstate = TSTATE_NONE;
  1450 :}
  1451 MOV.W @(disp, GBR), R0 {:  
  1452     COUNT_INST(I_MOVW);
  1453     load_spreg( R_EAX, R_GBR );
  1454     ADD_imm32_r32( disp, R_EAX );
  1455     check_ralign16( R_EAX );
  1456     MMU_TRANSLATE_READ( R_EAX );
  1457     MEM_READ_WORD( R_EAX, R_EAX );
  1458     store_reg( R_EAX, 0 );
  1459     sh4_x86.tstate = TSTATE_NONE;
  1460 :}
  1461 MOV.W @(disp, PC), Rn {:  
  1462     COUNT_INST(I_MOVW);
  1463     if( sh4_x86.in_delay_slot ) {
  1464 	SLOTILLEGAL();
  1465     } else {
  1466 	// See comments for MOV.L @(disp, PC), Rn
  1467 	uint32_t target = pc + disp + 4;
  1468 	if( IS_IN_ICACHE(target) ) {
  1469 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1470 	    MOV_moff32_EAX( ptr );
  1471 	    MOVSX_r16_r32( R_EAX, R_EAX );
  1472 	} else {
  1473 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1474 	    ADD_sh4r_r32( R_PC, R_EAX );
  1475 	    MMU_TRANSLATE_READ( R_EAX );
  1476 	    MEM_READ_WORD( R_EAX, R_EAX );
   1477 	    sh4_x86.tstate = TSTATE_NONE;
   1478 	}
   1479 	store_reg( R_EAX, Rn );
   1480     }
   1481 :}
  1482 MOV.W @(disp, Rm), R0 {:  
  1483     COUNT_INST(I_MOVW);
  1484     load_reg( R_EAX, Rm );
  1485     ADD_imm32_r32( disp, R_EAX );
  1486     check_ralign16( R_EAX );
  1487     MMU_TRANSLATE_READ( R_EAX );
  1488     MEM_READ_WORD( R_EAX, R_EAX );
  1489     store_reg( R_EAX, 0 );
  1490     sh4_x86.tstate = TSTATE_NONE;
  1491 :}
  1492 MOVA @(disp, PC), R0 {:  
  1493     COUNT_INST(I_MOVA);
  1494     if( sh4_x86.in_delay_slot ) {
  1495 	SLOTILLEGAL();
  1496     } else {
  1497 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1498 	ADD_sh4r_r32( R_PC, R_ECX );
  1499 	store_reg( R_ECX, 0 );
   1500 	sh4_x86.tstate = TSTATE_NONE;
   1501     }
   1502 :}
  1503 MOVCA.L R0, @Rn {:  
  1504     COUNT_INST(I_MOVCA);
  1505     load_reg( R_EAX, Rn );
  1506     check_walign32( R_EAX );
  1507     MMU_TRANSLATE_WRITE( R_EAX );
  1508     load_reg( R_EDX, 0 );
  1509     MEM_WRITE_LONG( R_EAX, R_EDX );
  1510     sh4_x86.tstate = TSTATE_NONE;
  1511 :}
  1513 /* Control transfer instructions */
  1514 BF disp {:
  1515     COUNT_INST(I_BF);
  1516     if( sh4_x86.in_delay_slot ) {
  1517 	SLOTILLEGAL();
  1518     } else {
  1519 	sh4vma_t target = disp + pc + 4;
  1520 	JT_rel8( nottaken );
  1521 	exit_block_rel(target, pc+2 );
  1522 	JMP_TARGET(nottaken);
   1523 	return 2;
   1524     }
   1525 :}
  1526 BF/S disp {:
  1527     COUNT_INST(I_BFS);
  1528     if( sh4_x86.in_delay_slot ) {
  1529 	SLOTILLEGAL();
  1530     } else {
  1531 	sh4_x86.in_delay_slot = DELAY_PC;
  1532 	if( UNTRANSLATABLE(pc+2) ) {
  1533 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1534 	    JT_rel8(nottaken);
  1535 	    ADD_imm32_r32( disp, R_EAX );
  1536 	    JMP_TARGET(nottaken);
  1537 	    ADD_sh4r_r32( R_PC, R_EAX );
  1538 	    store_spreg( R_EAX, R_NEW_PC );
  1539 	    exit_block_emu(pc+2);
  1540 	    sh4_x86.branch_taken = TRUE;
  1541 	    return 2;
  1542 	} else {
  1543 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1544 		CMP_imm8s_sh4r( 1, R_T );
   1545 		sh4_x86.tstate = TSTATE_E;
   1546 	    }
  1547 	    sh4vma_t target = disp + pc + 4;
  1548 	    OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
  1549 	    int save_tstate = sh4_x86.tstate;
  1550 	    sh4_translate_instruction(pc+2);
  1551 	    exit_block_rel( target, pc+4 );
  1553 	    // not taken
  1554 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1555 	    sh4_x86.tstate = save_tstate;
  1556 	    sh4_translate_instruction(pc+2);
   1557 	    return 4;
   1558 	}
   1559     }
   1560 :}
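/* The rel32 patch in BF/S above (and BT/S below) relies on x86 branch
 * displacements being relative to the end of the instruction: patch
 * points at the 4-byte displacement field, so the not-taken distance is
 * target - (patch + 4), i.e. (xlat_output - (uint8_t *)patch) - 4. */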
  1561 BRA disp {:  
  1562     COUNT_INST(I_BRA);
  1563     if( sh4_x86.in_delay_slot ) {
  1564 	SLOTILLEGAL();
  1565     } else {
  1566 	sh4_x86.in_delay_slot = DELAY_PC;
  1567 	sh4_x86.branch_taken = TRUE;
  1568 	if( UNTRANSLATABLE(pc+2) ) {
  1569 	    load_spreg( R_EAX, R_PC );
  1570 	    ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
  1571 	    store_spreg( R_EAX, R_NEW_PC );
  1572 	    exit_block_emu(pc+2);
  1573 	    return 2;
  1574 	} else {
  1575 	    sh4_translate_instruction( pc + 2 );
  1576 	    exit_block_rel( disp + pc + 4, pc+4 );
   1577 	    return 4;
   1578 	}
   1579     }
   1580 :}
  1581 BRAF Rn {:  
  1582     COUNT_INST(I_BRAF);
  1583     if( sh4_x86.in_delay_slot ) {
  1584 	SLOTILLEGAL();
  1585     } else {
  1586 	load_spreg( R_EAX, R_PC );
  1587 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1588 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1589 	store_spreg( R_EAX, R_NEW_PC );
  1590 	sh4_x86.in_delay_slot = DELAY_PC;
  1591 	sh4_x86.tstate = TSTATE_NONE;
  1592 	sh4_x86.branch_taken = TRUE;
  1593 	if( UNTRANSLATABLE(pc+2) ) {
  1594 	    exit_block_emu(pc+2);
  1595 	    return 2;
  1596 	} else {
  1597 	    sh4_translate_instruction( pc + 2 );
  1598 	    exit_block_newpcset(pc+2);
   1599 	    return 4;
   1600 	}
   1601     }
   1602 :}
  1603 BSR disp {:  
  1604     COUNT_INST(I_BSR);
  1605     if( sh4_x86.in_delay_slot ) {
  1606 	SLOTILLEGAL();
  1607     } else {
  1608 	load_spreg( R_EAX, R_PC );
  1609 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1610 	store_spreg( R_EAX, R_PR );
  1611 	sh4_x86.in_delay_slot = DELAY_PC;
  1612 	sh4_x86.branch_taken = TRUE;
  1613 	sh4_x86.tstate = TSTATE_NONE;
  1614 	if( UNTRANSLATABLE(pc+2) ) {
  1615 	    ADD_imm32_r32( disp, R_EAX );
  1616 	    store_spreg( R_EAX, R_NEW_PC );
  1617 	    exit_block_emu(pc+2);
  1618 	    return 2;
  1619 	} else {
  1620 	    sh4_translate_instruction( pc + 2 );
  1621 	    exit_block_rel( disp + pc + 4, pc+4 );
   1622 	    return 4;
   1623 	}
   1624     }
   1625 :}
  1626 BSRF Rn {:  
  1627     COUNT_INST(I_BSRF);
  1628     if( sh4_x86.in_delay_slot ) {
  1629 	SLOTILLEGAL();
  1630     } else {
  1631 	load_spreg( R_EAX, R_PC );
  1632 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1633 	store_spreg( R_EAX, R_PR );
  1634 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1635 	store_spreg( R_EAX, R_NEW_PC );
  1637 	sh4_x86.in_delay_slot = DELAY_PC;
  1638 	sh4_x86.tstate = TSTATE_NONE;
  1639 	sh4_x86.branch_taken = TRUE;
  1640 	if( UNTRANSLATABLE(pc+2) ) {
  1641 	    exit_block_emu(pc+2);
  1642 	    return 2;
  1643 	} else {
  1644 	    sh4_translate_instruction( pc + 2 );
  1645 	    exit_block_newpcset(pc+2);
   1646 	    return 4;
   1647 	}
   1648     }
   1649 :}
  1650 BT disp {:
  1651     COUNT_INST(I_BT);
  1652     if( sh4_x86.in_delay_slot ) {
  1653 	SLOTILLEGAL();
  1654     } else {
  1655 	sh4vma_t target = disp + pc + 4;
  1656 	JF_rel8( nottaken );
  1657 	exit_block_rel(target, pc+2 );
  1658 	JMP_TARGET(nottaken);
   1659 	return 2;
   1660     }
   1661 :}
  1662 BT/S disp {:
  1663     COUNT_INST(I_BTS);
  1664     if( sh4_x86.in_delay_slot ) {
  1665 	SLOTILLEGAL();
  1666     } else {
  1667 	sh4_x86.in_delay_slot = DELAY_PC;
  1668 	if( UNTRANSLATABLE(pc+2) ) {
  1669 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1670 	    JF_rel8(nottaken);
  1671 	    ADD_imm32_r32( disp, R_EAX );
  1672 	    JMP_TARGET(nottaken);
  1673 	    ADD_sh4r_r32( R_PC, R_EAX );
  1674 	    store_spreg( R_EAX, R_NEW_PC );
  1675 	    exit_block_emu(pc+2);
  1676 	    sh4_x86.branch_taken = TRUE;
  1677 	    return 2;
  1678 	} else {
  1679 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1680 		CMP_imm8s_sh4r( 1, R_T );
  1681 		sh4_x86.tstate = TSTATE_E;
  1682 	    }
  1683 	    OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
  1684 	    int save_tstate = sh4_x86.tstate;
  1685 	    sh4_translate_instruction(pc+2);
  1686 	    exit_block_rel( disp + pc + 4, pc+4 );
  1687 	    // not taken
  1688 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1689 	    sh4_x86.tstate = save_tstate;
  1690 	    sh4_translate_instruction(pc+2);
  1691 	    return 4;
  1692 	}
  1693     }
  1694 :}
  1695 JMP @Rn {:  
  1696     COUNT_INST(I_JMP);
  1697     if( sh4_x86.in_delay_slot ) {
  1698 	SLOTILLEGAL();
  1699     } else {
  1700 	load_reg( R_ECX, Rn );
  1701 	store_spreg( R_ECX, R_NEW_PC );
  1702 	sh4_x86.in_delay_slot = DELAY_PC;
  1703 	sh4_x86.branch_taken = TRUE;
  1704 	if( UNTRANSLATABLE(pc+2) ) {
  1705 	    exit_block_emu(pc+2);
  1706 	    return 2;
  1707 	} else {
  1708 	    sh4_translate_instruction(pc+2);
  1709 	    exit_block_newpcset(pc+2);
  1710 	    return 4;
  1711 	}
  1712     }
  1713 :}
  1714 JSR @Rn {:  
  1715     COUNT_INST(I_JSR);
  1716     if( sh4_x86.in_delay_slot ) {
  1717 	SLOTILLEGAL();
  1718     } else {
  1719 	load_spreg( R_EAX, R_PC );
  1720 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1721 	store_spreg( R_EAX, R_PR );
  1722 	load_reg( R_ECX, Rn );
  1723 	store_spreg( R_ECX, R_NEW_PC );
  1724 	sh4_x86.in_delay_slot = DELAY_PC;
  1725 	sh4_x86.branch_taken = TRUE;
  1726 	sh4_x86.tstate = TSTATE_NONE;
  1727 	if( UNTRANSLATABLE(pc+2) ) {
  1728 	    exit_block_emu(pc+2);
  1729 	    return 2;
  1730 	} else {
  1731 	    sh4_translate_instruction(pc+2);
  1732 	    exit_block_newpcset(pc+2);
  1733 	    return 4;
  1734 	}
  1735     }
  1736 :}
  1737 RTE {:  
  1738     COUNT_INST(I_RTE);
  1739     if( sh4_x86.in_delay_slot ) {
  1740 	SLOTILLEGAL();
  1741     } else {
  1742 	check_priv();
  1743 	load_spreg( R_ECX, R_SPC );
  1744 	store_spreg( R_ECX, R_NEW_PC );
  1745 	load_spreg( R_EAX, R_SSR );
  1746 	call_func1( sh4_write_sr, R_EAX );
  1747 	sh4_x86.in_delay_slot = DELAY_PC;
  1748 	sh4_x86.priv_checked = FALSE;
  1749 	sh4_x86.fpuen_checked = FALSE;
  1750 	sh4_x86.tstate = TSTATE_NONE;
  1751 	sh4_x86.branch_taken = TRUE;
  1752 	if( UNTRANSLATABLE(pc+2) ) {
  1753 	    exit_block_emu(pc+2);
  1754 	    return 2;
  1755 	} else {
  1756 	    sh4_translate_instruction(pc+2);
  1757 	    exit_block_newpcset(pc+2);
  1758 	    return 4;
  1759 	}
  1760     }
  1761 :}
  1762 RTS {:  
  1763     COUNT_INST(I_RTS);
  1764     if( sh4_x86.in_delay_slot ) {
  1765 	SLOTILLEGAL();
  1766     } else {
  1767 	load_spreg( R_ECX, R_PR );
  1768 	store_spreg( R_ECX, R_NEW_PC );
  1769 	sh4_x86.in_delay_slot = DELAY_PC;
  1770 	sh4_x86.branch_taken = TRUE;
  1771 	if( UNTRANSLATABLE(pc+2) ) {
  1772 	    exit_block_emu(pc+2);
  1773 	    return 2;
  1774 	} else {
  1775 	    sh4_translate_instruction(pc+2);
  1776 	    exit_block_newpcset(pc+2);
  1777 	    return 4;
  1778 	}
  1779     }
  1780 :}
  1781 TRAPA #imm {:  
  1782     COUNT_INST(I_TRAPA);
  1783     if( sh4_x86.in_delay_slot ) {
  1784 	SLOTILLEGAL();
  1785     } else {
  1786 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1787 	ADD_r32_sh4r( R_ECX, R_PC );
  1788 	load_imm32( R_EAX, imm );
  1789 	call_func1( sh4_raise_trap, R_EAX );
  1790 	sh4_x86.tstate = TSTATE_NONE;
  1791 	exit_block_pcset(pc);
  1792 	sh4_x86.branch_taken = TRUE;
  1793 	return 2;
  1794     }
  1795 :}
  1796 UNDEF {:  
  1797     COUNT_INST(I_UNDEF);
  1798     if( sh4_x86.in_delay_slot ) {
  1799 	SLOTILLEGAL();
  1800     } else {
  1801 	JMP_exc(EXC_ILLEGAL);
  1802 	return 2;
  1803     }
  1804 :}
  1806 CLRMAC {:  
  1807     COUNT_INST(I_CLRMAC);
  1808     XOR_r32_r32(R_EAX, R_EAX);
  1809     store_spreg( R_EAX, R_MACL );
  1810     store_spreg( R_EAX, R_MACH );
  1811     sh4_x86.tstate = TSTATE_NONE;
  1812 :}
  1813 CLRS {:
  1814     COUNT_INST(I_CLRS);
  1815     CLC();
  1816     SETC_sh4r(R_S);
  1817     sh4_x86.tstate = TSTATE_NONE;
  1818 :}
  1819 CLRT {:  
  1820     COUNT_INST(I_CLRT);
  1821     CLC();
  1822     SETC_t();
  1823     sh4_x86.tstate = TSTATE_C;
  1824 :}
  1825 SETS {:  
  1826     COUNT_INST(I_SETS);
  1827     STC();
  1828     SETC_sh4r(R_S);
  1829     sh4_x86.tstate = TSTATE_NONE;
  1830 :}
  1831 SETT {:  
  1832     COUNT_INST(I_SETT);
  1833     STC();
  1834     SETC_t();
  1835     sh4_x86.tstate = TSTATE_C;
  1836 :}
  1838 /* Floating point moves */
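       /* When FPSCR.SZ is set (mirrored at translate time in
        * sh4_x86.double_size), FMOV transfers 64-bit register pairs, so each
        * form below emits either a dr0/dr1 pair access or a single fr access.
        */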
  1839 FMOV FRm, FRn {:  
  1840     COUNT_INST(I_FMOV1);
  1841     check_fpuen();
  1842     if( sh4_x86.double_size ) {
  1843         load_dr0( R_EAX, FRm );
  1844         load_dr1( R_ECX, FRm );
  1845         store_dr0( R_EAX, FRn );
  1846         store_dr1( R_ECX, FRn );
  1847     } else {
  1848         load_fr( R_EAX, FRm ); // SZ=0 branch
  1849         store_fr( R_EAX, FRn );
  1850     }
  1851 :}
  1852 FMOV FRm, @Rn {: 
  1853     COUNT_INST(I_FMOV2);
  1854     check_fpuen();
  1855     load_reg( R_EAX, Rn );
  1856     if( sh4_x86.double_size ) {
  1857         check_walign64( R_EAX );
  1858         MMU_TRANSLATE_WRITE( R_EAX );
  1859         load_dr0( R_EDX, FRm );
  1860         load_dr1( R_ECX, FRm );
  1861         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1862     } else {
  1863         check_walign32( R_EAX );
  1864         MMU_TRANSLATE_WRITE( R_EAX );
  1865         load_fr( R_EDX, FRm );
  1866         MEM_WRITE_LONG( R_EAX, R_EDX );
  1867     }
  1868     sh4_x86.tstate = TSTATE_NONE;
  1869 :}
  1870 FMOV @Rm, FRn {:  
  1871     COUNT_INST(I_FMOV5);
  1872     check_fpuen();
  1873     load_reg( R_EAX, Rm );
  1874     if( sh4_x86.double_size ) {
  1875         check_ralign64( R_EAX );
  1876         MMU_TRANSLATE_READ( R_EAX );
  1877         MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
  1878         store_dr0( R_EDX, FRn );
  1879         store_dr1( R_EAX, FRn );    
  1880     } else {
  1881         check_ralign32( R_EAX );
  1882         MMU_TRANSLATE_READ( R_EAX );
  1883         MEM_READ_LONG( R_EAX, R_EAX );
  1884         store_fr( R_EAX, FRn );
  1885     }
  1886     sh4_x86.tstate = TSTATE_NONE;
  1887 :}
  1888 FMOV FRm, @-Rn {:  
  1889     COUNT_INST(I_FMOV3);
  1890     check_fpuen();
  1891     load_reg( R_EAX, Rn );
  1892     if( sh4_x86.double_size ) {
  1893         check_walign64( R_EAX );
  1894         ADD_imm8s_r32(-8,R_EAX);
  1895         MMU_TRANSLATE_WRITE( R_EAX );
  1896         load_dr0( R_EDX, FRm );
  1897         load_dr1( R_ECX, FRm );
  1898         ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1899         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1900     } else {
  1901         check_walign32( R_EAX );
  1902         ADD_imm8s_r32( -4, R_EAX );
  1903         MMU_TRANSLATE_WRITE( R_EAX );
  1904         load_fr( R_EDX, FRm );
  1905         ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1906         MEM_WRITE_LONG( R_EAX, R_EDX );
  1907     }
  1908     sh4_x86.tstate = TSTATE_NONE;
  1909 :}
  1910 FMOV @Rm+, FRn {:
  1911     COUNT_INST(I_FMOV6);
  1912     check_fpuen();
  1913     load_reg( R_EAX, Rm );
  1914     if( sh4_x86.double_size ) {
  1915         check_ralign64( R_EAX );
  1916         MMU_TRANSLATE_READ( R_EAX );
  1917         ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1918         MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
  1919         store_dr0( R_EDX, FRn );
  1920         store_dr1( R_EAX, FRn );
  1921     } else {
  1922         check_ralign32( R_EAX );
  1923         MMU_TRANSLATE_READ( R_EAX );
  1924         ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1925         MEM_READ_LONG( R_EAX, R_EAX );
  1926         store_fr( R_EAX, FRn );
  1927     }
  1928     sh4_x86.tstate = TSTATE_NONE;
  1929 :}
  1930 FMOV FRm, @(R0, Rn) {:  
  1931     COUNT_INST(I_FMOV4);
  1932     check_fpuen();
  1933     load_reg( R_EAX, Rn );
  1934     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1935     if( sh4_x86.double_size ) {
  1936         check_walign64( R_EAX );
  1937         MMU_TRANSLATE_WRITE( R_EAX );
  1938         load_dr0( R_EDX, FRm );
  1939         load_dr1( R_ECX, FRm );
  1940         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1941     } else {
  1942         check_walign32( R_EAX );
  1943         MMU_TRANSLATE_WRITE( R_EAX );
  1944         load_fr( R_EDX, FRm );
  1945         MEM_WRITE_LONG( R_EAX, R_EDX ); // 12
  1946     }
  1947     sh4_x86.tstate = TSTATE_NONE;
  1948 :}
  1949 FMOV @(R0, Rm), FRn {:  
  1950     COUNT_INST(I_FMOV7);
  1951     check_fpuen();
  1952     load_reg( R_EAX, Rm );
  1953     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1954     if( sh4_x86.double_size ) {
  1955         check_ralign64( R_EAX );
  1956         MMU_TRANSLATE_READ( R_EAX );
  1957         MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1958         store_dr0( R_ECX, FRn );
  1959         store_dr1( R_EAX, FRn );
  1960     } else {
  1961         check_ralign32( R_EAX );
  1962         MMU_TRANSLATE_READ( R_EAX );
  1963         MEM_READ_LONG( R_EAX, R_EAX );
  1964         store_fr( R_EAX, FRn );
  1965     }
  1966     sh4_x86.tstate = TSTATE_NONE;
  1967 :}
  1968 FLDI0 FRn {:  /* IFF PR=0 */
  1969     COUNT_INST(I_FLDI0);
  1970     check_fpuen();
  1971     if( sh4_x86.double_prec == 0 ) {
  1972         XOR_r32_r32( R_EAX, R_EAX );
  1973         store_fr( R_EAX, FRn );
  1974     }
  1975     sh4_x86.tstate = TSTATE_NONE;
  1976 :}
  1977 FLDI1 FRn {:  /* IFF PR=0 */
  1978     COUNT_INST(I_FLDI1);
  1979     check_fpuen();
  1980     if( sh4_x86.double_prec == 0 ) {
  1981         load_imm32(R_EAX, 0x3F800000);
  1982         store_fr( R_EAX, FRn );
  1983     }
  1984 :}
  1986 FLOAT FPUL, FRn {:  
  1987     COUNT_INST(I_FLOAT);
  1988     check_fpuen();
  1989     FILD_sh4r(R_FPUL);
  1990     if( sh4_x86.double_prec ) {
  1991         pop_dr( FRn );
  1992     } else {
  1993         pop_fr( FRn );
  1994     }
  1995 :}
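       /* FTRC below saturates out-of-range operands: the value is compared
        * against max_int and min_int (each FCOMIP pops its comparand) and the
        * clamp value is stored directly when either bound is crossed. In the
        * in-range case the x87 control word is temporarily swapped
        * (FNSTCW/FLDCW) for trunc_fcw - presumably round-toward-zero - so that
        * FISTP truncates as the SH4 instruction requires.
        */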
  1996 FTRC FRm, FPUL {:  
  1997     COUNT_INST(I_FTRC);
  1998     check_fpuen();
  1999     if( sh4_x86.double_prec ) {
  2000         push_dr( FRm );
  2001     } else {
  2002         push_fr( FRm );
  2003     }
  2004     load_ptr( R_ECX, &max_int );
  2005     FILD_r32ind( R_ECX );
  2006     FCOMIP_st(1);
  2007     JNA_rel8( sat );
  2008     load_ptr( R_ECX, &min_int );  // 5
  2009     FILD_r32ind( R_ECX );           // 2
  2010     FCOMIP_st(1);                   // 2
  2011     JAE_rel8( sat2 );            // 2
  2012     load_ptr( R_EAX, &save_fcw );
  2013     FNSTCW_r32ind( R_EAX );
  2014     load_ptr( R_EDX, &trunc_fcw );
  2015     FLDCW_r32ind( R_EDX );
  2016     FISTP_sh4r(R_FPUL);             // 3
  2017     FLDCW_r32ind( R_EAX );
  2018     JMP_rel8(end);             // 2
  2020     JMP_TARGET(sat);
  2021     JMP_TARGET(sat2);
  2022     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  2023     store_spreg( R_ECX, R_FPUL );
  2024     FPOP_st();
  2025     JMP_TARGET(end);
  2026     sh4_x86.tstate = TSTATE_NONE;
  2027 :}
  2028 FLDS FRm, FPUL {:  
  2029     COUNT_INST(I_FLDS);
  2030     check_fpuen();
  2031     load_fr( R_EAX, FRm );
  2032     store_spreg( R_EAX, R_FPUL );
  2033 :}
  2034 FSTS FPUL, FRn {:  
  2035     COUNT_INST(I_FSTS);
  2036     check_fpuen();
  2037     load_spreg( R_EAX, R_FPUL );
  2038     store_fr( R_EAX, FRn );
  2039 :}
  2040 FCNVDS FRm, FPUL {:  
  2041     COUNT_INST(I_FCNVDS);
  2042     check_fpuen();
  2043     if( sh4_x86.double_prec ) {
  2044         push_dr( FRm );
  2045         pop_fpul();
  2046     }
  2047 :}
  2048 FCNVSD FPUL, FRn {:  
  2049     COUNT_INST(I_FCNVSD);
  2050     check_fpuen();
  2051     if( sh4_x86.double_prec ) {
  2052         push_fpul();
  2053         pop_dr( FRn );
  2054     }
  2055 :}
  2057 /* Floating point instructions */
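       /* The arithmetic forms below go through the x87 stack: push_fr/push_dr
        * load the SH4 register onto the FPU stack, the operation runs there,
        * and pop_fr/pop_dr write the result back. sh4_x86.double_prec selects
        * between the single-precision (FRn) and double-precision (DRn) forms.
        */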
  2058 FABS FRn {:  
  2059     COUNT_INST(I_FABS);
  2060     check_fpuen();
  2061     if( sh4_x86.double_prec ) {
  2062         push_dr(FRn);
  2063         FABS_st0();
  2064         pop_dr(FRn);
  2065     } else {
  2066         push_fr(FRn);
  2067         FABS_st0();
  2068         pop_fr(FRn);
  2069     }
  2070 :}
  2071 FADD FRm, FRn {:  
  2072     COUNT_INST(I_FADD);
  2073     check_fpuen();
  2074     if( sh4_x86.double_prec ) {
  2075         push_dr(FRm);
  2076         push_dr(FRn);
  2077         FADDP_st(1);
  2078         pop_dr(FRn);
  2079     } else {
  2080         push_fr(FRm);
  2081         push_fr(FRn);
  2082         FADDP_st(1);
  2083         pop_fr(FRn);
  2084     }
  2085 :}
  2086 FDIV FRm, FRn {:  
  2087     COUNT_INST(I_FDIV);
  2088     check_fpuen();
  2089     if( sh4_x86.double_prec ) {
  2090         push_dr(FRn);
  2091         push_dr(FRm);
  2092         FDIVP_st(1);
  2093         pop_dr(FRn);
  2094     } else {
  2095         push_fr(FRn);
  2096         push_fr(FRm);
  2097         FDIVP_st(1);
  2098         pop_fr(FRn);
  2099     }
  2100 :}
  2101 FMAC FR0, FRm, FRn {:  
  2102     COUNT_INST(I_FMAC);
  2103     check_fpuen();
  2104     if( sh4_x86.double_prec ) {
  2105         push_dr( 0 );
  2106         push_dr( FRm );
  2107         FMULP_st(1);
  2108         push_dr( FRn );
  2109         FADDP_st(1);
  2110         pop_dr( FRn );
  2111     } else {
  2112         push_fr( 0 );
  2113         push_fr( FRm );
  2114         FMULP_st(1);
  2115         push_fr( FRn );
  2116         FADDP_st(1);
  2117         pop_fr( FRn );
  2118     }
  2119 :}
  2121 FMUL FRm, FRn {:  
  2122     COUNT_INST(I_FMUL);
  2123     check_fpuen();
  2124     if( sh4_x86.double_prec ) {
  2125         push_dr(FRm);
  2126         push_dr(FRn);
  2127         FMULP_st(1);
  2128         pop_dr(FRn);
  2129     } else {
  2130         push_fr(FRm);
  2131         push_fr(FRn);
  2132         FMULP_st(1);
  2133         pop_fr(FRn);
  2134     }
  2135 :}
  2136 FNEG FRn {:  
  2137     COUNT_INST(I_FNEG);
  2138     check_fpuen();
  2139     if( sh4_x86.double_prec ) {
  2140         push_dr(FRn);
  2141         FCHS_st0();
  2142         pop_dr(FRn);
  2143     } else {
  2144         push_fr(FRn);
  2145         FCHS_st0();
  2146         pop_fr(FRn);
  2147     }
  2148 :}
  2149 FSRRA FRn {:  
  2150     COUNT_INST(I_FSRRA);
  2151     check_fpuen();
  2152     if( sh4_x86.double_prec == 0 ) {
  2153         FLD1_st0();
  2154         push_fr(FRn);
  2155         FSQRT_st0();
  2156         FDIVP_st(1);
  2157         pop_fr(FRn);
  2158     }
  2159 :}
  2160 FSQRT FRn {:  
  2161     COUNT_INST(I_FSQRT);
  2162     check_fpuen();
  2163     if( sh4_x86.double_prec ) {
  2164         push_dr(FRn);
  2165         FSQRT_st0();
  2166         pop_dr(FRn);
  2167     } else {
  2168         push_fr(FRn);
  2169         FSQRT_st0();
  2170         pop_fr(FRn);
  2171     }
  2172 :}
  2173 FSUB FRm, FRn {:  
  2174     COUNT_INST(I_FSUB);
  2175     check_fpuen();
  2176     if( sh4_x86.double_prec ) {
  2177         push_dr(FRn);
  2178         push_dr(FRm);
  2179         FSUBP_st(1);
  2180         pop_dr(FRn);
  2181     } else {
  2182         push_fr(FRn);
  2183         push_fr(FRm);
  2184         FSUBP_st(1);
  2185         pop_fr(FRn);
  2186     }
  2187 :}
  2189 FCMP/EQ FRm, FRn {:  
  2190     COUNT_INST(I_FCMPEQ);
  2191     check_fpuen();
  2192     if( sh4_x86.double_prec ) {
  2193         push_dr(FRm);
  2194         push_dr(FRn);
  2195     } else {
  2196         push_fr(FRm);
  2197         push_fr(FRn);
  2198     }
  2199     FCOMIP_st(1);
  2200     SETE_t();
  2201     FPOP_st();
  2202     sh4_x86.tstate = TSTATE_E;
  2203 :}
  2204 FCMP/GT FRm, FRn {:  
  2205     COUNT_INST(I_FCMPGT);
  2206     check_fpuen();
  2207     if( sh4_x86.double_prec ) {
  2208         push_dr(FRm);
  2209         push_dr(FRn);
  2210     } else {
  2211         push_fr(FRm);
  2212         push_fr(FRn);
  2213     }
  2214     FCOMIP_st(1);
  2215     SETA_t();
  2216     FPOP_st();
  2217     sh4_x86.tstate = TSTATE_A;
  2218 :}
  2220 FSCA FPUL, FRn {:  
  2221     COUNT_INST(I_FSCA);
  2222     check_fpuen();
  2223     if( sh4_x86.double_prec == 0 ) {
  2224         LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
  2225         load_spreg( R_EAX, R_FPUL );
  2226         call_func2( sh4_fsca, R_EAX, R_EDX );
  2227     }
  2228     sh4_x86.tstate = TSTATE_NONE;
  2229 :}
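       /* FSCA takes the fixed-point angle in FPUL (one full turn = 0x10000)
        * and writes sine/cosine into the FRn/FRn+1 pair; the computation
        * itself is delegated to the sh4_fsca helper, which gets FPUL and a
        * pointer to the (even-aligned) register pair.
        */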
  2230 FIPR FVm, FVn {:  
  2231     COUNT_INST(I_FIPR);
  2232     check_fpuen();
  2233     if( sh4_x86.double_prec == 0 ) {
  2234         if( sh4_x86.sse3_enabled ) {
  2235             MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2236             MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2237             HADDPS_xmm_xmm( 4, 4 ); 
  2238             HADDPS_xmm_xmm( 4, 4 );
  2239             MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2240         } else {
  2241             push_fr( FVm<<2 );
  2242             push_fr( FVn<<2 );
  2243             FMULP_st(1);
  2244             push_fr( (FVm<<2)+1);
  2245             push_fr( (FVn<<2)+1);
  2246             FMULP_st(1);
  2247             FADDP_st(1);
  2248             push_fr( (FVm<<2)+2);
  2249             push_fr( (FVn<<2)+2);
  2250             FMULP_st(1);
  2251             FADDP_st(1);
  2252             push_fr( (FVm<<2)+3);
  2253             push_fr( (FVn<<2)+3);
  2254             FMULP_st(1);
  2255             FADDP_st(1);
  2256             pop_fr( (FVn<<2)+3);
  2257         }
  2258     }
  2259 :}
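       /* FIPR is the 4-element inner product FVm . FVn, written to the last
        * element of FVn. The SSE3 path multiplies the vectors and sums the
        * lanes with two HADDPS passes; the x87 fallback accumulates the four
        * products serially.
        */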
  2260 FTRV XMTRX, FVn {:  
  2261     COUNT_INST(I_FTRV);
  2262     check_fpuen();
  2263     if( sh4_x86.double_prec == 0 ) {
  2264         if( sh4_x86.sse3_enabled ) {
  2265             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2266             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2267             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2268             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2270             MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2271             MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2272             MOVAPS_xmm_xmm( 4, 6 );
  2273             MOVAPS_xmm_xmm( 5, 7 );
  2274             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2275             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2276             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2277             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2278             MULPS_xmm_xmm( 0, 4 );
  2279             MULPS_xmm_xmm( 1, 5 );
  2280             MULPS_xmm_xmm( 2, 6 );
  2281             MULPS_xmm_xmm( 3, 7 );
  2282             ADDPS_xmm_xmm( 5, 4 );
  2283             ADDPS_xmm_xmm( 7, 6 );
  2284             ADDPS_xmm_xmm( 6, 4 );
  2285             MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2286         } else {
  2287             LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
  2288             call_func1( sh4_ftrv, R_EAX );
  2289         }
  2290     }
  2291     sh4_x86.tstate = TSTATE_NONE;
  2292 :}
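       /* FTRV multiplies vector FVn by the back-bank matrix XMTRX. The SSE3
        * path loads the matrix rows as stored (note the swapped pair order in
        * the fr array, e.g. M1 M0 M3 M2), broadcasts each vector element with
        * the MOVSLDUP/MOVSHDUP + MOVLHPS/MOVHLPS shuffles, and accumulates the
        * four scaled columns; without SSE3 it calls the sh4_ftrv helper.
        */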
  2294 FRCHG {:  
  2295     COUNT_INST(I_FRCHG);
  2296     check_fpuen();
  2297     load_spreg( R_ECX, R_FPSCR );
  2298     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2299     store_spreg( R_ECX, R_FPSCR );
  2300     call_func0( sh4_switch_fr_banks );
  2301     sh4_x86.tstate = TSTATE_NONE;
  2302 :}
  2303 FSCHG {:  
  2304     COUNT_INST(I_FSCHG);
  2305     check_fpuen();
  2306     load_spreg( R_ECX, R_FPSCR );
  2307     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2308     store_spreg( R_ECX, R_FPSCR );
  2309     sh4_x86.tstate = TSTATE_NONE;
  2310     sh4_x86.double_size = !sh4_x86.double_size;
  2311 :}
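       /* FRCHG flips FPSCR.FR and swaps the FP register banks through
        * sh4_switch_fr_banks; FSCHG only toggles FPSCR.SZ, which the
        * translator mirrors in sh4_x86.double_size so later FMOVs in the same
        * block use the right transfer size.
        */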
  2313 /* Processor control instructions */
  2314 LDC Rm, SR {:
  2315     COUNT_INST(I_LDCSR);
  2316     if( sh4_x86.in_delay_slot ) {
  2317 	SLOTILLEGAL();
  2318     } else {
  2319 	check_priv();
  2320 	load_reg( R_EAX, Rm );
  2321 	call_func1( sh4_write_sr, R_EAX );
  2322 	sh4_x86.priv_checked = FALSE;
  2323 	sh4_x86.fpuen_checked = FALSE;
  2324 	sh4_x86.tstate = TSTATE_NONE;
  2325     }
  2326 :}
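       /* Writes to SR (here and in LDC.L @Rm+, SR below) go through
        * sh4_write_sr, since changing MD/RB/BL can switch register banks or
        * privilege state; the cached priv/fpuen checks are reset so they are
        * re-tested for the rest of the block.
        */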
  2327 LDC Rm, GBR {: 
  2328     COUNT_INST(I_LDC);
  2329     load_reg( R_EAX, Rm );
  2330     store_spreg( R_EAX, R_GBR );
  2331 :}
  2332 LDC Rm, VBR {:  
  2333     COUNT_INST(I_LDC);
  2334     check_priv();
  2335     load_reg( R_EAX, Rm );
  2336     store_spreg( R_EAX, R_VBR );
  2337     sh4_x86.tstate = TSTATE_NONE;
  2338 :}
  2339 LDC Rm, SSR {:  
  2340     COUNT_INST(I_LDC);
  2341     check_priv();
  2342     load_reg( R_EAX, Rm );
  2343     store_spreg( R_EAX, R_SSR );
  2344     sh4_x86.tstate = TSTATE_NONE;
  2345 :}
  2346 LDC Rm, SGR {:  
  2347     COUNT_INST(I_LDC);
  2348     check_priv();
  2349     load_reg( R_EAX, Rm );
  2350     store_spreg( R_EAX, R_SGR );
  2351     sh4_x86.tstate = TSTATE_NONE;
  2352 :}
  2353 LDC Rm, SPC {:  
  2354     COUNT_INST(I_LDC);
  2355     check_priv();
  2356     load_reg( R_EAX, Rm );
  2357     store_spreg( R_EAX, R_SPC );
  2358     sh4_x86.tstate = TSTATE_NONE;
  2359 :}
  2360 LDC Rm, DBR {:  
  2361     COUNT_INST(I_LDC);
  2362     check_priv();
  2363     load_reg( R_EAX, Rm );
  2364     store_spreg( R_EAX, R_DBR );
  2365     sh4_x86.tstate = TSTATE_NONE;
  2366 :}
  2367 LDC Rm, Rn_BANK {:  
  2368     COUNT_INST(I_LDC);
  2369     check_priv();
  2370     load_reg( R_EAX, Rm );
  2371     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2372     sh4_x86.tstate = TSTATE_NONE;
  2373 :}
  2374 LDC.L @Rm+, GBR {:  
  2375     COUNT_INST(I_LDCM);
  2376     load_reg( R_EAX, Rm );
  2377     check_ralign32( R_EAX );
  2378     MMU_TRANSLATE_READ( R_EAX );
  2379     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2380     MEM_READ_LONG( R_EAX, R_EAX );
  2381     store_spreg( R_EAX, R_GBR );
  2382     sh4_x86.tstate = TSTATE_NONE;
  2383 :}
  2384 LDC.L @Rm+, SR {:
  2385     COUNT_INST(I_LDCSRM);
  2386     if( sh4_x86.in_delay_slot ) {
  2387 	SLOTILLEGAL();
  2388     } else {
  2389 	check_priv();
  2390 	load_reg( R_EAX, Rm );
  2391 	check_ralign32( R_EAX );
  2392 	MMU_TRANSLATE_READ( R_EAX );
  2393 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2394 	MEM_READ_LONG( R_EAX, R_EAX );
  2395 	call_func1( sh4_write_sr, R_EAX );
  2396 	sh4_x86.priv_checked = FALSE;
  2397 	sh4_x86.fpuen_checked = FALSE;
  2398 	sh4_x86.tstate = TSTATE_NONE;
  2399     }
  2400 :}
  2401 LDC.L @Rm+, VBR {:  
  2402     COUNT_INST(I_LDCM);
  2403     check_priv();
  2404     load_reg( R_EAX, Rm );
  2405     check_ralign32( R_EAX );
  2406     MMU_TRANSLATE_READ( R_EAX );
  2407     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2408     MEM_READ_LONG( R_EAX, R_EAX );
  2409     store_spreg( R_EAX, R_VBR );
  2410     sh4_x86.tstate = TSTATE_NONE;
  2411 :}
  2412 LDC.L @Rm+, SSR {:
  2413     COUNT_INST(I_LDCM);
  2414     check_priv();
  2415     load_reg( R_EAX, Rm );
  2416     check_ralign32( R_EAX );
  2417     MMU_TRANSLATE_READ( R_EAX );
  2418     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2419     MEM_READ_LONG( R_EAX, R_EAX );
  2420     store_spreg( R_EAX, R_SSR );
  2421     sh4_x86.tstate = TSTATE_NONE;
  2422 :}
  2423 LDC.L @Rm+, SGR {:  
  2424     COUNT_INST(I_LDCM);
  2425     check_priv();
  2426     load_reg( R_EAX, Rm );
  2427     check_ralign32( R_EAX );
  2428     MMU_TRANSLATE_READ( R_EAX );
  2429     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2430     MEM_READ_LONG( R_EAX, R_EAX );
  2431     store_spreg( R_EAX, R_SGR );
  2432     sh4_x86.tstate = TSTATE_NONE;
  2433 :}
  2434 LDC.L @Rm+, SPC {:  
  2435     COUNT_INST(I_LDCM);
  2436     check_priv();
  2437     load_reg( R_EAX, Rm );
  2438     check_ralign32( R_EAX );
  2439     MMU_TRANSLATE_READ( R_EAX );
  2440     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2441     MEM_READ_LONG( R_EAX, R_EAX );
  2442     store_spreg( R_EAX, R_SPC );
  2443     sh4_x86.tstate = TSTATE_NONE;
  2444 :}
  2445 LDC.L @Rm+, DBR {:  
  2446     COUNT_INST(I_LDCM);
  2447     check_priv();
  2448     load_reg( R_EAX, Rm );
  2449     check_ralign32( R_EAX );
  2450     MMU_TRANSLATE_READ( R_EAX );
  2451     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2452     MEM_READ_LONG( R_EAX, R_EAX );
  2453     store_spreg( R_EAX, R_DBR );
  2454     sh4_x86.tstate = TSTATE_NONE;
  2455 :}
  2456 LDC.L @Rm+, Rn_BANK {:  
  2457     COUNT_INST(I_LDCM);
  2458     check_priv();
  2459     load_reg( R_EAX, Rm );
  2460     check_ralign32( R_EAX );
  2461     MMU_TRANSLATE_READ( R_EAX );
  2462     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2463     MEM_READ_LONG( R_EAX, R_EAX );
  2464     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2465     sh4_x86.tstate = TSTATE_NONE;
  2466 :}
  2467 LDS Rm, FPSCR {:
  2468     COUNT_INST(I_LDSFPSCR);
  2469     check_fpuen();
  2470     load_reg( R_EAX, Rm );
  2471     call_func1( sh4_write_fpscr, R_EAX );
  2472     sh4_x86.tstate = TSTATE_NONE;
  2473     return 2;
  2474 :}
  2475 LDS.L @Rm+, FPSCR {:  
  2476     COUNT_INST(I_LDSFPSCRM);
  2477     check_fpuen();
  2478     load_reg( R_EAX, Rm );
  2479     check_ralign32( R_EAX );
  2480     MMU_TRANSLATE_READ( R_EAX );
  2481     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2482     MEM_READ_LONG( R_EAX, R_EAX );
  2483     call_func1( sh4_write_fpscr, R_EAX );
  2484     sh4_x86.tstate = TSTATE_NONE;
  2485     return 2;
  2486 :}
  2487 LDS Rm, FPUL {:  
  2488     COUNT_INST(I_LDS);
  2489     check_fpuen();
  2490     load_reg( R_EAX, Rm );
  2491     store_spreg( R_EAX, R_FPUL );
  2492 :}
  2493 LDS.L @Rm+, FPUL {:  
  2494     COUNT_INST(I_LDSM);
  2495     check_fpuen();
  2496     load_reg( R_EAX, Rm );
  2497     check_ralign32( R_EAX );
  2498     MMU_TRANSLATE_READ( R_EAX );
  2499     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2500     MEM_READ_LONG( R_EAX, R_EAX );
  2501     store_spreg( R_EAX, R_FPUL );
  2502     sh4_x86.tstate = TSTATE_NONE;
  2503 :}
  2504 LDS Rm, MACH {: 
  2505     COUNT_INST(I_LDS);
  2506     load_reg( R_EAX, Rm );
  2507     store_spreg( R_EAX, R_MACH );
  2508 :}
  2509 LDS.L @Rm+, MACH {:  
  2510     COUNT_INST(I_LDSM);
  2511     load_reg( R_EAX, Rm );
  2512     check_ralign32( R_EAX );
  2513     MMU_TRANSLATE_READ( R_EAX );
  2514     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2515     MEM_READ_LONG( R_EAX, R_EAX );
  2516     store_spreg( R_EAX, R_MACH );
  2517     sh4_x86.tstate = TSTATE_NONE;
  2518 :}
  2519 LDS Rm, MACL {:  
  2520     COUNT_INST(I_LDS);
  2521     load_reg( R_EAX, Rm );
  2522     store_spreg( R_EAX, R_MACL );
  2523 :}
  2524 LDS.L @Rm+, MACL {:  
  2525     COUNT_INST(I_LDSM);
  2526     load_reg( R_EAX, Rm );
  2527     check_ralign32( R_EAX );
  2528     MMU_TRANSLATE_READ( R_EAX );
  2529     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2530     MEM_READ_LONG( R_EAX, R_EAX );
  2531     store_spreg( R_EAX, R_MACL );
  2532     sh4_x86.tstate = TSTATE_NONE;
  2533 :}
  2534 LDS Rm, PR {:  
  2535     COUNT_INST(I_LDS);
  2536     load_reg( R_EAX, Rm );
  2537     store_spreg( R_EAX, R_PR );
  2538 :}
  2539 LDS.L @Rm+, PR {:  
  2540     COUNT_INST(I_LDSM);
  2541     load_reg( R_EAX, Rm );
  2542     check_ralign32( R_EAX );
  2543     MMU_TRANSLATE_READ( R_EAX );
  2544     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2545     MEM_READ_LONG( R_EAX, R_EAX );
  2546     store_spreg( R_EAX, R_PR );
  2547     sh4_x86.tstate = TSTATE_NONE;
  2548 :}
  2549 LDTLB {:  
  2550     COUNT_INST(I_LDTLB);
  2551     call_func0( MMU_ldtlb );
  2552     sh4_x86.tstate = TSTATE_NONE;
  2553 :}
  2554 OCBI @Rn {:
  2555     COUNT_INST(I_OCBI);
  2556 :}
  2557 OCBP @Rn {:
  2558     COUNT_INST(I_OCBP);
  2559 :}
  2560 OCBWB @Rn {:
  2561     COUNT_INST(I_OCBWB);
  2562 :}
  2563 PREF @Rn {:
  2564     COUNT_INST(I_PREF);
  2565     load_reg( R_EAX, Rn );
  2566     MOV_r32_r32( R_EAX, R_ECX );
  2567     AND_imm32_r32( 0xFC000000, R_ECX );
  2568     CMP_imm32_r32( 0xE0000000, R_ECX );
  2569     JNE_rel8(end);
  2570     call_func1( sh4_flush_store_queue, R_EAX );
  2571     TEST_r32_r32( R_EAX, R_EAX );
  2572     JE_exc(-1);
  2573     JMP_TARGET(end);
  2574     sh4_x86.tstate = TSTATE_NONE;
  2575 :}
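       /* PREF only has an architectural side effect for store-queue addresses:
        * the AND/CMP pair tests for the 0xE0000000-0xE3FFFFFF region, and on a
        * hit the queue is flushed through sh4_flush_store_queue, whose zero
        * return (an MMU exception, presumably) is turned into JE_exc(-1).
        */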
  2576 SLEEP {: 
  2577     COUNT_INST(I_SLEEP);
  2578     check_priv();
  2579     call_func0( sh4_sleep );
  2580     sh4_x86.tstate = TSTATE_NONE;
  2581     sh4_x86.in_delay_slot = DELAY_NONE;
  2582     return 2;
  2583 :}
  2584 STC SR, Rn {:
  2585     COUNT_INST(I_STCSR);
  2586     check_priv();
  2587     call_func0(sh4_read_sr);
  2588     store_reg( R_EAX, Rn );
  2589     sh4_x86.tstate = TSTATE_NONE;
  2590 :}
  2591 STC GBR, Rn {:  
  2592     COUNT_INST(I_STC);
  2593     load_spreg( R_EAX, R_GBR );
  2594     store_reg( R_EAX, Rn );
  2595 :}
  2596 STC VBR, Rn {:  
  2597     COUNT_INST(I_STC);
  2598     check_priv();
  2599     load_spreg( R_EAX, R_VBR );
  2600     store_reg( R_EAX, Rn );
  2601     sh4_x86.tstate = TSTATE_NONE;
  2602 :}
  2603 STC SSR, Rn {:  
  2604     COUNT_INST(I_STC);
  2605     check_priv();
  2606     load_spreg( R_EAX, R_SSR );
  2607     store_reg( R_EAX, Rn );
  2608     sh4_x86.tstate = TSTATE_NONE;
  2609 :}
  2610 STC SPC, Rn {:  
  2611     COUNT_INST(I_STC);
  2612     check_priv();
  2613     load_spreg( R_EAX, R_SPC );
  2614     store_reg( R_EAX, Rn );
  2615     sh4_x86.tstate = TSTATE_NONE;
  2616 :}
  2617 STC SGR, Rn {:  
  2618     COUNT_INST(I_STC);
  2619     check_priv();
  2620     load_spreg( R_EAX, R_SGR );
  2621     store_reg( R_EAX, Rn );
  2622     sh4_x86.tstate = TSTATE_NONE;
  2623 :}
  2624 STC DBR, Rn {:  
  2625     COUNT_INST(I_STC);
  2626     check_priv();
  2627     load_spreg( R_EAX, R_DBR );
  2628     store_reg( R_EAX, Rn );
  2629     sh4_x86.tstate = TSTATE_NONE;
  2630 :}
  2631 STC Rm_BANK, Rn {:
  2632     COUNT_INST(I_STC);
  2633     check_priv();
  2634     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2635     store_reg( R_EAX, Rn );
  2636     sh4_x86.tstate = TSTATE_NONE;
  2637 :}
  2638 STC.L SR, @-Rn {:
  2639     COUNT_INST(I_STCSRM);
  2640     check_priv();
  2641     load_reg( R_EAX, Rn );
  2642     check_walign32( R_EAX );
  2643     ADD_imm8s_r32( -4, R_EAX );
  2644     MMU_TRANSLATE_WRITE( R_EAX );
  2645     PUSH_realigned_r32( R_EAX );
  2646     call_func0( sh4_read_sr );
  2647     POP_realigned_r32( R_ECX );
  2648     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2649     MEM_WRITE_LONG( R_ECX, R_EAX );
  2650     sh4_x86.tstate = TSTATE_NONE;
  2651 :}
  2652 STC.L VBR, @-Rn {:  
  2653     COUNT_INST(I_STCM);
  2654     check_priv();
  2655     load_reg( R_EAX, Rn );
  2656     check_walign32( R_EAX );
  2657     ADD_imm8s_r32( -4, R_EAX );
  2658     MMU_TRANSLATE_WRITE( R_EAX );
  2659     load_spreg( R_EDX, R_VBR );
  2660     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2661     MEM_WRITE_LONG( R_EAX, R_EDX );
  2662     sh4_x86.tstate = TSTATE_NONE;
  2663 :}
  2664 STC.L SSR, @-Rn {:  
  2665     COUNT_INST(I_STCM);
  2666     check_priv();
  2667     load_reg( R_EAX, Rn );
  2668     check_walign32( R_EAX );
  2669     ADD_imm8s_r32( -4, R_EAX );
  2670     MMU_TRANSLATE_WRITE( R_EAX );
  2671     load_spreg( R_EDX, R_SSR );
  2672     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2673     MEM_WRITE_LONG( R_EAX, R_EDX );
  2674     sh4_x86.tstate = TSTATE_NONE;
  2675 :}
  2676 STC.L SPC, @-Rn {:
  2677     COUNT_INST(I_STCM);
  2678     check_priv();
  2679     load_reg( R_EAX, Rn );
  2680     check_walign32( R_EAX );
  2681     ADD_imm8s_r32( -4, R_EAX );
  2682     MMU_TRANSLATE_WRITE( R_EAX );
  2683     load_spreg( R_EDX, R_SPC );
  2684     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2685     MEM_WRITE_LONG( R_EAX, R_EDX );
  2686     sh4_x86.tstate = TSTATE_NONE;
  2687 :}
  2688 STC.L SGR, @-Rn {:  
  2689     COUNT_INST(I_STCM);
  2690     check_priv();
  2691     load_reg( R_EAX, Rn );
  2692     check_walign32( R_EAX );
  2693     ADD_imm8s_r32( -4, R_EAX );
  2694     MMU_TRANSLATE_WRITE( R_EAX );
  2695     load_spreg( R_EDX, R_SGR );
  2696     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2697     MEM_WRITE_LONG( R_EAX, R_EDX );
  2698     sh4_x86.tstate = TSTATE_NONE;
  2699 :}
  2700 STC.L DBR, @-Rn {:  
  2701     COUNT_INST(I_STCM);
  2702     check_priv();
  2703     load_reg( R_EAX, Rn );
  2704     check_walign32( R_EAX );
  2705     ADD_imm8s_r32( -4, R_EAX );
  2706     MMU_TRANSLATE_WRITE( R_EAX );
  2707     load_spreg( R_EDX, R_DBR );
  2708     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2709     MEM_WRITE_LONG( R_EAX, R_EDX );
  2710     sh4_x86.tstate = TSTATE_NONE;
  2711 :}
  2712 STC.L Rm_BANK, @-Rn {:  
  2713     COUNT_INST(I_STCM);
  2714     check_priv();
  2715     load_reg( R_EAX, Rn );
  2716     check_walign32( R_EAX );
  2717     ADD_imm8s_r32( -4, R_EAX );
  2718     MMU_TRANSLATE_WRITE( R_EAX );
  2719     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2720     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2721     MEM_WRITE_LONG( R_EAX, R_EDX );
  2722     sh4_x86.tstate = TSTATE_NONE;
  2723 :}
  2724 STC.L GBR, @-Rn {:  
  2725     COUNT_INST(I_STCM);
  2726     load_reg( R_EAX, Rn );
  2727     check_walign32( R_EAX );
  2728     ADD_imm8s_r32( -4, R_EAX );
  2729     MMU_TRANSLATE_WRITE( R_EAX );
  2730     load_spreg( R_EDX, R_GBR );
  2731     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2732     MEM_WRITE_LONG( R_EAX, R_EDX );
  2733     sh4_x86.tstate = TSTATE_NONE;
  2734 :}
  2735 STS FPSCR, Rn {:  
  2736     COUNT_INST(I_STSFPSCR);
  2737     check_fpuen();
  2738     load_spreg( R_EAX, R_FPSCR );
  2739     store_reg( R_EAX, Rn );
  2740 :}
  2741 STS.L FPSCR, @-Rn {:  
  2742     COUNT_INST(I_STSFPSCRM);
  2743     check_fpuen();
  2744     load_reg( R_EAX, Rn );
  2745     check_walign32( R_EAX );
  2746     ADD_imm8s_r32( -4, R_EAX );
  2747     MMU_TRANSLATE_WRITE( R_EAX );
  2748     load_spreg( R_EDX, R_FPSCR );
  2749     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2750     MEM_WRITE_LONG( R_EAX, R_EDX );
  2751     sh4_x86.tstate = TSTATE_NONE;
  2752 :}
  2753 STS FPUL, Rn {:  
  2754     COUNT_INST(I_STS);
  2755     check_fpuen();
  2756     load_spreg( R_EAX, R_FPUL );
  2757     store_reg( R_EAX, Rn );
  2758 :}
  2759 STS.L FPUL, @-Rn {:  
  2760     COUNT_INST(I_STSM);
  2761     check_fpuen();
  2762     load_reg( R_EAX, Rn );
  2763     check_walign32( R_EAX );
  2764     ADD_imm8s_r32( -4, R_EAX );
  2765     MMU_TRANSLATE_WRITE( R_EAX );
  2766     load_spreg( R_EDX, R_FPUL );
  2767     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2768     MEM_WRITE_LONG( R_EAX, R_EDX );
  2769     sh4_x86.tstate = TSTATE_NONE;
  2770 :}
  2771 STS MACH, Rn {:  
  2772     COUNT_INST(I_STS);
  2773     load_spreg( R_EAX, R_MACH );
  2774     store_reg( R_EAX, Rn );
  2775 :}
  2776 STS.L MACH, @-Rn {:  
  2777     COUNT_INST(I_STSM);
  2778     load_reg( R_EAX, Rn );
  2779     check_walign32( R_EAX );
  2780     ADD_imm8s_r32( -4, R_EAX );
  2781     MMU_TRANSLATE_WRITE( R_EAX );
  2782     load_spreg( R_EDX, R_MACH );
  2783     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2784     MEM_WRITE_LONG( R_EAX, R_EDX );
  2785     sh4_x86.tstate = TSTATE_NONE;
  2786 :}
  2787 STS MACL, Rn {:  
  2788     COUNT_INST(I_STS);
  2789     load_spreg( R_EAX, R_MACL );
  2790     store_reg( R_EAX, Rn );
  2791 :}
  2792 STS.L MACL, @-Rn {:  
  2793     COUNT_INST(I_STSM);
  2794     load_reg( R_EAX, Rn );
  2795     check_walign32( R_EAX );
  2796     ADD_imm8s_r32( -4, R_EAX );
  2797     MMU_TRANSLATE_WRITE( R_EAX );
  2798     load_spreg( R_EDX, R_MACL );
  2799     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2800     MEM_WRITE_LONG( R_EAX, R_EDX );
  2801     sh4_x86.tstate = TSTATE_NONE;
  2802 :}
  2803 STS PR, Rn {:  
  2804     COUNT_INST(I_STS);
  2805     load_spreg( R_EAX, R_PR );
  2806     store_reg( R_EAX, Rn );
  2807 :}
  2808 STS.L PR, @-Rn {:  
  2809     COUNT_INST(I_STSM);
  2810     load_reg( R_EAX, Rn );
  2811     check_walign32( R_EAX );
  2812     ADD_imm8s_r32( -4, R_EAX );
  2813     MMU_TRANSLATE_WRITE( R_EAX );
  2814     load_spreg( R_EDX, R_PR );
  2815     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2816     MEM_WRITE_LONG( R_EAX, R_EDX );
  2817     sh4_x86.tstate = TSTATE_NONE;
  2818 :}
  2820 NOP {: 
  2821     COUNT_INST(I_NOP);
  2822     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  2823 :}
  2824 %%
  2825     sh4_x86.in_delay_slot = DELAY_NONE;
  2826     return 0;