Search
lxdream.org :: lxdream/src/sh4/sh4x86.in
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 974:16b079ed11bb
prev 956:4c1ed9e03985
next 975:007bf7eb944f
author nkeynes
date Mon Jan 26 03:09:53 2009 +0000 (11 years ago)
permissions -rw-r--r--
last change Fix double-counting of instructions in delay slots in sh4_finalize_instruction
Fix spc value when taking an exception in mmu_update_icache in a delay slot
Fix under-counting of instructions in newpc delay slots in translated blocks
view annotate diff log raw
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "lxdream.h"
    29 #include "sh4/xltcache.h"
    30 #include "sh4/sh4core.h"
    31 #include "sh4/sh4trans.h"
    32 #include "sh4/sh4stat.h"
    33 #include "sh4/sh4mmio.h"
    34 #include "sh4/x86op.h"
    35 #include "sh4/mmu.h"
    36 #include "clock.h"
/* Initial allocation, in bytes, for the block-wide backpatch list */
#define DEFAULT_BACKPATCH_SIZE 4096
/* One pending exception fixup recorded while translating a block */
struct backpatch_record {
    uint32_t fixup_offset;  /* byte offset of the fixup site within the generated code */
    uint32_t fixup_icount;  /* instruction count from block start ((pc - block_start_pc)>>1) */
    int32_t exc_code;       /* SH4 exception code to raise at this site */
};
/* States for sh4_x86.in_delay_slot: DELAY_NONE = not in a delay slot;
 * DELAY_PC / DELAY_PC_PR presumably distinguish plain branches from
 * PR-setting (subroutine) branches — confirm against the branch emitters. */
#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2
/** 
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;      /* DELAY_NONE when not translating a delay slot */
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc; /* SH4 PC of the first instruction in this block */
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    int tstate;            /* x86 condition code currently mirroring sh4r.t, or TSTATE_NONE */

    /* mode flags */
    gboolean tlb_on; /* True if tlb translation is active */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;  /* number of entries currently in use */
    uint32_t backpatch_size;  /* capacity of backpatch_list, in records */
};
/* Values for sh4_x86.tstate: each is the x86 condition-code number of the
 * comparison that last computed sh4r.t, so a conditional branch on T can be
 * emitted directly as opcode 0x70+tstate (see JT_rel8/JF_rel8).  TSTATE_NONE
 * means the x86 flags do not currently reflect T. */
#define TSTATE_NONE -1
#define TSTATE_O    0
#define TSTATE_C    2
#define TSTATE_E    4
#define TSTATE_NE   5
#define TSTATE_G    0xF
#define TSTATE_GE   0xD
#define TSTATE_A    7
#define TSTATE_AE   3
/* Bump the per-instruction statistics counter for id; compiles to nothing
 * unless ENABLE_SH4STATS is defined.  The cached condition code is reset,
 * presumably because the emitted call clobbers the x86 flags. */
#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif
/** Branch if T is set (either in the current cflags, or in sh4r.t).
 * If no condition code is cached (TSTATE_NONE), compare sh4r.t against 1
 * first; the jump opcode is then 0x70 + condition-code number. */
#define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)

/** Branch if T is clear (either in the current cflags or in sh4r.t).
 * The ^1 flips the x86 condition code to its logical negation. */
#define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
/* Global translation state; valid only within a single block translation */
static struct sh4_x86_state sh4_x86;

/* INT_MAX / INT_MIN bit patterns and FPU control words; not referenced in
 * this chunk — presumably used by the FP conversion emitters further down */
static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
/**
 * Detect host SSE3 support: CPUID function 1 reports SSE3 as bit 0 of ECX.
 * EAX/EBX/EDX are listed as clobbers since CPUID overwrites all four GPRs.
 */
gboolean is_sse3_supported()
{
    uint32_t features;

    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE;
}
   118 void sh4_translate_init(void)
   119 {
   120     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   121     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   122     sh4_x86.sse3_enabled = is_sse3_supported();
   123 }
   126 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   127 {
   128     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   129 	sh4_x86.backpatch_size <<= 1;
   130 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   131 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   132 	assert( sh4_x86.backpatch_list != NULL );
   133     }
   134     if( sh4_x86.in_delay_slot ) {
   135 	fixup_pc -= 2;
   136     }
   137     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   138 	((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
   139     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   140     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   141     sh4_x86.backpatch_posn++;
   142 }
/**
 * Emit an instruction to load an SH4 reg into a real register
 */
static inline void load_reg( int x86reg, int sh4reg ) 
{
    /* mov [bp+n], reg */
    OP(0x8B);               /* MOV r32, r/m32 */
    OP(0x45 + (x86reg<<3)); /* modrm: disp8[EBP] into x86reg */
    OP(REG_OFFSET(r[sh4reg]));
}
/* Load the low 16 bits of an SH4 reg sign-extended into x86reg */
static inline void load_reg16s( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xBF);  /* 0F BF: MOVSX r32, r/m16 */
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}
/* Load the low 16 bits of an SH4 reg zero-extended into x86reg */
static inline void load_reg16u( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xB7);  /* 0F B7: MOVZX r32, r/m16 */
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}
/* Load/store an SH4 special register, addressed by its struct offset */
#define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
#define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
/**
 * Emit an instruction to load an immediate value into a register
 */
static inline void load_imm32( int x86reg, uint32_t value ) {
    /* mov #value, reg */
    OP(0xB8 + x86reg);  /* B8+r: MOV r32, imm32 */
    OP32(value);
}

/**
 * Load an immediate 64-bit quantity (note: x86-64 only)
 */
static inline void load_imm64( int x86reg, uint64_t value ) {
    /* mov #value, reg */
    REXW();  /* REX.W prefix widens the following MOV to 64-bit */
    OP(0xB8 + x86reg);
    OP64(value);
}
/**
 * Emit an instruction to store an SH4 reg (RN)
 */
void static inline store_reg( int x86reg, int sh4reg ) {
    /* mov reg, [bp+n] */
    OP(0x89);               /* MOV r/m32, r32 */
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}
/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 *
 * NOTE(review): the (frm)^1 swaps adjacent singles within a pair —
 * presumably matching the host layout of the fr[][] banks; confirm
 * against the sh4r definition in sh4core.h.
 */
#define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
#define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register 
 */
#define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

/**
 * Store an FR register (single-precision floating point) from an integer x86+
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

/* Push/pop FPUL, FR/XF singles and DR/XD doubles via the emitted FLD/FSTP */
#define push_fpul()  FLDF_sh4r(R_FPUL)
#define pop_fpul()   FSTPF_sh4r(R_FPUL)
#define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
/* Exception checks - Note that all exception checks will clobber EAX */

/* Abort translation with an illegal-instruction exit (slot-illegal when in a
 * delay slot) unless the CPU is in privileged mode; the `return 2` returns
 * the instruction size from the enclosing translate function. */
#define check_priv( ) \
    if( (sh4r.xlat_sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2) ); \
        } else { \
            exit_block_exc(EXC_ILLEGAL, pc); \
        } \
        sh4_x86.branch_taken = TRUE; \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }

/* Emit a once-per-block test of SR.FD, raising the (slot-)FPU-disabled
 * exception if the FPU is disabled; fpuen_checked suppresses repeats. */
#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
	sh4_x86.fpuen_checked = TRUE;\
	load_spreg( R_EAX, R_SR );\
	AND_imm32_r32( SR_FD, R_EAX );\
	if( sh4_x86.in_delay_slot ) {\
	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
	} else {\
	    JNE_exc(EXC_FPU_DISABLED);\
	}\
	sh4_x86.tstate = TSTATE_NONE; \
    }

/* Alignment checks: raise a data address error if the address in x86reg is
 * not 2-, 4- or 8-byte aligned respectively. */
#define check_ralign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TEST_imm32_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TEST_imm32_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);
/* Undefined instruction: no code emitted here */
#define UNDEF(ir)
/* Byte offset of a read/write function pointer within struct mem_region_fn */
#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
/* Move a call result from EAX into value_reg, if they differ */
#define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so 
 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
 */

#ifdef HAVE_FRAME_ADDRESS
/* When exceptions are possible (TLB on, or user mode), use the _exc call
 * variants which record the current pc for exception recovery. */
#define _CALL_READ(addr_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
        call_func1_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg); } else { \
        call_func1_r32disp8_exc(R_ECX, MEM_REGION_PTR(fn), addr_reg, pc); } 
#define _CALL_WRITE(addr_reg, val_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
        call_func2_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg); } else { \
        call_func2_r32disp8_exc(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg, pc); }
#else 
#define _CALL_READ(addr_reg, fn) call_func1_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg)
#define _CALL_WRITE(addr_reg, val_reg, fn) call_func2_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg)
#endif

/* Emit a memory access: resolve the region (decode_address), then call the
 * region's handler through the function-pointer table addressed by ECX. */
#define MEM_READ_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_byte); MEM_RESULT(value_reg)
#define MEM_READ_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_word); MEM_RESULT(value_reg)
#define MEM_READ_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_long); MEM_RESULT(value_reg)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_byte)
#define MEM_WRITE_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_word)
#define MEM_WRITE_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_long)
#define MEM_PREFETCH( addr_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, prefetch)

/* Exit with a slot-illegal exception for an instruction that is not legal
 * in a delay slot; pc-2 points back at the branch. */
#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
   319 /****** Import appropriate calling conventions ******/
   320 #if SIZEOF_VOID_P == 8
   321 #include "sh4/ia64abi.h"
   322 #else /* 32-bit system */
   323 #include "sh4/ia32abi.h"
   324 #endif
   326 void sh4_translate_begin_block( sh4addr_t pc ) 
   327 {
   328     enter_block();
   329     sh4_x86.in_delay_slot = FALSE;
   330     sh4_x86.fpuen_checked = FALSE;
   331     sh4_x86.branch_taken = FALSE;
   332     sh4_x86.backpatch_posn = 0;
   333     sh4_x86.block_start_pc = pc;
   334     sh4_x86.tlb_on = IS_TLB_ENABLED();
   335     sh4_x86.tstate = TSTATE_NONE;
   336     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   337     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
   338 }
   341 uint32_t sh4_translate_end_block_size()
   342 {
   343     if( sh4_x86.backpatch_posn <= 3 ) {
   344         return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
   345     } else {
   346         return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
   347     }
   348 }
/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    load_imm32( R_EAX, pc );  /* pass the SH4 PC of the breakpoint */
    call_func1( sh4_translate_breakpoint_hit, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;  /* cached condition code invalid after the call */
}

/* An address is untranslatable if it is not currently mapped in the icache */
#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
    ADD_r32_sh4r( R_ECX, R_PC );  // PC += (endpc - block_start_pc)

    // Charge (instruction count + 1) cycles; single step won't do it itself
    load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
    store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );

    call_func0( sh4_execute_instruction );    
    // Chain to the translated code for whatever PC execution produced
    load_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
	call_func1(xlat_get_code_by_vma,R_EAX);
    } else {
	call_func1(xlat_get_code,R_EAX);
    }
    exit_block();
} 
   396 /**
   397  * Translate a single instruction. Delayed branches are handled specially
   398  * by translating both branch and delayed instruction as a single unit (as
   399  * 
   400  * The instruction MUST be in the icache (assert check)
   401  *
   402  * @return true if the instruction marks the end of a basic block
   403  * (eg a branch or 
   404  */
   405 uint32_t sh4_translate_instruction( sh4vma_t pc )
   406 {
   407     uint32_t ir;
   408     /* Read instruction from icache */
   409     assert( IS_IN_ICACHE(pc) );
   410     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   412     if( !sh4_x86.in_delay_slot ) {
   413 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   414     }
   415 %%
/* ALU operations */
ADD Rm, Rn {:
    COUNT_INST(I_ADD);
    // Rn := Rn + Rm
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADD #imm, Rn {:
    COUNT_INST(I_ADDI);
    // Rn := Rn + imm, performed directly on the in-memory register
    ADD_imm8s_sh4r( imm, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    COUNT_INST(I_ADDC);
    // Rn := Rn + Rm + T; T := carry out
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADC_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ADDV Rm, Rn {:
    COUNT_INST(I_ADDV);
    // Rn := Rn + Rm; T := signed overflow
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    COUNT_INST(I_AND);
    // Rn := Rn & Rm
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    AND_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:
    COUNT_INST(I_ANDI);
    // R0 := R0 & imm
    load_reg( R_EAX, 0 );
    AND_imm32_r32(imm, R_EAX); 
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ANDB);
    // Read-modify-write AND of the byte at R0+GBR; the address is parked in
    // a stack slot across the read call, which clobbers registers
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( R_GBR, R_EAX );
    MOV_r32_esp8(R_EAX, 0);
    MEM_READ_BYTE( R_EAX, R_EDX );
    MOV_esp8_r32(0, R_EAX);
    AND_imm32_r32(imm, R_EDX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
CMP/EQ Rm, Rn {:
    COUNT_INST(I_CMPEQ);
    // T := (Rn == Rm)
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:
    COUNT_INST(I_CMPEQI);
    // T := (R0 == imm)
    load_reg( R_EAX, 0 );
    CMP_imm8s_r32(imm, R_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:
    COUNT_INST(I_CMPGE);
    // T := (Rn >= Rm), signed
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {:
    COUNT_INST(I_CMPGT);
    // T := (Rn > Rm), signed
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:
    COUNT_INST(I_CMPHI);
    // T := (Rn > Rm), unsigned
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {:
    COUNT_INST(I_CMPHS);
    // T := (Rn >= Rm), unsigned
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
 :}
CMP/PL Rn {:
    COUNT_INST(I_CMPPL);
    // T := (Rn > 0), signed
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:
    COUNT_INST(I_CMPPZ);
    // T := (Rn >= 0), signed
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:
    COUNT_INST(I_CMPSTR);
    // T := 1 if any corresponding byte of Rm and Rn is equal: the XOR leaves
    // a zero byte at each matching position, and the TEST/JE chain exits on
    // the first zero byte found (falling through sets T from the last test)
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_ECX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    JE_rel8(target1);
    TEST_r8_r8( R_AH, R_AH );
    JE_rel8(target2);
    SHR_imm8_r32( 16, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    JE_rel8(target3);
    TEST_r8_r8( R_AH, R_AH );
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
DIV0S Rm, Rn {:
    COUNT_INST(I_DIV0S);
    // Signed-division setup: M := sign(Rm), Q := sign(Rn), T := (M != Q)
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHR_imm8_r32( 31, R_EAX );
    SHR_imm8_r32( 31, R_ECX );
    store_spreg( R_EAX, R_M );
    store_spreg( R_ECX, R_Q );
    CMP_r32_r32( R_EAX, R_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:
    COUNT_INST(I_DIV0U);
    // Unsigned-division setup: clear M, Q and T
    XOR_r32_r32( R_EAX, R_EAX );
    store_spreg( R_EAX, R_Q );
    store_spreg( R_EAX, R_M );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
DIV1 Rm, Rn {:
    COUNT_INST(I_DIV1);
    // One 1-bit divide step: rotate T into Rn, then add or subtract Rm
    // depending on whether Q == M, and recompute Q and T
    load_spreg( R_ECX, R_M );
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    SETC_r8( R_DL ); // Q'
    CMP_sh4r_r32( R_Q, R_ECX );
    JE_rel8(mqequal);
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_rel8(end);
    JMP_TARGET(mqequal);
    SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_TARGET(end);
    store_reg( R_EAX, Rn ); // Done with Rn now
    SETC_r8(R_AL); // tmp1
    XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
    XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
    store_spreg( R_ECX, R_Q );
    XOR_imm8s_r32( 1, R_AL );   // T = !Q'
    MOVZX_r8_r32( R_AL, R_EAX );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULS.L Rm, Rn {:
    COUNT_INST(I_DMULS);
    // MACH:MACL := signed 64-bit product Rm * Rn
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    IMUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:
    COUNT_INST(I_DMULU);
    // MACH:MACL := unsigned 64-bit product Rm * Rn
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );    
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:
    COUNT_INST(I_DT);
    // Rn := Rn - 1; T := (Rn == 0)
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    store_reg( R_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:
    COUNT_INST(I_EXTSB);
    // Rn := sign-extend(Rm[7:0]); cached tstate left alone
    load_reg( R_EAX, Rm );
    MOVSX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTS.W Rm, Rn {:
    COUNT_INST(I_EXTSW);
    // Rn := sign-extend(Rm[15:0])
    load_reg( R_EAX, Rm );
    MOVSX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.B Rm, Rn {:
    COUNT_INST(I_EXTUB);
    // Rn := zero-extend(Rm[7:0])
    load_reg( R_EAX, Rm );
    MOVZX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.W Rm, Rn {:
    COUNT_INST(I_EXTUW);
    // Rn := zero-extend(Rm[15:0])
    load_reg( R_EAX, Rm );
    MOVZX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
MAC.L @Rm+, @Rn+ {:
    COUNT_INST(I_MACL);
    // MAC := MAC + signed(@Rm+) * signed(@Rn+); when S is set the helper
    // signsat48 saturates the accumulator afterwards.  The Rm == Rn case
    // reads the same register twice before the single post-increment of 8.
    if( Rm == Rn ) {
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MEM_READ_LONG( R_EAX, R_EAX );
        MOV_r32_esp8(R_EAX, 0);
        load_reg( R_EAX, Rm );
        LEA_r32disp8_r32( R_EAX, 4, R_EAX );
        MEM_READ_LONG( R_EAX, R_EAX );
        ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
    } else {
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MEM_READ_LONG( R_EAX, R_EAX );
        MOV_r32_esp8( R_EAX, 0 );
        load_reg( R_EAX, Rn );
        check_ralign32( R_EAX );
        MEM_READ_LONG( R_EAX, R_EAX );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    }

    IMUL_esp8( 0 );
    ADD_r32_sh4r( R_EAX, R_MACL );
    ADC_r32_sh4r( R_EDX, R_MACH );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32(R_ECX, R_ECX);
    JE_rel8( nosat );
    call_func0( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
MAC.W @Rm+, @Rn+ {:
    COUNT_INST(I_MACW);
    // MACL := MACL + signed(@Rm+)[15:0] * signed(@Rn+)[15:0].  With S set,
    // the 32-bit add saturates to 0x80000000/0x7FFFFFFF and MACH is set to 1
    // on overflow; with S clear the 64-bit accumulator is updated normally.
    if( Rm == Rn ) {
        load_reg( R_EAX, Rm );
        check_ralign16( R_EAX );
        MEM_READ_WORD( R_EAX, R_EAX );
        MOV_r32_esp8( R_EAX, 0 );
        load_reg( R_EAX, Rm );
        LEA_r32disp8_r32( R_EAX, 2, R_EAX );
        MEM_READ_WORD( R_EAX, R_EAX );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( R_EAX, Rm );
        check_ralign16( R_EAX );
        MEM_READ_WORD( R_EAX, R_EAX );
        MOV_r32_esp8( R_EAX, 0 );
        load_reg( R_EAX, Rn );
        check_ralign16( R_EAX );
        MEM_READ_WORD( R_EAX, R_EAX );
        ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
        ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    }
    IMUL_esp8( 0 );
    load_spreg( R_ECX, R_S );
    TEST_r32_r32( R_ECX, R_ECX );
    JE_rel8( nosat );

    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    JNO_rel8( end );            // 2
    load_imm32( R_EDX, 1 );         // 5
    store_spreg( R_EDX, R_MACH );   // 6
    JS_rel8( positive );        // 2
    load_imm32( R_EAX, 0x80000000 );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8(end2);           // 2

    JMP_TARGET(positive);
    load_imm32( R_EAX, 0x7FFFFFFF );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8(end3);            // 2

    JMP_TARGET(nosat);
    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    ADC_r32_sh4r( R_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVT Rn {:
    COUNT_INST(I_MOVT);
    // Rn := T
    load_spreg( R_EAX, R_T );
    store_reg( R_EAX, Rn );
:}
MUL.L Rm, Rn {:
    COUNT_INST(I_MULL);
    // MACL := low 32 bits of Rm * Rn (MACH untouched)
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    COUNT_INST(I_MULSW);
    // MACL := sign-extended Rm[15:0] * sign-extended Rn[15:0]
    load_reg16s( R_EAX, Rm );
    load_reg16s( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:
    COUNT_INST(I_MULUW);
    // MACL := zero-extended Rm[15:0] * zero-extended Rn[15:0]
    load_reg16u( R_EAX, Rm );
    load_reg16u( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    COUNT_INST(I_NEG);
    // Rn := -Rm
    load_reg( R_EAX, Rm );
    NEG_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:
    COUNT_INST(I_NEGC);
    // Rn := 0 - Rm - T; T := borrow
    load_reg( R_EAX, Rm );
    XOR_r32_r32( R_ECX, R_ECX );
    LDC_t();
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:
    COUNT_INST(I_NOT);
    // Rn := ~Rm
    load_reg( R_EAX, Rm );
    NOT_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:
    COUNT_INST(I_OR);
    // Rn := Rn | Rm
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    COUNT_INST(I_ORI);
    // R0 := R0 | imm
    load_reg( R_EAX, 0 );
    OR_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ORB);
    // Read-modify-write OR of the byte at R0+GBR; address saved in a stack
    // slot across the read call
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( R_GBR, R_EAX );
    MOV_r32_esp8( R_EAX, 0 );
    MEM_READ_BYTE( R_EAX, R_EDX );
    MOV_esp8_r32( 0, R_EAX );
    OR_imm32_r32(imm, R_EDX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    COUNT_INST(I_ROTCL);
    // Rotate Rn left one bit through T (T enters bit 0, bit 31 becomes T)
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:
    COUNT_INST(I_ROTCR);
    // Rotate Rn right one bit through T
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:
    COUNT_INST(I_ROTL);
    // Rotate Rn left one bit; T := the bit rotated out
    load_reg( R_EAX, Rn );
    ROL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:
    COUNT_INST(I_ROTR);
    // Rotate Rn right one bit; T := the bit rotated out
    load_reg( R_EAX, Rn );
    ROR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    COUNT_INST(I_SHAD);
    /* Annoyingly enough, not directly convertible */
    // Rm >= 0: Rn <<= (Rm & 0x1F).  Rm < 0: arithmetic right shift by
    // (-Rm) & 0x1F, where a masked count of 0 means shift by 32, emitted
    // as SAR 31 so the result fills with the sign bit.
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(doshl);

    NEG_r32( R_ECX );      // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8(emptysar);     // 2
    SAR_r32_CL( R_EAX );       // 2
    JMP_rel8(end);          // 2

    JMP_TARGET(emptysar);
    SAR_imm8_r32(31, R_EAX );  // 3
    JMP_rel8(end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLD Rm, Rn {:
    COUNT_INST(I_SHLD);
    // Logical variant of SHAD: a right-shift count of 32 (masked 0)
    // produces 0 rather than a sign fill.
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(doshl);

    NEG_r32( R_ECX );      // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8(emptyshr );
    SHR_r32_CL( R_EAX );       // 2
    JMP_rel8(end);          // 2

    JMP_TARGET(emptyshr);
    XOR_r32_r32( R_EAX, R_EAX );
    JMP_rel8(end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
    906 SHAL Rn {: 
        /* Single-bit and fixed-count shifts.  The single-bit forms set the SH4
         * T bit from the x86 carry; the fixed-count forms (SHLL2/8/16,
         * SHLR2/8/16) do not touch T on SH4, so no SETC_t() is emitted and the
         * cached tstate is invalidated. */
    907     COUNT_INST(I_SHAL);
    908     load_reg( R_EAX, Rn );
    909     SHL1_r32( R_EAX );   /* arithmetic and logical left shift are identical */
    910     SETC_t();
    911     store_reg( R_EAX, Rn );
    912     sh4_x86.tstate = TSTATE_C;
    913 :}
    914 SHAR Rn {:  
    915     COUNT_INST(I_SHAR);
    916     load_reg( R_EAX, Rn );
    917     SAR1_r32( R_EAX );
    918     SETC_t();
    919     store_reg( R_EAX, Rn );
    920     sh4_x86.tstate = TSTATE_C;
    921 :}
    922 SHLL Rn {:  
    923     COUNT_INST(I_SHLL);
    924     load_reg( R_EAX, Rn );
    925     SHL1_r32( R_EAX );
    926     SETC_t();
    927     store_reg( R_EAX, Rn );
    928     sh4_x86.tstate = TSTATE_C;
    929 :}
        /* NOTE(review): SHLL2/8/16 are all counted under I_SHLL — presumably a
         * deliberate aggregation in sh4stat; confirm there is no distinct
         * counter intended for these. */
    930 SHLL2 Rn {:
    931     COUNT_INST(I_SHLL);
    932     load_reg( R_EAX, Rn );
    933     SHL_imm8_r32( 2, R_EAX );
    934     store_reg( R_EAX, Rn );
    935     sh4_x86.tstate = TSTATE_NONE;
    936 :}
    937 SHLL8 Rn {:  
    938     COUNT_INST(I_SHLL);
    939     load_reg( R_EAX, Rn );
    940     SHL_imm8_r32( 8, R_EAX );
    941     store_reg( R_EAX, Rn );
    942     sh4_x86.tstate = TSTATE_NONE;
    943 :}
    944 SHLL16 Rn {:  
    945     COUNT_INST(I_SHLL);
    946     load_reg( R_EAX, Rn );
    947     SHL_imm8_r32( 16, R_EAX );
    948     store_reg( R_EAX, Rn );
    949     sh4_x86.tstate = TSTATE_NONE;
    950 :}
    951 SHLR Rn {:  
    952     COUNT_INST(I_SHLR);
    953     load_reg( R_EAX, Rn );
    954     SHR1_r32( R_EAX );
    955     SETC_t();
    956     store_reg( R_EAX, Rn );
    957     sh4_x86.tstate = TSTATE_C;
    958 :}
    959 SHLR2 Rn {:  
    960     COUNT_INST(I_SHLR);
    961     load_reg( R_EAX, Rn );
    962     SHR_imm8_r32( 2, R_EAX );
    963     store_reg( R_EAX, Rn );
    964     sh4_x86.tstate = TSTATE_NONE;
    965 :}
    966 SHLR8 Rn {:  
    967     COUNT_INST(I_SHLR);
    968     load_reg( R_EAX, Rn );
    969     SHR_imm8_r32( 8, R_EAX );
    970     store_reg( R_EAX, Rn );
    971     sh4_x86.tstate = TSTATE_NONE;
    972 :}
    973 SHLR16 Rn {:  
    974     COUNT_INST(I_SHLR);
    975     load_reg( R_EAX, Rn );
    976     SHR_imm8_r32( 16, R_EAX );
    977     store_reg( R_EAX, Rn );
    978     sh4_x86.tstate = TSTATE_NONE;
    979 :}
    980 SUB Rm, Rn {:  
        /* Rn = Rn - Rm.  Plain SUB does not affect the SH4 T bit. */
    981     COUNT_INST(I_SUB);
    982     load_reg( R_EAX, Rm );
    983     load_reg( R_ECX, Rn );
    984     SUB_r32_r32( R_EAX, R_ECX );
    985     store_reg( R_ECX, Rn );
    986     sh4_x86.tstate = TSTATE_NONE;
    987 :}
    988 SUBC Rm, Rn {:  
        /* Rn = Rn - Rm - T, T = borrow.  If the x86 carry flag already mirrors
         * T (tstate == TSTATE_C) the LDC_t reload is skipped. */
    989     COUNT_INST(I_SUBC);
    990     load_reg( R_EAX, Rm );
    991     load_reg( R_ECX, Rn );
    992     if( sh4_x86.tstate != TSTATE_C ) {
    993 	LDC_t();
    994     }
    995     SBB_r32_r32( R_EAX, R_ECX );
    996     store_reg( R_ECX, Rn );
    997     SETC_t();
    998     sh4_x86.tstate = TSTATE_C;
    999 :}
   1000 SUBV Rm, Rn {:  
        /* Rn = Rn - Rm, T = signed overflow (x86 OF via SETO_t). */
   1001     COUNT_INST(I_SUBV);
   1002     load_reg( R_EAX, Rm );
   1003     load_reg( R_ECX, Rn );
   1004     SUB_r32_r32( R_EAX, R_ECX );
   1005     store_reg( R_ECX, Rn );
   1006     SETO_t();
   1007     sh4_x86.tstate = TSTATE_O;
   1008 :}
   1009 SWAP.B Rm, Rn {:  
        /* Swap the two low bytes of Rm into Rn; upper half passes through.
         * tstate is left untouched because XCHG r8,r8 preserves EFLAGS. */
   1010     COUNT_INST(I_SWAPB);
   1011     load_reg( R_EAX, Rm );
   1012     XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
   1013     store_reg( R_EAX, Rn );
   1014 :}
  1015 SWAP.W Rm, Rn {:  
  1016     COUNT_INST(I_SWAPB);
  1017     load_reg( R_EAX, Rm );
  1018     MOV_r32_r32( R_EAX, R_ECX );
  1019     SHL_imm8_r32( 16, R_ECX );
  1020     SHR_imm8_r32( 16, R_EAX );
  1021     OR_r32_r32( R_EAX, R_ECX );
  1022     store_reg( R_ECX, Rn );
  1023     sh4_x86.tstate = TSTATE_NONE;
  1024 :}
   1025 TAS.B @Rn {:  
        /* Test-and-set: read byte at @Rn, T = (byte == 0), then write the byte
         * back with bit 7 set.  The address is spilled to the stack slot
         * (MOV_r32_esp8) across the MEM_READ_BYTE call, which may clobber
         * registers, and restored before the write. */
   1026     COUNT_INST(I_TASB);
   1027     load_reg( R_EAX, Rn );
   1028     MOV_r32_esp8( R_EAX, 0 );
   1029     MEM_READ_BYTE( R_EAX, R_EDX );
   1030     TEST_r8_r8( R_DL, R_DL );
   1031     SETE_t();
   1032     OR_imm8_r8( 0x80, R_DL );
   1033     MOV_esp8_r32( 0, R_EAX );
   1034     MEM_WRITE_BYTE( R_EAX, R_EDX );
   1035     sh4_x86.tstate = TSTATE_NONE;
   1036 :}
   1037 TST Rm, Rn {:  
        /* T = ((Rm & Rn) == 0); registers unchanged. */
   1038     COUNT_INST(I_TST);
   1039     load_reg( R_EAX, Rm );
   1040     load_reg( R_ECX, Rn );
   1041     TEST_r32_r32( R_EAX, R_ECX );
   1042     SETE_t();
   1043     sh4_x86.tstate = TSTATE_E;
   1044 :}
   1045 TST #imm, R0 {:  
   1046     COUNT_INST(I_TSTI);
   1047     load_reg( R_EAX, 0 );
   1048     TEST_imm32_r32( imm, R_EAX );
   1049     SETE_t();
   1050     sh4_x86.tstate = TSTATE_E;
   1051 :}
   1052 TST.B #imm, @(R0, GBR) {:  
        /* T = ((mem[R0+GBR] & imm) == 0); memory is only read. */
   1053     COUNT_INST(I_TSTB);
   1054     load_reg( R_EAX, 0);
   1055     ADD_sh4r_r32( R_GBR, R_EAX );
   1056     MEM_READ_BYTE( R_EAX, R_EAX );
   1057     TEST_imm8_r8( imm, R_AL );
   1058     SETE_t();
   1059     sh4_x86.tstate = TSTATE_E;
   1060 :}
   1061 XOR Rm, Rn {:  
   1062     COUNT_INST(I_XOR);
   1063     load_reg( R_EAX, Rm );
   1064     load_reg( R_ECX, Rn );
   1065     XOR_r32_r32( R_EAX, R_ECX );
   1066     store_reg( R_ECX, Rn );
   1067     sh4_x86.tstate = TSTATE_NONE;
   1068 :}
   1069 XOR #imm, R0 {:  
   1070     COUNT_INST(I_XORI);
   1071     load_reg( R_EAX, 0 );
   1072     XOR_imm32_r32( imm, R_EAX );
   1073     store_reg( R_EAX, 0 );
   1074     sh4_x86.tstate = TSTATE_NONE;
   1075 :}
   1076 XOR.B #imm, @(R0, GBR) {:  
        /* Read-modify-write of the byte at R0+GBR; address spilled to the
         * stack slot across the read call as in TAS.B. */
   1077     COUNT_INST(I_XORB);
   1078     load_reg( R_EAX, 0 );
   1079     ADD_sh4r_r32( R_GBR, R_EAX ); 
   1080     MOV_r32_esp8( R_EAX, 0 );
   1081     MEM_READ_BYTE(R_EAX, R_EDX);
   1082     MOV_esp8_r32( 0, R_EAX );
   1083     XOR_imm32_r32( imm, R_EDX );
   1084     MEM_WRITE_BYTE( R_EAX, R_EDX );
   1085     sh4_x86.tstate = TSTATE_NONE;
   1086 :}
   1087 XTRCT Rm, Rn {:
        /* Rn = (Rm << 16) | (Rn >> 16): middle 32 bits of the Rm:Rn pair. */
   1088     COUNT_INST(I_XTRCT);
   1089     load_reg( R_EAX, Rm );
   1090     load_reg( R_ECX, Rn );
   1091     SHL_imm8_r32( 16, R_EAX );
   1092     SHR_imm8_r32( 16, R_ECX );
   1093     OR_r32_r32( R_EAX, R_ECX );
   1094     store_reg( R_ECX, Rn );
   1095     sh4_x86.tstate = TSTATE_NONE;
   1096 :}
   1098 /* Data move instructions */
        /* Byte moves: MEM_READ/WRITE_BYTE take the address in EAX and the data
         * in EDX (write) or return it in EAX (read).  Memory calls can raise
         * SH4 exceptions, so tstate is invalidated after each one. */
   1099 MOV Rm, Rn {:  
   1100     COUNT_INST(I_MOV);
   1101     load_reg( R_EAX, Rm );
   1102     store_reg( R_EAX, Rn );
   1103 :}
   1104 MOV #imm, Rn {:  
   1105     COUNT_INST(I_MOVI);
   1106     load_imm32( R_EAX, imm );
   1107     store_reg( R_EAX, Rn );
   1108 :}
   1109 MOV.B Rm, @Rn {:  
   1110     COUNT_INST(I_MOVB);
   1111     load_reg( R_EAX, Rn );
   1112     load_reg( R_EDX, Rm );
   1113     MEM_WRITE_BYTE( R_EAX, R_EDX );
   1114     sh4_x86.tstate = TSTATE_NONE;
   1115 :}
   1116 MOV.B Rm, @-Rn {:  
        /* Pre-decrement store: write to Rn-1 first, and only commit the
         * decrement to sh4r.r[Rn] after the write succeeds, so a memory
         * exception leaves Rn unmodified. */
   1117     COUNT_INST(I_MOVB);
   1118     load_reg( R_EAX, Rn );
   1119     LEA_r32disp8_r32( R_EAX, -1, R_EAX );
   1120     load_reg( R_EDX, Rm );
   1121     MEM_WRITE_BYTE( R_EAX, R_EDX );
   1122     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
   1123     sh4_x86.tstate = TSTATE_NONE;
   1124 :}
   1125 MOV.B Rm, @(R0, Rn) {:  
   1126     COUNT_INST(I_MOVB);
   1127     load_reg( R_EAX, 0 );
   1128     ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
   1129     load_reg( R_EDX, Rm );
   1130     MEM_WRITE_BYTE( R_EAX, R_EDX );
   1131     sh4_x86.tstate = TSTATE_NONE;
   1132 :}
   1133 MOV.B R0, @(disp, GBR) {:  
   1134     COUNT_INST(I_MOVB);
   1135     load_spreg( R_EAX, R_GBR );
   1136     ADD_imm32_r32( disp, R_EAX );
   1137     load_reg( R_EDX, 0 );
   1138     MEM_WRITE_BYTE( R_EAX, R_EDX );
   1139     sh4_x86.tstate = TSTATE_NONE;
   1140 :}
   1141 MOV.B R0, @(disp, Rn) {:  
   1142     COUNT_INST(I_MOVB);
   1143     load_reg( R_EAX, Rn );
   1144     ADD_imm32_r32( disp, R_EAX );
   1145     load_reg( R_EDX, 0 );
   1146     MEM_WRITE_BYTE( R_EAX, R_EDX );
   1147     sh4_x86.tstate = TSTATE_NONE;
   1148 :}
   1149 MOV.B @Rm, Rn {:  
   1150     COUNT_INST(I_MOVB);
   1151     load_reg( R_EAX, Rm );
   1152     MEM_READ_BYTE( R_EAX, R_EAX );
   1153     store_reg( R_EAX, Rn );
   1154     sh4_x86.tstate = TSTATE_NONE;
   1155 :}
   1156 MOV.B @Rm+, Rn {:  
        /* Post-increment load.  When Rm == Rn the loaded value must win over
         * the increment, so the increment is skipped in that case (matches
         * SH4 semantics).
         * NOTE(review): upstream line 1162 (the closing brace of this if) was
         * dropped by the extraction — verify against the upstream file. */
   1157     COUNT_INST(I_MOVB);
   1158     load_reg( R_EAX, Rm );
   1159     MEM_READ_BYTE( R_EAX, R_EAX );
   1160     if( Rm != Rn ) {
   1161     	ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
   1163     store_reg( R_EAX, Rn );
   1164     sh4_x86.tstate = TSTATE_NONE;
   1165 :}
   1166 MOV.B @(R0, Rm), Rn {:  
   1167     COUNT_INST(I_MOVB);
   1168     load_reg( R_EAX, 0 );
   1169     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   1170     MEM_READ_BYTE( R_EAX, R_EAX );
   1171     store_reg( R_EAX, Rn );
   1172     sh4_x86.tstate = TSTATE_NONE;
   1173 :}
   1174 MOV.B @(disp, GBR), R0 {:  
   1175     COUNT_INST(I_MOVB);
   1176     load_spreg( R_EAX, R_GBR );
   1177     ADD_imm32_r32( disp, R_EAX );
   1178     MEM_READ_BYTE( R_EAX, R_EAX );
   1179     store_reg( R_EAX, 0 );
   1180     sh4_x86.tstate = TSTATE_NONE;
   1181 :}
   1182 MOV.B @(disp, Rm), R0 {:  
   1183     COUNT_INST(I_MOVB);
   1184     load_reg( R_EAX, Rm );
   1185     ADD_imm32_r32( disp, R_EAX );
   1186     MEM_READ_BYTE( R_EAX, R_EAX );
   1187     store_reg( R_EAX, 0 );
   1188     sh4_x86.tstate = TSTATE_NONE;
   1189 :}
   1190 MOV.L Rm, @Rn {:
        /* Long store with store-queue fast path: addresses in the
         * 0xE0000000-0xE3FFFFFF region (top 6 bits == 0xE0>>2 after the
         * 0xFC000000 mask) write directly into sh4r.store_queue instead of
         * going through the memory subsystem.  check_walign32 raises an
         * address error for misaligned targets.
         * NOTE(review): some upstream closing-brace lines in this region were
         * dropped by the extraction (line numbers jump) — verify brace balance
         * against the upstream file. */
   1191     COUNT_INST(I_MOVL);
   1192     load_reg( R_EAX, Rn );
   1193     check_walign32(R_EAX);
   1194     MOV_r32_r32( R_EAX, R_ECX );
   1195     AND_imm32_r32( 0xFC000000, R_ECX );
   1196     CMP_imm32_r32( 0xE0000000, R_ECX );
   1197     JNE_rel8( notsq );
   1198     AND_imm8s_r32( 0x3C, R_EAX );   /* index within the 64-byte SQ area */
   1199     load_reg( R_EDX, Rm );
   1200     MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
   1201     JMP_rel8(end);
   1202     JMP_TARGET(notsq);
   1203     load_reg( R_EDX, Rm );
   1204     MEM_WRITE_LONG( R_EAX, R_EDX );
   1205     JMP_TARGET(end);
   1206     sh4_x86.tstate = TSTATE_NONE;
   1207 :}
   1208 MOV.L Rm, @-Rn {:  
        /* Pre-decrement store: Rn is only decremented after the write, so a
         * memory exception leaves Rn unmodified. */
   1209     COUNT_INST(I_MOVL);
   1210     load_reg( R_EAX, Rn );
   1211     ADD_imm8s_r32( -4, R_EAX );
   1212     check_walign32( R_EAX );
   1213     load_reg( R_EDX, Rm );
   1214     MEM_WRITE_LONG( R_EAX, R_EDX );
   1215     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
   1216     sh4_x86.tstate = TSTATE_NONE;
   1217 :}
   1218 MOV.L Rm, @(R0, Rn) {:  
   1219     COUNT_INST(I_MOVL);
   1220     load_reg( R_EAX, 0 );
   1221     ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
   1222     check_walign32( R_EAX );
   1223     load_reg( R_EDX, Rm );
   1224     MEM_WRITE_LONG( R_EAX, R_EDX );
   1225     sh4_x86.tstate = TSTATE_NONE;
   1226 :}
   1227 MOV.L R0, @(disp, GBR) {:  
   1228     COUNT_INST(I_MOVL);
   1229     load_spreg( R_EAX, R_GBR );
   1230     ADD_imm32_r32( disp, R_EAX );
   1231     check_walign32( R_EAX );
   1232     load_reg( R_EDX, 0 );
   1233     MEM_WRITE_LONG( R_EAX, R_EDX );
   1234     sh4_x86.tstate = TSTATE_NONE;
   1235 :}
   1236 MOV.L Rm, @(disp, Rn) {:  
        /* Same store-queue fast path as MOV.L Rm, @Rn, applied to Rn+disp. */
   1237     COUNT_INST(I_MOVL);
   1238     load_reg( R_EAX, Rn );
   1239     ADD_imm32_r32( disp, R_EAX );
   1240     check_walign32( R_EAX );
   1241     MOV_r32_r32( R_EAX, R_ECX );
   1242     AND_imm32_r32( 0xFC000000, R_ECX );
   1243     CMP_imm32_r32( 0xE0000000, R_ECX );
   1244     JNE_rel8( notsq );
   1245     AND_imm8s_r32( 0x3C, R_EAX );
   1246     load_reg( R_EDX, Rm );
   1247     MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
   1248     JMP_rel8(end);
   1249     JMP_TARGET(notsq);
   1250     load_reg( R_EDX, Rm );
   1251     MEM_WRITE_LONG( R_EAX, R_EDX );
   1252     JMP_TARGET(end);
   1253     sh4_x86.tstate = TSTATE_NONE;
   1254 :}
   1255 MOV.L @Rm, Rn {:  
   1256     COUNT_INST(I_MOVL);
   1257     load_reg( R_EAX, Rm );
   1258     check_ralign32( R_EAX );
   1259     MEM_READ_LONG( R_EAX, R_EAX );
   1260     store_reg( R_EAX, Rn );
   1261     sh4_x86.tstate = TSTATE_NONE;
   1262 :}
   1263 MOV.L @Rm+, Rn {:  
        /* Post-increment load; increment skipped when Rm == Rn so the loaded
         * value wins (see MOV.B @Rm+). */
   1264     COUNT_INST(I_MOVL);
   1265     load_reg( R_EAX, Rm );
   1266     check_ralign32( R_EAX );
   1267     MEM_READ_LONG( R_EAX, R_EAX );
   1268     if( Rm != Rn ) {
   1269     	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   1271     store_reg( R_EAX, Rn );
   1272     sh4_x86.tstate = TSTATE_NONE;
   1273 :}
   1274 MOV.L @(R0, Rm), Rn {:  
   1275     COUNT_INST(I_MOVL);
   1276     load_reg( R_EAX, 0 );
   1277     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   1278     check_ralign32( R_EAX );
   1279     MEM_READ_LONG( R_EAX, R_EAX );
   1280     store_reg( R_EAX, Rn );
   1281     sh4_x86.tstate = TSTATE_NONE;
   1282 :}
   1283 MOV.L @(disp, GBR), R0 {:
   1284     COUNT_INST(I_MOVL);
   1285     load_spreg( R_EAX, R_GBR );
   1286     ADD_imm32_r32( disp, R_EAX );
   1287     check_ralign32( R_EAX );
   1288     MEM_READ_LONG( R_EAX, R_EAX );
   1289     store_reg( R_EAX, 0 );
   1290     sh4_x86.tstate = TSTATE_NONE;
   1291 :}
   1292 MOV.L @(disp, PC), Rn {:  
        /* PC-relative long load (illegal in a delay slot).  When the target
         * lies in the same icache page as the code being translated, the value
         * is read directly from host memory at translation time; otherwise the
         * address is computed relative to sh4r.pc at runtime. */
   1293     COUNT_INST(I_MOVLPC);
   1294     if( sh4_x86.in_delay_slot ) {
   1295 	SLOTILLEGAL();
   1296     } else {
   1297 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
   1298 	if( IS_IN_ICACHE(target) ) {
   1299 	    // If the target address is in the same page as the code, it's
   1300 	    // pretty safe to just ref it directly and circumvent the whole
   1301 	    // memory subsystem. (this is a big performance win)
   1303 	    // FIXME: There's a corner-case that's not handled here when
   1304 	    // the current code-page is in the ITLB but not in the UTLB.
   1305 	    // (should generate a TLB miss although need to test SH4 
   1306 	    // behaviour to confirm) Unlikely to be anyone depending on this
   1307 	    // behaviour though.
   1308 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
   1309 	    MOV_moff32_EAX( ptr );
   1310 	} else {
   1311 	    // Note: we use sh4r.pc for the calc as we could be running at a
   1312 	    // different virtual address than the translation was done with,
   1313 	    // but we can safely assume that the low bits are the same.
   1314 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
   1315 	    ADD_sh4r_r32( R_PC, R_EAX );
   1316 	    MEM_READ_LONG( R_EAX, R_EAX );
   1317 	    sh4_x86.tstate = TSTATE_NONE;
   1319 	store_reg( R_EAX, Rn );
   1321 :}
   1322 MOV.L @(disp, Rm), Rn {:  
   1323     COUNT_INST(I_MOVL);
   1324     load_reg( R_EAX, Rm );
   1325     ADD_imm8s_r32( disp, R_EAX );
   1326     check_ralign32( R_EAX );
   1327     MEM_READ_LONG( R_EAX, R_EAX );
   1328     store_reg( R_EAX, Rn );
   1329     sh4_x86.tstate = TSTATE_NONE;
   1330 :}
   1331 MOV.W Rm, @Rn {:  
        /* Word moves: same pattern as the byte/long forms, with 16-bit
         * alignment checks (check_walign16/check_ralign16). */
   1332     COUNT_INST(I_MOVW);
   1333     load_reg( R_EAX, Rn );
   1334     check_walign16( R_EAX );
   1335     load_reg( R_EDX, Rm );
   1336     MEM_WRITE_WORD( R_EAX, R_EDX );
   1337     sh4_x86.tstate = TSTATE_NONE;
   1338 :}
   1339 MOV.W Rm, @-Rn {:  
        /* Pre-decrement store: Rn committed only after the write succeeds. */
   1340     COUNT_INST(I_MOVW);
   1341     load_reg( R_EAX, Rn );
   1342     check_walign16( R_EAX );
   1343     LEA_r32disp8_r32( R_EAX, -2, R_EAX );
   1344     load_reg( R_EDX, Rm );
   1345     MEM_WRITE_WORD( R_EAX, R_EDX );
   1346     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
   1347     sh4_x86.tstate = TSTATE_NONE;
   1348 :}
   1349 MOV.W Rm, @(R0, Rn) {:  
   1350     COUNT_INST(I_MOVW);
   1351     load_reg( R_EAX, 0 );
   1352     ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
   1353     check_walign16( R_EAX );
   1354     load_reg( R_EDX, Rm );
   1355     MEM_WRITE_WORD( R_EAX, R_EDX );
   1356     sh4_x86.tstate = TSTATE_NONE;
   1357 :}
   1358 MOV.W R0, @(disp, GBR) {:  
   1359     COUNT_INST(I_MOVW);
   1360     load_spreg( R_EAX, R_GBR );
   1361     ADD_imm32_r32( disp, R_EAX );
   1362     check_walign16( R_EAX );
   1363     load_reg( R_EDX, 0 );
   1364     MEM_WRITE_WORD( R_EAX, R_EDX );
   1365     sh4_x86.tstate = TSTATE_NONE;
   1366 :}
   1367 MOV.W R0, @(disp, Rn) {:  
   1368     COUNT_INST(I_MOVW);
   1369     load_reg( R_EAX, Rn );
   1370     ADD_imm32_r32( disp, R_EAX );
   1371     check_walign16( R_EAX );
   1372     load_reg( R_EDX, 0 );
   1373     MEM_WRITE_WORD( R_EAX, R_EDX );
   1374     sh4_x86.tstate = TSTATE_NONE;
   1375 :}
   1376 MOV.W @Rm, Rn {:  
   1377     COUNT_INST(I_MOVW);
   1378     load_reg( R_EAX, Rm );
   1379     check_ralign16( R_EAX );
   1380     MEM_READ_WORD( R_EAX, R_EAX );
   1381     store_reg( R_EAX, Rn );
   1382     sh4_x86.tstate = TSTATE_NONE;
   1383 :}
   1384 MOV.W @Rm+, Rn {:  
        /* Post-increment load; increment skipped when Rm == Rn (see MOV.B @Rm+).
         * NOTE(review): upstream line 1391 (closing brace of the if) was
         * dropped by the extraction — verify against the upstream file. */
   1385     COUNT_INST(I_MOVW);
   1386     load_reg( R_EAX, Rm );
   1387     check_ralign16( R_EAX );
   1388     MEM_READ_WORD( R_EAX, R_EAX );
   1389     if( Rm != Rn ) {
   1390         ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   1392     store_reg( R_EAX, Rn );
   1393     sh4_x86.tstate = TSTATE_NONE;
   1394 :}
   1395 MOV.W @(R0, Rm), Rn {:  
   1396     COUNT_INST(I_MOVW);
   1397     load_reg( R_EAX, 0 );
   1398     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   1399     check_ralign16( R_EAX );
   1400     MEM_READ_WORD( R_EAX, R_EAX );
   1401     store_reg( R_EAX, Rn );
   1402     sh4_x86.tstate = TSTATE_NONE;
   1403 :}
   1404 MOV.W @(disp, GBR), R0 {:  
   1405     COUNT_INST(I_MOVW);
   1406     load_spreg( R_EAX, R_GBR );
   1407     ADD_imm32_r32( disp, R_EAX );
   1408     check_ralign16( R_EAX );
   1409     MEM_READ_WORD( R_EAX, R_EAX );
   1410     store_reg( R_EAX, 0 );
   1411     sh4_x86.tstate = TSTATE_NONE;
   1412 :}
   1413 MOV.W @(disp, PC), Rn {:  
        /* PC-relative word load with sign-extension; same icache fast path as
         * MOV.L @(disp, PC), Rn.  Note PC is not masked here: word loads are
         * relative to the raw pc+4, not to a longword boundary. */
   1414     COUNT_INST(I_MOVW);
   1415     if( sh4_x86.in_delay_slot ) {
   1416 	SLOTILLEGAL();
   1417     } else {
   1418 	// See comments for MOV.L @(disp, PC), Rn
   1419 	uint32_t target = pc + disp + 4;
   1420 	if( IS_IN_ICACHE(target) ) {
   1421 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
   1422 	    MOV_moff32_EAX( ptr );
   1423 	    MOVSX_r16_r32( R_EAX, R_EAX );
   1424 	} else {
   1425 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
   1426 	    ADD_sh4r_r32( R_PC, R_EAX );
   1427 	    MEM_READ_WORD( R_EAX, R_EAX );
   1428 	    sh4_x86.tstate = TSTATE_NONE;
   1430 	store_reg( R_EAX, Rn );
   1432 :}
   1433 MOV.W @(disp, Rm), R0 {:  
   1434     COUNT_INST(I_MOVW);
   1435     load_reg( R_EAX, Rm );
   1436     ADD_imm32_r32( disp, R_EAX );
   1437     check_ralign16( R_EAX );
   1438     MEM_READ_WORD( R_EAX, R_EAX );
   1439     store_reg( R_EAX, 0 );
   1440     sh4_x86.tstate = TSTATE_NONE;
   1441 :}
   1442 MOVA @(disp, PC), R0 {:  
        /* R0 = (pc & ~3) + 4 + disp, computed at runtime relative to sh4r.pc
         * so the block works regardless of the virtual address it runs at.
         * Illegal in a delay slot. */
   1443     COUNT_INST(I_MOVA);
   1444     if( sh4_x86.in_delay_slot ) {
   1445 	SLOTILLEGAL();
   1446     } else {
   1447 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
   1448 	ADD_sh4r_r32( R_PC, R_ECX );
   1449 	store_reg( R_ECX, 0 );
   1450 	sh4_x86.tstate = TSTATE_NONE;
   1452 :}
   1453 MOVCA.L R0, @Rn {:  
        /* Treated as a plain long store; the cache-allocate semantics of MOVCA
         * have no equivalent in the emulated memory model. */
   1454     COUNT_INST(I_MOVCA);
   1455     load_reg( R_EAX, Rn );
   1456     check_walign32( R_EAX );
   1457     load_reg( R_EDX, 0 );
   1458     MEM_WRITE_LONG( R_EAX, R_EDX );
   1459     sh4_x86.tstate = TSTATE_NONE;
   1460 :}
   1462 /* Control transfer instructions */
        /* Common structure: branches are illegal in a delay slot
         * (SLOTILLEGAL).  For delayed branches, when the delay-slot
         * instruction cannot be translated (UNTRANSLATABLE) the block stores
         * the branch target in sh4r.new_pc and exits to the emulator
         * (exit_block_emu); otherwise the slot instruction is translated
         * inline and the block exits directly to the target.  The returned
         * value is the number of SH4 bytes consumed (2 or 4).
         * NOTE(review): several upstream closing-brace lines in this region
         * were dropped by the extraction (line numbers jump) — verify brace
         * balance against the upstream file. */
   1463 BF disp {:
   1464     COUNT_INST(I_BF);
   1465     if( sh4_x86.in_delay_slot ) {
   1466 	SLOTILLEGAL();
   1467     } else {
   1468 	sh4vma_t target = disp + pc + 4;
   1469 	JT_rel8( nottaken );
   1470 	exit_block_rel(target, pc+2 );
   1471 	JMP_TARGET(nottaken);
   1472 	return 2;
   1474 :}
   1475 BF/S disp {:
        /* Delayed conditional branch.  In the translatable case the delay-slot
         * instruction is translated TWICE — once on the taken path and once on
         * the fall-through path after the rel32 jump is backpatched — with
         * tstate restored in between so both copies see the same flag cache. */
   1476     COUNT_INST(I_BFS);
   1477     if( sh4_x86.in_delay_slot ) {
   1478 	SLOTILLEGAL();
   1479     } else {
   1480 	sh4_x86.in_delay_slot = DELAY_PC;
   1481 	if( UNTRANSLATABLE(pc+2) ) {
   1482 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
   1483 	    JT_rel8(nottaken);
   1484 	    ADD_imm32_r32( disp, R_EAX );
   1485 	    JMP_TARGET(nottaken);
   1486 	    ADD_sh4r_r32( R_PC, R_EAX );
   1487 	    store_spreg( R_EAX, R_NEW_PC );
   1488 	    exit_block_emu(pc+2);
   1489 	    sh4_x86.branch_taken = TRUE;
   1490 	    return 2;
   1491 	} else {
   1492 	    if( sh4_x86.tstate == TSTATE_NONE ) {
   1493 		CMP_imm8s_sh4r( 1, R_T );
   1494 		sh4_x86.tstate = TSTATE_E;
   1496 	    sh4vma_t target = disp + pc + 4;
   1497 	    OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
   1498 	    int save_tstate = sh4_x86.tstate;
   1499 	    sh4_translate_instruction(pc+2);
   1500 	    exit_block_rel( target, pc+4 );
   1502 	    // not taken
   1503 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
   1504 	    sh4_x86.tstate = save_tstate;
   1505 	    sh4_translate_instruction(pc+2);
   1506 	    return 4;
   1509 :}
   1510 BRA disp {:  
   1511     COUNT_INST(I_BRA);
   1512     if( sh4_x86.in_delay_slot ) {
   1513 	SLOTILLEGAL();
   1514     } else {
   1515 	sh4_x86.in_delay_slot = DELAY_PC;
   1516 	sh4_x86.branch_taken = TRUE;
   1517 	if( UNTRANSLATABLE(pc+2) ) {
   1518 	    load_spreg( R_EAX, R_PC );
   1519 	    ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
   1520 	    store_spreg( R_EAX, R_NEW_PC );
   1521 	    exit_block_emu(pc+2);
   1522 	    return 2;
   1523 	} else {
   1524 	    sh4_translate_instruction( pc + 2 );
   1525 	    exit_block_rel( disp + pc + 4, pc+4 );
   1526 	    return 4;
   1529 :}
   1530 BRAF Rn {:  
        /* Register-relative branch: target = pc + 4 + Rn, computed at runtime,
         * so the block always exits via new_pc (exit_block_newpcset). */
   1531     COUNT_INST(I_BRAF);
   1532     if( sh4_x86.in_delay_slot ) {
   1533 	SLOTILLEGAL();
   1534     } else {
   1535 	load_spreg( R_EAX, R_PC );
   1536 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
   1537 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
   1538 	store_spreg( R_EAX, R_NEW_PC );
   1539 	sh4_x86.in_delay_slot = DELAY_PC;
   1540 	sh4_x86.tstate = TSTATE_NONE;
   1541 	sh4_x86.branch_taken = TRUE;
   1542 	if( UNTRANSLATABLE(pc+2) ) {
   1543 	    exit_block_emu(pc+2);
   1544 	    return 2;
   1545 	} else {
   1546 	    sh4_translate_instruction( pc + 2 );
   1547 	    exit_block_newpcset(pc+4);
   1548 	    return 4;
   1551 :}
   1552 BSR disp {:  
        /* Branch to subroutine: PR = pc + 4 (address after the delay slot),
         * then branch as BRA. */
   1553     COUNT_INST(I_BSR);
   1554     if( sh4_x86.in_delay_slot ) {
   1555 	SLOTILLEGAL();
   1556     } else {
   1557 	load_spreg( R_EAX, R_PC );
   1558 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
   1559 	store_spreg( R_EAX, R_PR );
   1560 	sh4_x86.in_delay_slot = DELAY_PC;
   1561 	sh4_x86.branch_taken = TRUE;
   1562 	sh4_x86.tstate = TSTATE_NONE;
   1563 	if( UNTRANSLATABLE(pc+2) ) {
   1564 	    ADD_imm32_r32( disp, R_EAX );
   1565 	    store_spreg( R_EAX, R_NEW_PC );
   1566 	    exit_block_emu(pc+2);
   1567 	    return 2;
   1568 	} else {
   1569 	    sh4_translate_instruction( pc + 2 );
   1570 	    exit_block_rel( disp + pc + 4, pc+4 );
   1571 	    return 4;
   1574 :}
   1575 BSRF Rn {:  
        /* Branch to subroutine, register-relative: PR = pc + 4, new_pc = PR + Rn
         * (EAX still holds pc+4 when Rn is added). */
   1576     COUNT_INST(I_BSRF);
   1577     if( sh4_x86.in_delay_slot ) {
   1578 	SLOTILLEGAL();
   1579     } else {
   1580 	load_spreg( R_EAX, R_PC );
   1581 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
   1582 	store_spreg( R_EAX, R_PR );
   1583 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
   1584 	store_spreg( R_EAX, R_NEW_PC );
   1586 	sh4_x86.in_delay_slot = DELAY_PC;
   1587 	sh4_x86.tstate = TSTATE_NONE;
   1588 	sh4_x86.branch_taken = TRUE;
   1589 	if( UNTRANSLATABLE(pc+2) ) {
   1590 	    exit_block_emu(pc+2);
   1591 	    return 2;
   1592 	} else {
   1593 	    sh4_translate_instruction( pc + 2 );
   1594 	    exit_block_newpcset(pc+4);
   1595 	    return 4;
   1598 :}
   1599 BT disp {:
        /* Mirror of BF with the condition inverted (JF instead of JT).
         * NOTE(review): dropped closing-brace lines in this region too —
         * verify brace balance against the upstream file. */
   1600     COUNT_INST(I_BT);
   1601     if( sh4_x86.in_delay_slot ) {
   1602 	SLOTILLEGAL();
   1603     } else {
   1604 	sh4vma_t target = disp + pc + 4;
   1605 	JF_rel8( nottaken );
   1606 	exit_block_rel(target, pc+2 );
   1607 	JMP_TARGET(nottaken);
   1608 	return 2;
   1610 :}
   1611 BT/S disp {:
        /* Mirror of BF/S with the branch sense inverted: the rel32 opcode is
         * 0x80 + (tstate ^ 1), i.e. the complementary x86 condition code. */
   1612     COUNT_INST(I_BTS);
   1613     if( sh4_x86.in_delay_slot ) {
   1614 	SLOTILLEGAL();
   1615     } else {
   1616 	sh4_x86.in_delay_slot = DELAY_PC;
   1617 	if( UNTRANSLATABLE(pc+2) ) {
   1618 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
   1619 	    JF_rel8(nottaken);
   1620 	    ADD_imm32_r32( disp, R_EAX );
   1621 	    JMP_TARGET(nottaken);
   1622 	    ADD_sh4r_r32( R_PC, R_EAX );
   1623 	    store_spreg( R_EAX, R_NEW_PC );
   1624 	    exit_block_emu(pc+2);
   1625 	    sh4_x86.branch_taken = TRUE;
   1626 	    return 2;
   1627 	} else {
   1628 	    if( sh4_x86.tstate == TSTATE_NONE ) {
   1629 		CMP_imm8s_sh4r( 1, R_T );
   1630 		sh4_x86.tstate = TSTATE_E;
   1632 	    OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
   1633 	    int save_tstate = sh4_x86.tstate;
   1634 	    sh4_translate_instruction(pc+2);
   1635 	    exit_block_rel( disp + pc + 4, pc+4 );
   1636 	    // not taken
   1637 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
   1638 	    sh4_x86.tstate = save_tstate;
   1639 	    sh4_translate_instruction(pc+2);
   1640 	    return 4;
   1643 :}
   1644 JMP @Rn {:  
        /* Unconditional indirect jump: new_pc = Rn. */
   1645     COUNT_INST(I_JMP);
   1646     if( sh4_x86.in_delay_slot ) {
   1647 	SLOTILLEGAL();
   1648     } else {
   1649 	load_reg( R_ECX, Rn );
   1650 	store_spreg( R_ECX, R_NEW_PC );
   1651 	sh4_x86.in_delay_slot = DELAY_PC;
   1652 	sh4_x86.branch_taken = TRUE;
   1653 	if( UNTRANSLATABLE(pc+2) ) {
   1654 	    exit_block_emu(pc+2);
   1655 	    return 2;
   1656 	} else {
   1657 	    sh4_translate_instruction(pc+2);
   1658 	    exit_block_newpcset(pc+4);
   1659 	    return 4;
   1662 :}
   1663 JSR @Rn {:  
        /* Indirect call: PR = pc + 4 (address after the delay slot), new_pc = Rn. */
   1664     COUNT_INST(I_JSR);
   1665     if( sh4_x86.in_delay_slot ) {
   1666 	SLOTILLEGAL();
   1667     } else {
   1668 	load_spreg( R_EAX, R_PC );
   1669 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
   1670 	store_spreg( R_EAX, R_PR );
   1671 	load_reg( R_ECX, Rn );
   1672 	store_spreg( R_ECX, R_NEW_PC );
   1673 	sh4_x86.in_delay_slot = DELAY_PC;
   1674 	sh4_x86.branch_taken = TRUE;
   1675 	sh4_x86.tstate = TSTATE_NONE;
   1676 	if( UNTRANSLATABLE(pc+2) ) {
   1677 	    exit_block_emu(pc+2);
   1678 	    return 2;
   1679 	} else {
   1680 	    sh4_translate_instruction(pc+2);
   1681 	    exit_block_newpcset(pc+4);
   1682 	    return 4;
   1685 :}
   1686 RTE {:  
        /* Return from exception (privileged): new_pc = SPC, SR = SSR via
         * sh4_write_sr.  new_pc is stored BEFORE the SR write so that an
         * FPU-disable exception in the delay slot sees the right state;
         * fpuen_checked is reset because SR (and hence FPSCR enable) changed. */
   1687     COUNT_INST(I_RTE);
   1688     if( sh4_x86.in_delay_slot ) {
   1689 	SLOTILLEGAL();
   1690     } else {
   1691 	check_priv();
   1692 	load_spreg( R_ECX, R_SPC );
   1693 	store_spreg( R_ECX, R_NEW_PC );
   1694 	load_spreg( R_EAX, R_SSR );
   1695 	call_func1( sh4_write_sr, R_EAX );
   1696 	sh4_x86.in_delay_slot = DELAY_PC;
   1697 	sh4_x86.fpuen_checked = FALSE;
   1698 	sh4_x86.tstate = TSTATE_NONE;
   1699 	sh4_x86.branch_taken = TRUE;
   1700 	if( UNTRANSLATABLE(pc+2) ) {
   1701 	    exit_block_emu(pc+2);
   1702 	    return 2;
   1703 	} else {
   1704 	    sh4_translate_instruction(pc+2);
   1705 	    exit_block_newpcset(pc+4);
   1706 	    return 4;
   1709 :}
   1710 RTS {:  
        /* Return from subroutine: new_pc = PR. */
   1711     COUNT_INST(I_RTS);
   1712     if( sh4_x86.in_delay_slot ) {
   1713 	SLOTILLEGAL();
   1714     } else {
   1715 	load_spreg( R_ECX, R_PR );
   1716 	store_spreg( R_ECX, R_NEW_PC );
   1717 	sh4_x86.in_delay_slot = DELAY_PC;
   1718 	sh4_x86.branch_taken = TRUE;
   1719 	if( UNTRANSLATABLE(pc+2) ) {
   1720 	    exit_block_emu(pc+2);
   1721 	    return 2;
   1722 	} else {
   1723 	    sh4_translate_instruction(pc+2);
   1724 	    exit_block_newpcset(pc+4);
   1725 	    return 4;
   1728 :}
   1729 TRAPA #imm {:  
        /* Software trap: commit PC (pc+2, the address TRAPA saves to SPC),
         * then raise the trap through sh4_raise_trap and end the block. */
   1730     COUNT_INST(I_TRAPA);
   1731     if( sh4_x86.in_delay_slot ) {
   1732 	SLOTILLEGAL();
   1733     } else {
   1734 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
   1735 	ADD_r32_sh4r( R_ECX, R_PC );
   1736 	load_imm32( R_EAX, imm );
   1737 	call_func1( sh4_raise_trap, R_EAX );
   1738 	sh4_x86.tstate = TSTATE_NONE;
   1739 	exit_block_pcset(pc+2);
   1740 	sh4_x86.branch_taken = TRUE;
   1741 	return 2;
   1743 :}
   1744 UNDEF {:  
        /* Undefined opcode: raise slot-illegal when it sits in a delay slot
         * (exception PC points at the branch, hence pc-2), otherwise a plain
         * illegal-instruction exception at pc. */
   1745     COUNT_INST(I_UNDEF);
   1746     if( sh4_x86.in_delay_slot ) {
   1747 	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2);    
   1748     } else {
   1749 	exit_block_exc(EXC_ILLEGAL, pc);    
   1750 	return 2;
   1752 :}
   1754 CLRMAC {:  
        /* MACH = MACL = 0. */
   1755     COUNT_INST(I_CLRMAC);
   1756     XOR_r32_r32(R_EAX, R_EAX);
   1757     store_spreg( R_EAX, R_MACL );
   1758     store_spreg( R_EAX, R_MACH );
   1759     sh4_x86.tstate = TSTATE_NONE;
   1760 :}
   1761 CLRS {:
        /* S = 0, written via the carry flag (CLC + SETC into sh4r.s). */
   1762     COUNT_INST(I_CLRS);
   1763     CLC();
   1764     SETC_sh4r(R_S);
   1765     sh4_x86.tstate = TSTATE_NONE;
   1766 :}
   1767 CLRT {:  
        /* T = 0; carry now mirrors T, so the tstate cache stays valid. */
   1768     COUNT_INST(I_CLRT);
   1769     CLC();
   1770     SETC_t();
   1771     sh4_x86.tstate = TSTATE_C;
   1772 :}
   1773 SETS {:  
   1774     COUNT_INST(I_SETS);
   1775     STC();
   1776     SETC_sh4r(R_S);
   1777     sh4_x86.tstate = TSTATE_NONE;
   1778 :}
   1779 SETT {:  
   1780     COUNT_INST(I_SETT);
   1781     STC();
   1782     SETC_t();
   1783     sh4_x86.tstate = TSTATE_C;
   1784 :}
   1786 /* Floating point moves */
        /* All FP templates start with check_fpuen() (FPU-disable exception if
         * SR.FD is set).  sh4_x86.double_size reflects FPSCR.SZ: when set,
         * FMOV transfers a 64-bit register pair (dr0 = high/first long,
         * dr1 = second) as two 32-bit memory operations.
         * NOTE(review): several upstream closing-brace lines here were dropped
         * by the extraction (line numbers jump) — verify brace balance against
         * the upstream file. */
   1787 FMOV FRm, FRn {:  
   1788     COUNT_INST(I_FMOV1);
   1789     check_fpuen();
   1790     if( sh4_x86.double_size ) {
   1791         load_dr0( R_EAX, FRm );
   1792         load_dr1( R_ECX, FRm );
   1793         store_dr0( R_EAX, FRn );
   1794         store_dr1( R_ECX, FRn );
   1795     } else {
   1796         load_fr( R_EAX, FRm ); // SZ=0 branch
   1797         store_fr( R_EAX, FRn );
   1799 :}
   1800 FMOV FRm, @Rn {: 
        /* Store: 64-bit mode writes two longs at Rn and Rn+4 (the address is
         * reloaded because MEM_WRITE_LONG may clobber registers). */
   1801     COUNT_INST(I_FMOV2);
   1802     check_fpuen();
   1803     load_reg( R_EAX, Rn );
   1804     if( sh4_x86.double_size ) {
   1805         check_walign64( R_EAX );
   1806         load_dr0( R_EDX, FRm );
   1807         MEM_WRITE_LONG( R_EAX, R_EDX );
   1808         load_reg( R_EAX, Rn );
   1809         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
   1810         load_dr1( R_EDX, FRm );
   1811         MEM_WRITE_LONG( R_EAX, R_EDX );
   1812     } else {
   1813         check_walign32( R_EAX );
   1814         load_fr( R_EDX, FRm );
   1815         MEM_WRITE_LONG( R_EAX, R_EDX );
   1817     sh4_x86.tstate = TSTATE_NONE;
   1818 :}
   1819 FMOV @Rm, FRn {:  
   1820     COUNT_INST(I_FMOV5);
   1821     check_fpuen();
   1822     load_reg( R_EAX, Rm );
   1823     if( sh4_x86.double_size ) {
   1824         check_ralign64( R_EAX );
   1825         MEM_READ_LONG( R_EAX, R_EAX );
   1826         store_dr0( R_EAX, FRn );
   1827         load_reg( R_EAX, Rm );
   1828         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
   1829         MEM_READ_LONG( R_EAX, R_EAX );
   1830         store_dr1( R_EAX, FRn );
   1831     } else {
   1832         check_ralign32( R_EAX );
   1833         MEM_READ_LONG( R_EAX, R_EAX );
   1834         store_fr( R_EAX, FRn );
   1836     sh4_x86.tstate = TSTATE_NONE;
   1837 :}
   1838 FMOV FRm, @-Rn {:  
        /* Pre-decrement store; Rn is committed only after both writes succeed,
         * so a memory exception leaves Rn unmodified. */
   1839     COUNT_INST(I_FMOV3);
   1840     check_fpuen();
   1841     load_reg( R_EAX, Rn );
   1842     if( sh4_x86.double_size ) {
   1843         check_walign64( R_EAX );
   1844         LEA_r32disp8_r32( R_EAX, -8, R_EAX );
   1845         load_dr0( R_EDX, FRm );
   1846         MEM_WRITE_LONG( R_EAX, R_EDX );
   1847         load_reg( R_EAX, Rn );
   1848         LEA_r32disp8_r32( R_EAX, -4, R_EAX );
   1849         load_dr1( R_EDX, FRm );
   1850         MEM_WRITE_LONG( R_EAX, R_EDX );
   1851         ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
   1852     } else {
   1853         check_walign32( R_EAX );
   1854         LEA_r32disp8_r32( R_EAX, -4, R_EAX );
   1855         load_fr( R_EDX, FRm );
   1856         MEM_WRITE_LONG( R_EAX, R_EDX );
   1857         ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
   1859     sh4_x86.tstate = TSTATE_NONE;
   1860 :}
   1861 FMOV @Rm+, FRn {:
        /* Post-increment load; Rm advanced by 8 (SZ=1) or 4 (SZ=0) after the
         * reads complete. */
   1862     COUNT_INST(I_FMOV6);
   1863     check_fpuen();
   1864     load_reg( R_EAX, Rm );
   1865     if( sh4_x86.double_size ) {
   1866         check_ralign64( R_EAX );
   1867         MEM_READ_LONG( R_EAX, R_EAX );
   1868         store_dr0( R_EAX, FRn );
   1869         load_reg( R_EAX, Rm );
   1870         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
   1871         MEM_READ_LONG( R_EAX, R_EAX );
   1872         store_dr1( R_EAX, FRn );
   1873         ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
   1874     } else {
   1875         check_ralign32( R_EAX );
   1876         MEM_READ_LONG( R_EAX, R_EAX );
   1877         store_fr( R_EAX, FRn );
   1878         ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   1880     sh4_x86.tstate = TSTATE_NONE;
   1881 :}
   1882 FMOV FRm, @(R0, Rn) {:  
        /* Indexed store; R0+Rn is recomputed for the second long because the
         * first MEM_WRITE_LONG may clobber registers. */
   1883     COUNT_INST(I_FMOV4);
   1884     check_fpuen();
   1885     load_reg( R_EAX, Rn );
   1886     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
   1887     if( sh4_x86.double_size ) {
   1888         check_walign64( R_EAX );
   1889         load_dr0( R_EDX, FRm );
   1890         MEM_WRITE_LONG( R_EAX, R_EDX );
   1891         load_reg( R_EAX, Rn );
   1892         ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
   1893         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
   1894         load_dr1( R_EDX, FRm );
   1895         MEM_WRITE_LONG( R_EAX, R_EDX );
   1896     } else {
   1897         check_walign32( R_EAX );
   1898         load_fr( R_EDX, FRm );
   1899         MEM_WRITE_LONG( R_EAX, R_EDX ); // 12
   1901     sh4_x86.tstate = TSTATE_NONE;
   1902 :}
   1903 FMOV @(R0, Rm), FRn {:  
   1904     COUNT_INST(I_FMOV7);
   1905     check_fpuen();
   1906     load_reg( R_EAX, Rm );
   1907     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
   1908     if( sh4_x86.double_size ) {
   1909         check_ralign64( R_EAX );
   1910         MEM_READ_LONG( R_EAX, R_EAX );
   1911         store_dr0( R_EAX, FRn );
   1912         load_reg( R_EAX, Rm );
   1913         ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
   1914         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
   1915         MEM_READ_LONG( R_EAX, R_EAX );
   1916         store_dr1( R_EAX, FRn );
   1917     } else {
   1918         check_ralign32( R_EAX );
   1919         MEM_READ_LONG( R_EAX, R_EAX );
   1920         store_fr( R_EAX, FRn );
   1922     sh4_x86.tstate = TSTATE_NONE;
   1923 :}
  1924 FLDI0 FRn {:  /* IFF PR=0 */
  1925     COUNT_INST(I_FLDI0);
  1926     check_fpuen();
  1927     if( sh4_x86.double_prec == 0 ) {
  1928         XOR_r32_r32( R_EAX, R_EAX );
  1929         store_fr( R_EAX, FRn );
  1931     sh4_x86.tstate = TSTATE_NONE;
  1932 :}
  1933 FLDI1 FRn {:  /* IFF PR=0 */
  1934     COUNT_INST(I_FLDI1);
  1935     check_fpuen();
  1936     if( sh4_x86.double_prec == 0 ) {
  1937         load_imm32(R_EAX, 0x3F800000);
  1938         store_fr( R_EAX, FRn );
  1940 :}
  1942 FLOAT FPUL, FRn {:  
  1943     COUNT_INST(I_FLOAT);
  1944     check_fpuen();
  1945     FILD_sh4r(R_FPUL);
  1946     if( sh4_x86.double_prec ) {
  1947         pop_dr( FRn );
  1948     } else {
  1949         pop_fr( FRn );
  1951 :}
  1952 FTRC FRm, FPUL {:  
  1953     COUNT_INST(I_FTRC);
  1954     check_fpuen();
  1955     if( sh4_x86.double_prec ) {
  1956         push_dr( FRm );
  1957     } else {
  1958         push_fr( FRm );
  1960     load_ptr( R_ECX, &max_int );
  1961     FILD_r32ind( R_ECX );
  1962     FCOMIP_st(1);
  1963     JNA_rel8( sat );
  1964     load_ptr( R_ECX, &min_int );  // 5
  1965     FILD_r32ind( R_ECX );           // 2
  1966     FCOMIP_st(1);                   // 2
  1967     JAE_rel8( sat2 );            // 2
  1968     load_ptr( R_EAX, &save_fcw );
  1969     FNSTCW_r32ind( R_EAX );
  1970     load_ptr( R_EDX, &trunc_fcw );
  1971     FLDCW_r32ind( R_EDX );
  1972     FISTP_sh4r(R_FPUL);             // 3
  1973     FLDCW_r32ind( R_EAX );
  1974     JMP_rel8(end);             // 2
  1976     JMP_TARGET(sat);
  1977     JMP_TARGET(sat2);
  1978     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  1979     store_spreg( R_ECX, R_FPUL );
  1980     FPOP_st();
  1981     JMP_TARGET(end);
  1982     sh4_x86.tstate = TSTATE_NONE;
  1983 :}
FLDS FRm, FPUL {:  
    /* Raw 32-bit copy FRm -> FPUL (no conversion). */
    COUNT_INST(I_FLDS);
    check_fpuen();
    load_fr( R_EAX, FRm );
    store_spreg( R_EAX, R_FPUL );
:}
FSTS FPUL, FRn {:  
    /* Raw 32-bit copy FPUL -> FRn (no conversion). */
    COUNT_INST(I_FSTS);
    check_fpuen();
    load_spreg( R_EAX, R_FPUL );
    store_fr( R_EAX, FRn );
:}
  1996 FCNVDS FRm, FPUL {:  
  1997     COUNT_INST(I_FCNVDS);
  1998     check_fpuen();
  1999     if( sh4_x86.double_prec ) {
  2000         push_dr( FRm );
  2001         pop_fpul();
  2003 :}
  2004 FCNVSD FPUL, FRn {:  
  2005     COUNT_INST(I_FCNVSD);
  2006     check_fpuen();
  2007     if( sh4_x86.double_prec ) {
  2008         push_fpul();
  2009         pop_dr( FRn );
  2011 :}
  2013 /* Floating point instructions */
  2014 FABS FRn {:  
  2015     COUNT_INST(I_FABS);
  2016     check_fpuen();
  2017     if( sh4_x86.double_prec ) {
  2018         push_dr(FRn);
  2019         FABS_st0();
  2020         pop_dr(FRn);
  2021     } else {
  2022         push_fr(FRn);
  2023         FABS_st0();
  2024         pop_fr(FRn);
  2026 :}
  2027 FADD FRm, FRn {:  
  2028     COUNT_INST(I_FADD);
  2029     check_fpuen();
  2030     if( sh4_x86.double_prec ) {
  2031         push_dr(FRm);
  2032         push_dr(FRn);
  2033         FADDP_st(1);
  2034         pop_dr(FRn);
  2035     } else {
  2036         push_fr(FRm);
  2037         push_fr(FRn);
  2038         FADDP_st(1);
  2039         pop_fr(FRn);
  2041 :}
  2042 FDIV FRm, FRn {:  
  2043     COUNT_INST(I_FDIV);
  2044     check_fpuen();
  2045     if( sh4_x86.double_prec ) {
  2046         push_dr(FRn);
  2047         push_dr(FRm);
  2048         FDIVP_st(1);
  2049         pop_dr(FRn);
  2050     } else {
  2051         push_fr(FRn);
  2052         push_fr(FRm);
  2053         FDIVP_st(1);
  2054         pop_fr(FRn);
  2056 :}
  2057 FMAC FR0, FRm, FRn {:  
  2058     COUNT_INST(I_FMAC);
  2059     check_fpuen();
  2060     if( sh4_x86.double_prec ) {
  2061         push_dr( 0 );
  2062         push_dr( FRm );
  2063         FMULP_st(1);
  2064         push_dr( FRn );
  2065         FADDP_st(1);
  2066         pop_dr( FRn );
  2067     } else {
  2068         push_fr( 0 );
  2069         push_fr( FRm );
  2070         FMULP_st(1);
  2071         push_fr( FRn );
  2072         FADDP_st(1);
  2073         pop_fr( FRn );
  2075 :}
  2077 FMUL FRm, FRn {:  
  2078     COUNT_INST(I_FMUL);
  2079     check_fpuen();
  2080     if( sh4_x86.double_prec ) {
  2081         push_dr(FRm);
  2082         push_dr(FRn);
  2083         FMULP_st(1);
  2084         pop_dr(FRn);
  2085     } else {
  2086         push_fr(FRm);
  2087         push_fr(FRn);
  2088         FMULP_st(1);
  2089         pop_fr(FRn);
  2091 :}
  2092 FNEG FRn {:  
  2093     COUNT_INST(I_FNEG);
  2094     check_fpuen();
  2095     if( sh4_x86.double_prec ) {
  2096         push_dr(FRn);
  2097         FCHS_st0();
  2098         pop_dr(FRn);
  2099     } else {
  2100         push_fr(FRn);
  2101         FCHS_st0();
  2102         pop_fr(FRn);
  2104 :}
  2105 FSRRA FRn {:  
  2106     COUNT_INST(I_FSRRA);
  2107     check_fpuen();
  2108     if( sh4_x86.double_prec == 0 ) {
  2109         FLD1_st0();
  2110         push_fr(FRn);
  2111         FSQRT_st0();
  2112         FDIVP_st(1);
  2113         pop_fr(FRn);
  2115 :}
  2116 FSQRT FRn {:  
  2117     COUNT_INST(I_FSQRT);
  2118     check_fpuen();
  2119     if( sh4_x86.double_prec ) {
  2120         push_dr(FRn);
  2121         FSQRT_st0();
  2122         pop_dr(FRn);
  2123     } else {
  2124         push_fr(FRn);
  2125         FSQRT_st0();
  2126         pop_fr(FRn);
  2128 :}
  2129 FSUB FRm, FRn {:  
  2130     COUNT_INST(I_FSUB);
  2131     check_fpuen();
  2132     if( sh4_x86.double_prec ) {
  2133         push_dr(FRn);
  2134         push_dr(FRm);
  2135         FSUBP_st(1);
  2136         pop_dr(FRn);
  2137     } else {
  2138         push_fr(FRn);
  2139         push_fr(FRm);
  2140         FSUBP_st(1);
  2141         pop_fr(FRn);
  2143 :}
  2145 FCMP/EQ FRm, FRn {:  
  2146     COUNT_INST(I_FCMPEQ);
  2147     check_fpuen();
  2148     if( sh4_x86.double_prec ) {
  2149         push_dr(FRm);
  2150         push_dr(FRn);
  2151     } else {
  2152         push_fr(FRm);
  2153         push_fr(FRn);
  2155     FCOMIP_st(1);
  2156     SETE_t();
  2157     FPOP_st();
  2158     sh4_x86.tstate = TSTATE_E;
  2159 :}
  2160 FCMP/GT FRm, FRn {:  
  2161     COUNT_INST(I_FCMPGT);
  2162     check_fpuen();
  2163     if( sh4_x86.double_prec ) {
  2164         push_dr(FRm);
  2165         push_dr(FRn);
  2166     } else {
  2167         push_fr(FRm);
  2168         push_fr(FRn);
  2170     FCOMIP_st(1);
  2171     SETA_t();
  2172     FPOP_st();
  2173     sh4_x86.tstate = TSTATE_A;
  2174 :}
  2176 FSCA FPUL, FRn {:  
  2177     COUNT_INST(I_FSCA);
  2178     check_fpuen();
  2179     if( sh4_x86.double_prec == 0 ) {
  2180         LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
  2181         load_spreg( R_EAX, R_FPUL );
  2182         call_func2( sh4_fsca, R_EAX, R_EDX );
  2184     sh4_x86.tstate = TSTATE_NONE;
  2185 :}
  2186 FIPR FVm, FVn {:  
  2187     COUNT_INST(I_FIPR);
  2188     check_fpuen();
  2189     if( sh4_x86.double_prec == 0 ) {
  2190         if( sh4_x86.sse3_enabled ) {
  2191             MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2192             MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2193             HADDPS_xmm_xmm( 4, 4 ); 
  2194             HADDPS_xmm_xmm( 4, 4 );
  2195             MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2196         } else {
  2197             push_fr( FVm<<2 );
  2198             push_fr( FVn<<2 );
  2199             FMULP_st(1);
  2200             push_fr( (FVm<<2)+1);
  2201             push_fr( (FVn<<2)+1);
  2202             FMULP_st(1);
  2203             FADDP_st(1);
  2204             push_fr( (FVm<<2)+2);
  2205             push_fr( (FVn<<2)+2);
  2206             FMULP_st(1);
  2207             FADDP_st(1);
  2208             push_fr( (FVm<<2)+3);
  2209             push_fr( (FVn<<2)+3);
  2210             FMULP_st(1);
  2211             FADDP_st(1);
  2212             pop_fr( (FVn<<2)+3);
  2215 :}
  2216 FTRV XMTRX, FVn {:  
  2217     COUNT_INST(I_FTRV);
  2218     check_fpuen();
  2219     if( sh4_x86.double_prec == 0 ) {
  2220         if( sh4_x86.sse3_enabled ) {
  2221             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2222             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2223             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2224             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2226             MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2227             MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2228             MOVAPS_xmm_xmm( 4, 6 );
  2229             MOVAPS_xmm_xmm( 5, 7 );
  2230             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2231             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2232             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2233             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2234             MULPS_xmm_xmm( 0, 4 );
  2235             MULPS_xmm_xmm( 1, 5 );
  2236             MULPS_xmm_xmm( 2, 6 );
  2237             MULPS_xmm_xmm( 3, 7 );
  2238             ADDPS_xmm_xmm( 5, 4 );
  2239             ADDPS_xmm_xmm( 7, 6 );
  2240             ADDPS_xmm_xmm( 6, 4 );
  2241             MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2242         } else {
  2243             LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
  2244             call_func1( sh4_ftrv, R_EAX );
  2247     sh4_x86.tstate = TSTATE_NONE;
  2248 :}
FRCHG {:  
    /* Toggle the FPSCR.FR bank-select bit and swap the FP register banks. */
    COUNT_INST(I_FRCHG);
    check_fpuen();
    XOR_imm32_sh4r( FPSCR_FR, R_FPSCR );
    call_func0( sh4_switch_fr_banks );
    sh4_x86.tstate = TSTATE_NONE;
:}
FSCHG {:  
    /* Toggle FPSCR.SZ (32/64-bit FMOV size).  Mirrored into
     * xlat_sh4_mode and tracked locally in double_size so the rest of
     * this translation block emits the right FMOV forms. */
    COUNT_INST(I_FSCHG);
    check_fpuen();
    XOR_imm32_sh4r( FPSCR_SZ, R_FPSCR);
    XOR_imm32_sh4r( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_size = !sh4_x86.double_size;
:}
  2266 /* Processor control instructions */
  2267 LDC Rm, SR {:
  2268     COUNT_INST(I_LDCSR);
  2269     if( sh4_x86.in_delay_slot ) {
  2270 	SLOTILLEGAL();
  2271     } else {
  2272 	check_priv();
  2273 	load_reg( R_EAX, Rm );
  2274 	call_func1( sh4_write_sr, R_EAX );
  2275 	sh4_x86.fpuen_checked = FALSE;
  2276 	sh4_x86.tstate = TSTATE_NONE;
  2277 	return 2;
  2279 :}
LDC Rm, GBR {: 
    /* GBR is user-accessible: no privilege check. */
    COUNT_INST(I_LDC);
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_GBR );
:}
LDC Rm, VBR {:  
    /* Privileged control-register loads below all follow the same
     * pattern: check_priv, copy Rm into the system register slot. */
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SSR {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SGR {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SPC {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, DBR {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, Rn_BANK {:  
    /* Load into the inactive register bank. */
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, GBR {:  
    /* Pop a long from @Rm into GBR (user-accessible, no priv check). */
    COUNT_INST(I_LDCM);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_GBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
  2336 LDC.L @Rm+, SR {:
  2337     COUNT_INST(I_LDCSRM);
  2338     if( sh4_x86.in_delay_slot ) {
  2339 	SLOTILLEGAL();
  2340     } else {
  2341 	check_priv();
  2342 	load_reg( R_EAX, Rm );
  2343 	check_ralign32( R_EAX );
  2344 	MEM_READ_LONG( R_EAX, R_EAX );
  2345 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2346 	call_func1( sh4_write_sr, R_EAX );
  2347 	sh4_x86.fpuen_checked = FALSE;
  2348 	sh4_x86.tstate = TSTATE_NONE;
  2349 	return 2;
  2351 :}
LDC.L @Rm+, VBR {:  
    /* The privileged LDC.L forms below all share one shape:
     * check_priv, read long @Rm, post-increment Rm, store to the
     * target control register. */
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SSR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SGR {:  
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SPC {:  
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, DBR {:  
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, Rn_BANK {:  
    /* Pop into the inactive register bank. */
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, FPSCR {:
    /* FPSCR writes go through sh4_write_fpscr (bank/size flags may
     * change); return 2 ends the translation block. */
    COUNT_INST(I_LDSFPSCR);
    check_fpuen();
    load_reg( R_EAX, Rm );
    call_func1( sh4_write_fpscr, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
    return 2;
:}
LDS.L @Rm+, FPSCR {:  
    /* Pop a long from @Rm into FPSCR; same block-ending behaviour. */
    COUNT_INST(I_LDSFPSCRM);
    check_fpuen();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    call_func1( sh4_write_fpscr, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
    return 2;
:}
LDS Rm, FPUL {:  
    COUNT_INST(I_LDS);
    check_fpuen();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_FPUL );
:}
LDS.L @Rm+, FPUL {:  
    COUNT_INST(I_LDSM);
    check_fpuen();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACH {: 
    /* Plain register->system-register copies; no privilege or FPU
     * checks needed for MACH/MACL/PR. */
    COUNT_INST(I_LDS);
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACH );
:}
LDS.L @Rm+, MACH {:  
    COUNT_INST(I_LDSM);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACL {:  
    COUNT_INST(I_LDS);
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACL );
:}
LDS.L @Rm+, MACL {:  
    COUNT_INST(I_LDSM);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, PR {:  
    COUNT_INST(I_LDS);
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_PR );
:}
LDS.L @Rm+, PR {:  
    COUNT_INST(I_LDSM);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_PR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDTLB {:  
    /* Load the UTLB entry from PTEH/PTEL via the MMU helper. */
    COUNT_INST(I_LDTLB);
    call_func0( MMU_ldtlb );
    sh4_x86.tstate = TSTATE_NONE;
:}
OCBI @Rn {:
    /* Cache-block invalidate: no-op here (cache not modelled). */
    COUNT_INST(I_OCBI);
:}
OCBP @Rn {:
    /* Cache-block purge: no-op here. */
    COUNT_INST(I_OCBP);
:}
OCBWB @Rn {:
    /* Cache-block write-back: no-op here. */
    COUNT_INST(I_OCBWB);
:}
PREF @Rn {:
    /* Prefetch @Rn - significant for store-queue writeback. */
    COUNT_INST(I_PREF);
    load_reg( R_EAX, Rn );
    MEM_PREFETCH( R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
SLEEP {: 
    /* Privileged; halts via sh4_sleep and ends the block (return 2). */
    COUNT_INST(I_SLEEP);
    check_priv();
    call_func0( sh4_sleep );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.in_delay_slot = DELAY_NONE;
    return 2;
:}
STC SR, Rn {:
    /* SR is assembled from parts by sh4_read_sr. Privileged. */
    COUNT_INST(I_STCSR);
    check_priv();
    call_func0(sh4_read_sr);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC GBR, Rn {:  
    /* GBR is user-accessible: no privilege check. */
    COUNT_INST(I_STC);
    load_spreg( R_EAX, R_GBR );
    store_reg( R_EAX, Rn );
:}
STC VBR, Rn {:  
    /* Remaining STC forms: privileged system-register -> Rn copies. */
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_VBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SSR, Rn {:  
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_SSR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SPC, Rn {:  
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_SPC );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SGR, Rn {:  
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_SGR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC DBR, Rn {:  
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_DBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC Rm_BANK, Rn {:
    /* Read from the inactive register bank. */
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SR, @-Rn {:
    /* Push SR (assembled by sh4_read_sr) to @-Rn. Privileged. */
    COUNT_INST(I_STCSRM);
    check_priv();
    call_func0( sh4_read_sr );
    MOV_r32_r32( R_EAX, R_EDX );   /* value to R_EDX; R_EAX becomes the address */
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    LEA_r32disp8_r32( R_EAX, -4, R_EAX );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );  /* commit Rn -= 4 after the write */
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L VBR, @-Rn {:  
    /* Remaining privileged STC.L forms share one shape: pre-decrement
     * push of the system register to @-Rn, committing Rn afterwards. */
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_VBR );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SSR, @-Rn {:  
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_SSR );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SPC, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_SPC );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SGR, @-Rn {:  
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_SGR );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L DBR, @-Rn {:  
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_DBR );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L Rm_BANK, @-Rn {:  
    /* Push from the inactive register bank. */
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L GBR, @-Rn {:  
    /* GBR is user-accessible: no privilege check. */
    COUNT_INST(I_STCM);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_GBR );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPSCR, Rn {:  
    /* Rn <- FPSCR; requires the FPU to be enabled. */
    COUNT_INST(I_STSFPSCR);
    check_fpuen();
    load_spreg( R_EAX, R_FPSCR );
    store_reg( R_EAX, Rn );
:}
STS.L FPSCR, @-Rn {:  
    /* Pre-decrement push of FPSCR to @-Rn; Rn committed after the
     * write succeeds. */
    COUNT_INST(I_STSFPSCRM);
    check_fpuen();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_FPSCR );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPUL, Rn {:  
    COUNT_INST(I_STS);
    check_fpuen();
    load_spreg( R_EAX, R_FPUL );
    store_reg( R_EAX, Rn );
:}
STS.L FPUL, @-Rn {:  
    COUNT_INST(I_STSM);
    check_fpuen();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_FPUL );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACH, Rn {:  
    /* Unprivileged system-register reads and pre-decrement pushes for
     * MACH/MACL/PR; all follow the same pattern. */
    COUNT_INST(I_STS);
    load_spreg( R_EAX, R_MACH );
    store_reg( R_EAX, Rn );
:}
STS.L MACH, @-Rn {:  
    COUNT_INST(I_STSM);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_MACH );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACL, Rn {:  
    COUNT_INST(I_STS);
    load_spreg( R_EAX, R_MACL );
    store_reg( R_EAX, Rn );
:}
STS.L MACL, @-Rn {:  
    COUNT_INST(I_STSM);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_MACL );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS PR, Rn {:  
    COUNT_INST(I_STS);
    load_spreg( R_EAX, R_PR );
    store_reg( R_EAX, Rn );
:}
STS.L PR, @-Rn {:  
    COUNT_INST(I_STSM);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_PR );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
NOP {: 
    COUNT_INST(I_NOP);
    /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
:}
  2743 %%
  2744     sh4_x86.in_delay_slot = DELAY_NONE;
  2745     return 0;
.