lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 975:007bf7eb944f
prev 974:16b079ed11bb
next 991:60c7fab9c880
author nkeynes
date Wed Feb 25 09:00:05 2009 +0000
permissions -rw-r--r--
last change Argh. Apparently we still do really need _BSD_SOURCE and _GNU_SOURCE
I think that's everything now...
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "lxdream.h"
    29 #include "sh4/xltcache.h"
    30 #include "sh4/sh4core.h"
    31 #include "sh4/sh4trans.h"
    32 #include "sh4/sh4stat.h"
    33 #include "sh4/sh4mmio.h"
    34 #include "sh4/x86op.h"
    35 #include "sh4/mmu.h"
    36 #include "clock.h"
    38 #define DEFAULT_BACKPATCH_SIZE 4096
    40 struct backpatch_record {
    41     uint32_t fixup_offset;
    42     uint32_t fixup_icount;
    43     int32_t exc_code;
    44 };
    46 #define DELAY_NONE 0
    47 #define DELAY_PC 1
    48 #define DELAY_PC_PR 2
    50 /** 
    51  * Struct to manage internal translation state. This state is not saved -
    52  * it is only valid between calls to sh4_translate_begin_block() and
    53  * sh4_translate_end_block()
    54  */
    55 struct sh4_x86_state {
    56     int in_delay_slot;
    57     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    58     gboolean branch_taken; /* true if we branched unconditionally */
    59     gboolean double_prec; /* true if FPU is in double-precision mode */
    60     gboolean double_size; /* true if FPU is in double-size mode */
    61     gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    62     uint32_t block_start_pc;
    63     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    64     int tstate;
    66     /* mode flags */
    67     gboolean tlb_on; /* True if tlb translation is active */
    69     /* Allocated memory for the (block-wide) back-patch list */
    70     struct backpatch_record *backpatch_list;
    71     uint32_t backpatch_posn;
    72     uint32_t backpatch_size;
    73 };
    75 #define TSTATE_NONE -1
    76 #define TSTATE_O    0
    77 #define TSTATE_C    2
    78 #define TSTATE_E    4
    79 #define TSTATE_NE   5
    80 #define TSTATE_G    0xF
    81 #define TSTATE_GE   0xD
    82 #define TSTATE_A    7
    83 #define TSTATE_AE   3
    85 #ifdef ENABLE_SH4STATS
    86 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
    87 #else
    88 #define COUNT_INST(id)
    89 #endif
    91 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    92 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    93 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    94     OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
    96 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    97 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    98 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    99     OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
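/* Illustrative sketch (not from this file): the TSTATE_* values above are
 * exactly the low nibble of the x86 condition codes, so 0x70+cc yields a
 * Jcc rel8, 0x0F 0x90+cc a SETcc, and cc^1 is always the inverse condition
 * (which is how JF_rel8 negates the test). A standalone demo of the mapping:
 */
#include <stdio.h>

static const char *cc_name[16] = {
    "O", "NO", "C", "AE", "E", "NE", "BE", "A",
    "S", "NS", "P", "NP", "L", "GE", "LE", "G"
};

int main(void)
{
    static const int tstates[] = { 0, 2, 4, 5, 0xF, 0xD, 7, 3 }; /* O,C,E,NE,G,GE,A,AE */
    for( unsigned i = 0; i < sizeof tstates / sizeof tstates[0]; i++ ) {
        int cc = tstates[i];
        printf( "J%-2s = 0x%02X   inverse J%-2s = 0x%02X\n",
                cc_name[cc], 0x70 + cc, cc_name[cc ^ 1], 0x70 + (cc ^ 1) );
    }
    return 0;
}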
   101 static struct sh4_x86_state sh4_x86;
   103 static uint32_t max_int = 0x7FFFFFFF;
   104 static uint32_t min_int = 0x80000000;
   105 static uint32_t save_fcw; /* save value for fpu control word */
   106 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
   108 gboolean is_sse3_supported()
   109 {
   110     uint32_t features;
   112     __asm__ __volatile__(
   113         "mov $0x01, %%eax\n\t"
   114         "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
   115     return (features & 1) ? TRUE : FALSE;
   116 }
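/* The asm above queries CPUID leaf 1 and tests bit 0 of ECX (PNI/SSE3).
 * A sketch of the same check via GCC/Clang's <cpuid.h> helper (assumes a
 * GCC-compatible compiler; not what this file actually uses):
 */
#include <cpuid.h>

static int is_sse3_supported_sketch(void)
{
    unsigned int eax, ebx, ecx, edx;
    if( !__get_cpuid(1, &eax, &ebx, &ecx, &edx) )
        return 0;                     /* CPUID leaf 1 unavailable */
    return (ecx & bit_SSE3) != 0;     /* bit_SSE3 == (1 << 0) */
}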
   118 void sh4_translate_init(void)
   119 {
   120     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   121     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   122     sh4_x86.sse3_enabled = is_sse3_supported();
   123 }
   126 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   127 {
   128     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   129 	sh4_x86.backpatch_size <<= 1;
   130 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   131 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   132 	assert( sh4_x86.backpatch_list != NULL );
   133     }
   134     if( sh4_x86.in_delay_slot ) {
   135 	fixup_pc -= 2;
   136     }
   137     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   138 	((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
   139     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   140     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   141     sh4_x86.backpatch_posn++;
   142 }
   144 /**
   145  * Emit an instruction to load an SH4 reg into a real register
   146  */
   147 static inline void load_reg( int x86reg, int sh4reg ) 
   148 {
   149     /* mov [bp+n], reg */
   150     OP(0x8B);
   151     OP(0x45 + (x86reg<<3));
   152     OP(REG_OFFSET(r[sh4reg]));
   153 }
   155 static inline void load_reg16s( int x86reg, int sh4reg )
   156 {
   157     OP(0x0F);
   158     OP(0xBF);
   159     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   160 }
   162 static inline void load_reg16u( int x86reg, int sh4reg )
   163 {
   164     OP(0x0F);
   165     OP(0xB7);
   166     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   168 }
   170 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   171 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   172 /**
   173  * Emit an instruction to load an immediate value into a register
   174  */
   175 static inline void load_imm32( int x86reg, uint32_t value ) {
   176     /* mov #value, reg */
   177     OP(0xB8 + x86reg);
   178     OP32(value);
   179 }
   182 /**
   183  * Load an immediate 64-bit quantity (note: x86-64 only)
   184  */
   185 static inline void load_imm64( int x86reg, uint64_t value ) {
   186     /* mov #value, reg */
   187     REXW();
   188     OP(0xB8 + x86reg);
   189     OP64(value);
   190 }
   192 /**
   193  * Emit an instruction to store an SH4 reg (RN)
   194  */
   195 static inline void store_reg( int x86reg, int sh4reg ) {
   196     /* mov reg, [bp+n] */
   197     OP(0x89);
   198     OP(0x45 + (x86reg<<3));
   199     OP(REG_OFFSET(r[sh4reg]));
   200 }
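/* What load_reg()/store_reg() emit, spelled out: opcode 0x8B (load) or
 * 0x89 (store), then a ModR/M byte with mod=01 (8-bit displacement),
 * rm=101 (EBP base) and the x86 register number in bits 3-5 -- hence
 * 0x45 + (reg << 3) -- then the offset of the SH4 register within sh4r.
 * A standalone sketch of the load encoding:
 */
#include <stdio.h>
#include <stdint.h>

static void emit_mov_load_sketch( uint8_t buf[3], int x86reg, int8_t disp )
{
    buf[0] = 0x8B;                    /* MOV r32, r/m32 */
    buf[1] = 0x45 + (x86reg << 3);    /* ModR/M: reg, [ebp+disp8] */
    buf[2] = (uint8_t)disp;
}

int main(void)
{
    uint8_t buf[3];
    emit_mov_load_sketch( buf, 1 /* ECX */, 0x10 );
    printf( "%02X %02X %02X   ; mov ecx, [ebp+0x10]\n", buf[0], buf[1], buf[2] );
    return 0;
}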
   202 /**
   203  * Load an FR register (single-precision floating point) into an integer x86
   204  * register (eg for register-to-register moves)
   205  */
   206 #define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
   207 #define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
   209 /**
   210  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   211  */
   212 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   213 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   215 /**
   216  * Store an FR register (single-precision floating point) from an integer x86
   217  * register (eg for register-to-register moves)
   218  */
   219 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
   220 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
   222 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   223 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   226 #define push_fpul()  FLDF_sh4r(R_FPUL)
   227 #define pop_fpul()   FSTPF_sh4r(R_FPUL)
   228 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   229 #define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   230 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   231 #define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   232 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   233 #define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   234 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   235 #define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
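/* Why the single-precision accessors XOR the index with 1: each even/odd
 * FR pair is kept in memory as one native double, so on a little-endian
 * host the high word of DRn -- which the SH4 names FRn, n even -- sits in
 * the higher-addressed 32-bit slot. A sketch demonstrating the layout
 * (assumes a little-endian host):
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main(void)
{
    double dr = 1.0;        /* IEEE754: high word 0x3FF00000, low word 0 */
    uint32_t slot[2];
    memcpy( slot, &dr, sizeof dr );
    /* little-endian: slot[0] = low word, slot[1] = high word */
    printf( "slot[0]=%08X slot[1]=%08X\n",
            (unsigned)slot[0], (unsigned)slot[1] );
    /* so FR0 (the high half of DR0) is found at index 0^1 == 1 */
    return 0;
}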
   239 /* Exception checks - Note that all exception checks will clobber EAX */
   241 #define check_priv( ) \
   242     if( (sh4r.xlat_sh4_mode & SR_MD) == 0 ) { \
   243         if( sh4_x86.in_delay_slot ) { \
   244             exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2) ); \
   245         } else { \
   246             exit_block_exc(EXC_ILLEGAL, pc); \
   247         } \
   248         sh4_x86.branch_taken = TRUE; \
   249         sh4_x86.in_delay_slot = DELAY_NONE; \
   250         return 2; \
   251     }
   253 #define check_fpuen( ) \
   254     if( !sh4_x86.fpuen_checked ) {\
   255 	sh4_x86.fpuen_checked = TRUE;\
   256 	load_spreg( R_EAX, R_SR );\
   257 	AND_imm32_r32( SR_FD, R_EAX );\
   258 	if( sh4_x86.in_delay_slot ) {\
   259 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   260 	} else {\
   261 	    JNE_exc(EXC_FPU_DISABLED);\
   262 	}\
   263 	sh4_x86.tstate = TSTATE_NONE; \
   264     }
   266 #define check_ralign16( x86reg ) \
   267     TEST_imm32_r32( 0x00000001, x86reg ); \
   268     JNE_exc(EXC_DATA_ADDR_READ)
   270 #define check_walign16( x86reg ) \
   271     TEST_imm32_r32( 0x00000001, x86reg ); \
   272     JNE_exc(EXC_DATA_ADDR_WRITE);
   274 #define check_ralign32( x86reg ) \
   275     TEST_imm32_r32( 0x00000003, x86reg ); \
   276     JNE_exc(EXC_DATA_ADDR_READ)
   278 #define check_walign32( x86reg ) \
   279     TEST_imm32_r32( 0x00000003, x86reg ); \
   280     JNE_exc(EXC_DATA_ADDR_WRITE);
   282 #define check_ralign64( x86reg ) \
   283     TEST_imm32_r32( 0x00000007, x86reg ); \
   284     JNE_exc(EXC_DATA_ADDR_READ)
   286 #define check_walign64( x86reg ) \
   287     TEST_imm32_r32( 0x00000007, x86reg ); \
   288     JNE_exc(EXC_DATA_ADDR_WRITE);
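/* The predicate behind all six checks above: an access of 2**k bytes
 * traps unless the low k bits of the effective address are clear. A
 * trivial C model:
 */
#include <stdint.h>

static int is_misaligned( uint32_t addr, unsigned size )
{
    return (addr & (size - 1)) != 0;   /* size must be a power of two */
}
/* e.g. is_misaligned(0x8C001002, 4) != 0, so a longword read from that
 * address would raise EXC_DATA_ADDR_READ */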
   290 #define UNDEF(ir)
   291 #define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
   292 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   293 /* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so 
   294  * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
   295  */
   297 #ifdef HAVE_FRAME_ADDRESS
   298 #define _CALL_READ(addr_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
   299         call_func1_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg); } else { \
   300         call_func1_r32disp8_exc(R_ECX, MEM_REGION_PTR(fn), addr_reg, pc); } 
   301 #define _CALL_WRITE(addr_reg, val_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
   302         call_func2_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg); } else { \
   303         call_func2_r32disp8_exc(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg, pc); }
   304 #else 
   305 #define _CALL_READ(addr_reg, fn) call_func1_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg)
   306 #define _CALL_WRITE(addr_reg, val_reg, fn) call_func2_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg)
   307 #endif
   309 #define MEM_READ_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_byte); MEM_RESULT(value_reg)
   310 #define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_byte_for_write); MEM_RESULT(value_reg)
   311 #define MEM_READ_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_word); MEM_RESULT(value_reg)
   312 #define MEM_READ_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_long); MEM_RESULT(value_reg)
   313 #define MEM_WRITE_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_byte)
   314 #define MEM_WRITE_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_word)
   315 #define MEM_WRITE_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_long)
   316 #define MEM_PREFETCH( addr_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, prefetch)
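/* The C shape of what decode_address + _CALL_READ/_CALL_WRITE compile to:
 * decode_address resolves the page to a struct mem_region_fn pointer
 * (left in ECX), and the macros call indirectly through the slot picked
 * by MEM_REGION_PTR(). The names below are illustrative stand-ins, not
 * lxdream's exact declarations:
 */
#include <stddef.h>
#include <stdint.h>

struct mem_region_fn_sketch {
    int32_t (*read_long)( uint32_t addr );
    void (*write_long)( uint32_t addr, uint32_t val );
    /* ... read_word, read_byte, read_byte_for_write, prefetch ... */
};

static int32_t unmapped_read_long( uint32_t addr ) { (void)addr; return 0; }
static void unmapped_write_long( uint32_t addr, uint32_t val ) { (void)addr; (void)val; }
static struct mem_region_fn_sketch unmapped_region =
    { unmapped_read_long, unmapped_write_long };

static struct mem_region_fn_sketch *page_map_sketch[65536];  /* 64KB pages */

static int32_t mem_read_long_sketch( uint32_t addr )
{
    struct mem_region_fn_sketch *fn = page_map_sketch[addr >> 16]; /* decode_address */
    if( fn == NULL )
        fn = &unmapped_region;
    return fn->read_long( addr );   /* call_func1_r32disp8( R_ECX, ... ) */
}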
   318 #define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
   320 /****** Import appropriate calling conventions ******/
   321 #if SIZEOF_VOID_P == 8
   322 #include "sh4/ia64abi.h"
   323 #else /* 32-bit system */
   324 #include "sh4/ia32abi.h"
   325 #endif
   327 void sh4_translate_begin_block( sh4addr_t pc ) 
   328 {
   329     enter_block();
   330     sh4_x86.in_delay_slot = FALSE;
   331     sh4_x86.fpuen_checked = FALSE;
   332     sh4_x86.branch_taken = FALSE;
   333     sh4_x86.backpatch_posn = 0;
   334     sh4_x86.block_start_pc = pc;
   335     sh4_x86.tlb_on = IS_TLB_ENABLED();
   336     sh4_x86.tstate = TSTATE_NONE;
   337     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   338     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
   339 }
   342 uint32_t sh4_translate_end_block_size()
   343 {
   344     if( sh4_x86.backpatch_posn <= 3 ) {
   345         return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
   346     } else {
   347         return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
   348     }
   349 }
   352 /**
   353  * Embed a breakpoint into the generated code
   354  */
   355 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   356 {
   357     load_imm32( R_EAX, pc );
   358     call_func1( sh4_translate_breakpoint_hit, R_EAX );
   359     sh4_x86.tstate = TSTATE_NONE;
   360 }
   363 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
   365 /**
   366  * Embed a call to sh4_execute_instruction for situations that we
   367  * can't translate (just page-crossing delay slots at the moment).
   368  * Caller is responsible for setting new_pc before calling this function.
   369  *
   370  * Performs:
   371  *   Set PC = endpc
   372  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   373  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   374  *   Call sh4_execute_instruction
   375  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   376  */
   377 void exit_block_emu( sh4vma_t endpc )
   378 {
   379     load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
   380     ADD_r32_sh4r( R_ECX, R_PC );
   382     load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
   383     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   384     load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
   385     store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
   387     call_func0( sh4_execute_instruction );    
   388     load_spreg( R_EAX, R_PC );
   389     if( sh4_x86.tlb_on ) {
   390 	call_func1(xlat_get_code_by_vma,R_EAX);
   391     } else {
   392 	call_func1(xlat_get_code,R_EAX);
   393     }
   394     exit_block();
   395 } 
   397 /**
   398  * Translate a single instruction. Delayed branches are handled specially
   399  * by translating both branch and delayed instruction as a single unit (as
   400  * the delay-slot instruction logically executes before the branch takes effect).
   401  * The instruction MUST be in the icache (assert check)
   402  *
   403  * @return true if the instruction marks the end of a basic block
   404  * (eg a branch or a trap)
   405  */
   406 uint32_t sh4_translate_instruction( sh4vma_t pc )
   407 {
   408     uint32_t ir;
   409     /* Read instruction from icache */
   410     assert( IS_IN_ICACHE(pc) );
   411     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   413     if( !sh4_x86.in_delay_slot ) {
   414 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   415     }
   416 %%
   417 /* ALU operations */
   418 ADD Rm, Rn {:
   419     COUNT_INST(I_ADD);
   420     load_reg( R_EAX, Rm );
   421     load_reg( R_ECX, Rn );
   422     ADD_r32_r32( R_EAX, R_ECX );
   423     store_reg( R_ECX, Rn );
   424     sh4_x86.tstate = TSTATE_NONE;
   425 :}
   426 ADD #imm, Rn {:  
   427     COUNT_INST(I_ADDI);
   428     ADD_imm8s_sh4r( imm, REG_OFFSET(r[Rn]) );
   429     sh4_x86.tstate = TSTATE_NONE;
   430 :}
   431 ADDC Rm, Rn {:
   432     COUNT_INST(I_ADDC);
   433     if( sh4_x86.tstate != TSTATE_C ) {
   434         LDC_t();
   435     }
   436     load_reg( R_EAX, Rm );
   437     load_reg( R_ECX, Rn );
   438     ADC_r32_r32( R_EAX, R_ECX );
   439     store_reg( R_ECX, Rn );
   440     SETC_t();
   441     sh4_x86.tstate = TSTATE_C;
   442 :}
   443 ADDV Rm, Rn {:
   444     COUNT_INST(I_ADDV);
   445     load_reg( R_EAX, Rm );
   446     load_reg( R_ECX, Rn );
   447     ADD_r32_r32( R_EAX, R_ECX );
   448     store_reg( R_ECX, Rn );
   449     SETO_t();
   450     sh4_x86.tstate = TSTATE_O;
   451 :}
   452 AND Rm, Rn {:
   453     COUNT_INST(I_AND);
   454     load_reg( R_EAX, Rm );
   455     load_reg( R_ECX, Rn );
   456     AND_r32_r32( R_EAX, R_ECX );
   457     store_reg( R_ECX, Rn );
   458     sh4_x86.tstate = TSTATE_NONE;
   459 :}
   460 AND #imm, R0 {:  
   461     COUNT_INST(I_ANDI);
   462     load_reg( R_EAX, 0 );
   463     AND_imm32_r32(imm, R_EAX); 
   464     store_reg( R_EAX, 0 );
   465     sh4_x86.tstate = TSTATE_NONE;
   466 :}
   467 AND.B #imm, @(R0, GBR) {: 
   468     COUNT_INST(I_ANDB);
   469     load_reg( R_EAX, 0 );
   470     ADD_sh4r_r32( R_GBR, R_EAX );
   471     MOV_r32_esp8(R_EAX, 0);
   472     MEM_READ_BYTE_FOR_WRITE( R_EAX, R_EDX );
   473     MOV_esp8_r32(0, R_EAX);
   474     AND_imm32_r32(imm, R_EDX );
   475     MEM_WRITE_BYTE( R_EAX, R_EDX );
   476     sh4_x86.tstate = TSTATE_NONE;
   477 :}
   478 CMP/EQ Rm, Rn {:  
   479     COUNT_INST(I_CMPEQ);
   480     load_reg( R_EAX, Rm );
   481     load_reg( R_ECX, Rn );
   482     CMP_r32_r32( R_EAX, R_ECX );
   483     SETE_t();
   484     sh4_x86.tstate = TSTATE_E;
   485 :}
   486 CMP/EQ #imm, R0 {:  
   487     COUNT_INST(I_CMPEQI);
   488     load_reg( R_EAX, 0 );
   489     CMP_imm8s_r32(imm, R_EAX);
   490     SETE_t();
   491     sh4_x86.tstate = TSTATE_E;
   492 :}
   493 CMP/GE Rm, Rn {:  
   494     COUNT_INST(I_CMPGE);
   495     load_reg( R_EAX, Rm );
   496     load_reg( R_ECX, Rn );
   497     CMP_r32_r32( R_EAX, R_ECX );
   498     SETGE_t();
   499     sh4_x86.tstate = TSTATE_GE;
   500 :}
   501 CMP/GT Rm, Rn {: 
   502     COUNT_INST(I_CMPGT);
   503     load_reg( R_EAX, Rm );
   504     load_reg( R_ECX, Rn );
   505     CMP_r32_r32( R_EAX, R_ECX );
   506     SETG_t();
   507     sh4_x86.tstate = TSTATE_G;
   508 :}
   509 CMP/HI Rm, Rn {:  
   510     COUNT_INST(I_CMPHI);
   511     load_reg( R_EAX, Rm );
   512     load_reg( R_ECX, Rn );
   513     CMP_r32_r32( R_EAX, R_ECX );
   514     SETA_t();
   515     sh4_x86.tstate = TSTATE_A;
   516 :}
   517 CMP/HS Rm, Rn {: 
   518     COUNT_INST(I_CMPHS);
   519     load_reg( R_EAX, Rm );
   520     load_reg( R_ECX, Rn );
   521     CMP_r32_r32( R_EAX, R_ECX );
   522     SETAE_t();
   523     sh4_x86.tstate = TSTATE_AE;
   524  :}
   525 CMP/PL Rn {: 
   526     COUNT_INST(I_CMPPL);
   527     load_reg( R_EAX, Rn );
   528     CMP_imm8s_r32( 0, R_EAX );
   529     SETG_t();
   530     sh4_x86.tstate = TSTATE_G;
   531 :}
   532 CMP/PZ Rn {:  
   533     COUNT_INST(I_CMPPZ);
   534     load_reg( R_EAX, Rn );
   535     CMP_imm8s_r32( 0, R_EAX );
   536     SETGE_t();
   537     sh4_x86.tstate = TSTATE_GE;
   538 :}
   539 CMP/STR Rm, Rn {:  
   540     COUNT_INST(I_CMPSTR);
   541     load_reg( R_EAX, Rm );
   542     load_reg( R_ECX, Rn );
   543     XOR_r32_r32( R_ECX, R_EAX );
   544     TEST_r8_r8( R_AL, R_AL );
   545     JE_rel8(target1);
   546     TEST_r8_r8( R_AH, R_AH );
   547     JE_rel8(target2);
   548     SHR_imm8_r32( 16, R_EAX );
   549     TEST_r8_r8( R_AL, R_AL );
   550     JE_rel8(target3);
   551     TEST_r8_r8( R_AH, R_AH );
   552     JMP_TARGET(target1);
   553     JMP_TARGET(target2);
   554     JMP_TARGET(target3);
   555     SETE_t();
   556     sh4_x86.tstate = TSTATE_E;
   557 :}
   558 DIV0S Rm, Rn {:
   559     COUNT_INST(I_DIV0S);
   560     load_reg( R_EAX, Rm );
   561     load_reg( R_ECX, Rn );
   562     SHR_imm8_r32( 31, R_EAX );
   563     SHR_imm8_r32( 31, R_ECX );
   564     store_spreg( R_EAX, R_M );
   565     store_spreg( R_ECX, R_Q );
   566     CMP_r32_r32( R_EAX, R_ECX );
   567     SETNE_t();
   568     sh4_x86.tstate = TSTATE_NE;
   569 :}
   570 DIV0U {:  
   571     COUNT_INST(I_DIV0U);
   572     XOR_r32_r32( R_EAX, R_EAX );
   573     store_spreg( R_EAX, R_Q );
   574     store_spreg( R_EAX, R_M );
   575     store_spreg( R_EAX, R_T );
   576     sh4_x86.tstate = TSTATE_C; // works for DIV1
   577 :}
   578 DIV1 Rm, Rn {:
   579     COUNT_INST(I_DIV1);
   580     load_spreg( R_ECX, R_M );
   581     load_reg( R_EAX, Rn );
   582     if( sh4_x86.tstate != TSTATE_C ) {
   583 	LDC_t();
   584     }
   585     RCL1_r32( R_EAX );
   586     SETC_r8( R_DL ); // Q'
   587     CMP_sh4r_r32( R_Q, R_ECX );
   588     JE_rel8(mqequal);
   589     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   590     JMP_rel8(end);
   591     JMP_TARGET(mqequal);
   592     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   593     JMP_TARGET(end);
   594     store_reg( R_EAX, Rn ); // Done with Rn now
   595     SETC_r8(R_AL); // tmp1
   596     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   597     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   598     store_spreg( R_ECX, R_Q );
   599     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   600     MOVZX_r8_r32( R_AL, R_EAX );
   601     store_spreg( R_EAX, R_T );
   602     sh4_x86.tstate = TSTATE_NONE;
   603 :}
   604 DMULS.L Rm, Rn {:  
   605     COUNT_INST(I_DMULS);
   606     load_reg( R_EAX, Rm );
   607     load_reg( R_ECX, Rn );
   608     IMUL_r32(R_ECX);
   609     store_spreg( R_EDX, R_MACH );
   610     store_spreg( R_EAX, R_MACL );
   611     sh4_x86.tstate = TSTATE_NONE;
   612 :}
   613 DMULU.L Rm, Rn {:  
   614     COUNT_INST(I_DMULU);
   615     load_reg( R_EAX, Rm );
   616     load_reg( R_ECX, Rn );
   617     MUL_r32(R_ECX);
   618     store_spreg( R_EDX, R_MACH );
   619     store_spreg( R_EAX, R_MACL );    
   620     sh4_x86.tstate = TSTATE_NONE;
   621 :}
   622 DT Rn {:  
   623     COUNT_INST(I_DT);
   624     load_reg( R_EAX, Rn );
   625     ADD_imm8s_r32( -1, R_EAX );
   626     store_reg( R_EAX, Rn );
   627     SETE_t();
   628     sh4_x86.tstate = TSTATE_E;
   629 :}
   630 EXTS.B Rm, Rn {:  
   631     COUNT_INST(I_EXTSB);
   632     load_reg( R_EAX, Rm );
   633     MOVSX_r8_r32( R_EAX, R_EAX );
   634     store_reg( R_EAX, Rn );
   635 :}
   636 EXTS.W Rm, Rn {:  
   637     COUNT_INST(I_EXTSW);
   638     load_reg( R_EAX, Rm );
   639     MOVSX_r16_r32( R_EAX, R_EAX );
   640     store_reg( R_EAX, Rn );
   641 :}
   642 EXTU.B Rm, Rn {:  
   643     COUNT_INST(I_EXTUB);
   644     load_reg( R_EAX, Rm );
   645     MOVZX_r8_r32( R_EAX, R_EAX );
   646     store_reg( R_EAX, Rn );
   647 :}
   648 EXTU.W Rm, Rn {:  
   649     COUNT_INST(I_EXTUW);
   650     load_reg( R_EAX, Rm );
   651     MOVZX_r16_r32( R_EAX, R_EAX );
   652     store_reg( R_EAX, Rn );
   653 :}
   654 MAC.L @Rm+, @Rn+ {:
   655     COUNT_INST(I_MACL);
   656     if( Rm == Rn ) {
   657 	load_reg( R_EAX, Rm );
   658 	check_ralign32( R_EAX );
   659 	MEM_READ_LONG( R_EAX, R_EAX );
   660 	MOV_r32_esp8(R_EAX, 0);
   661 	load_reg( R_EAX, Rm );
   662 	LEA_r32disp8_r32( R_EAX, 4, R_EAX );
   663 	MEM_READ_LONG( R_EAX, R_EAX );
   664         ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
   665     } else {
   666 	load_reg( R_EAX, Rm );
   667 	check_ralign32( R_EAX );
   668 	MEM_READ_LONG( R_EAX, R_EAX );
   669 	MOV_r32_esp8( R_EAX, 0 );
   670 	load_reg( R_EAX, Rn );
   671 	check_ralign32( R_EAX );
   672 	MEM_READ_LONG( R_EAX, R_EAX );
   673 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   674 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   675     }
   677     IMUL_esp8( 0 );
   678     ADD_r32_sh4r( R_EAX, R_MACL );
   679     ADC_r32_sh4r( R_EDX, R_MACH );
   681     load_spreg( R_ECX, R_S );
   682     TEST_r32_r32(R_ECX, R_ECX);
   683     JE_rel8( nosat );
   684     call_func0( signsat48 );
   685     JMP_TARGET( nosat );
   686     sh4_x86.tstate = TSTATE_NONE;
   687 :}
   688 MAC.W @Rm+, @Rn+ {:  
   689     COUNT_INST(I_MACW);
   690     if( Rm == Rn ) {
   691 	load_reg( R_EAX, Rm );
   692 	check_ralign16( R_EAX );
   693 	MEM_READ_WORD( R_EAX, R_EAX );
   694         MOV_r32_esp8( R_EAX, 0 );
   695 	load_reg( R_EAX, Rm );
   696 	LEA_r32disp8_r32( R_EAX, 2, R_EAX );
   697 	MEM_READ_WORD( R_EAX, R_EAX );
   698 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   699 	// Note translate twice in case of page boundaries. Maybe worth
   700 	// adding a page-boundary check to skip the second translation
   701     } else {
   702 	load_reg( R_EAX, Rm );
   703 	check_ralign16( R_EAX );
   704 	MEM_READ_WORD( R_EAX, R_EAX );
   705         MOV_r32_esp8( R_EAX, 0 );
   706 	load_reg( R_EAX, Rn );
   707 	check_ralign16( R_EAX );
   708 	MEM_READ_WORD( R_EAX, R_EAX );
   709 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   710 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   711     }
   712     IMUL_esp8( 0 );
   713     load_spreg( R_ECX, R_S );
   714     TEST_r32_r32( R_ECX, R_ECX );
   715     JE_rel8( nosat );
   717     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   718     JNO_rel8( end );            // 2
   719     load_imm32( R_EDX, 1 );         // 5
   720     store_spreg( R_EDX, R_MACH );   // 6
   721     JS_rel8( positive );        // 2
   722     load_imm32( R_EAX, 0x80000000 );// 5
   723     store_spreg( R_EAX, R_MACL );   // 6
   724     JMP_rel8(end2);           // 2
   726     JMP_TARGET(positive);
   727     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   728     store_spreg( R_EAX, R_MACL );   // 6
   729     JMP_rel8(end3);            // 2
   731     JMP_TARGET(nosat);
   732     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   733     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   734     JMP_TARGET(end);
   735     JMP_TARGET(end2);
   736     JMP_TARGET(end3);
   737     sh4_x86.tstate = TSTATE_NONE;
   738 :}
   739 MOVT Rn {:  
   740     COUNT_INST(I_MOVT);
   741     load_spreg( R_EAX, R_T );
   742     store_reg( R_EAX, Rn );
   743 :}
   744 MUL.L Rm, Rn {:  
   745     COUNT_INST(I_MULL);
   746     load_reg( R_EAX, Rm );
   747     load_reg( R_ECX, Rn );
   748     MUL_r32( R_ECX );
   749     store_spreg( R_EAX, R_MACL );
   750     sh4_x86.tstate = TSTATE_NONE;
   751 :}
   752 MULS.W Rm, Rn {:
   753     COUNT_INST(I_MULSW);
   754     load_reg16s( R_EAX, Rm );
   755     load_reg16s( R_ECX, Rn );
   756     MUL_r32( R_ECX );
   757     store_spreg( R_EAX, R_MACL );
   758     sh4_x86.tstate = TSTATE_NONE;
   759 :}
   760 MULU.W Rm, Rn {:  
   761     COUNT_INST(I_MULUW);
   762     load_reg16u( R_EAX, Rm );
   763     load_reg16u( R_ECX, Rn );
   764     MUL_r32( R_ECX );
   765     store_spreg( R_EAX, R_MACL );
   766     sh4_x86.tstate = TSTATE_NONE;
   767 :}
   768 NEG Rm, Rn {:
   769     COUNT_INST(I_NEG);
   770     load_reg( R_EAX, Rm );
   771     NEG_r32( R_EAX );
   772     store_reg( R_EAX, Rn );
   773     sh4_x86.tstate = TSTATE_NONE;
   774 :}
   775 NEGC Rm, Rn {:  
   776     COUNT_INST(I_NEGC);
   777     load_reg( R_EAX, Rm );
   778     XOR_r32_r32( R_ECX, R_ECX );
   779     LDC_t();
   780     SBB_r32_r32( R_EAX, R_ECX );
   781     store_reg( R_ECX, Rn );
   782     SETC_t();
   783     sh4_x86.tstate = TSTATE_C;
   784 :}
   785 NOT Rm, Rn {:  
   786     COUNT_INST(I_NOT);
   787     load_reg( R_EAX, Rm );
   788     NOT_r32( R_EAX );
   789     store_reg( R_EAX, Rn );
   790     sh4_x86.tstate = TSTATE_NONE;
   791 :}
   792 OR Rm, Rn {:  
   793     COUNT_INST(I_OR);
   794     load_reg( R_EAX, Rm );
   795     load_reg( R_ECX, Rn );
   796     OR_r32_r32( R_EAX, R_ECX );
   797     store_reg( R_ECX, Rn );
   798     sh4_x86.tstate = TSTATE_NONE;
   799 :}
   800 OR #imm, R0 {:
   801     COUNT_INST(I_ORI);
   802     load_reg( R_EAX, 0 );
   803     OR_imm32_r32(imm, R_EAX);
   804     store_reg( R_EAX, 0 );
   805     sh4_x86.tstate = TSTATE_NONE;
   806 :}
   807 OR.B #imm, @(R0, GBR) {:  
   808     COUNT_INST(I_ORB);
   809     load_reg( R_EAX, 0 );
   810     ADD_sh4r_r32( R_GBR, R_EAX );
   811     MOV_r32_esp8( R_EAX, 0 );
   812     MEM_READ_BYTE_FOR_WRITE( R_EAX, R_EDX );
   813     MOV_esp8_r32( 0, R_EAX );
   814     OR_imm32_r32(imm, R_EDX );
   815     MEM_WRITE_BYTE( R_EAX, R_EDX );
   816     sh4_x86.tstate = TSTATE_NONE;
   817 :}
   818 ROTCL Rn {:
   819     COUNT_INST(I_ROTCL);
   820     load_reg( R_EAX, Rn );
   821     if( sh4_x86.tstate != TSTATE_C ) {
   822 	LDC_t();
   823     }
   824     RCL1_r32( R_EAX );
   825     store_reg( R_EAX, Rn );
   826     SETC_t();
   827     sh4_x86.tstate = TSTATE_C;
   828 :}
   829 ROTCR Rn {:  
   830     COUNT_INST(I_ROTCR);
   831     load_reg( R_EAX, Rn );
   832     if( sh4_x86.tstate != TSTATE_C ) {
   833 	LDC_t();
   834     }
   835     RCR1_r32( R_EAX );
   836     store_reg( R_EAX, Rn );
   837     SETC_t();
   838     sh4_x86.tstate = TSTATE_C;
   839 :}
   840 ROTL Rn {:  
   841     COUNT_INST(I_ROTL);
   842     load_reg( R_EAX, Rn );
   843     ROL1_r32( R_EAX );
   844     store_reg( R_EAX, Rn );
   845     SETC_t();
   846     sh4_x86.tstate = TSTATE_C;
   847 :}
   848 ROTR Rn {:  
   849     COUNT_INST(I_ROTR);
   850     load_reg( R_EAX, Rn );
   851     ROR1_r32( R_EAX );
   852     store_reg( R_EAX, Rn );
   853     SETC_t();
   854     sh4_x86.tstate = TSTATE_C;
   855 :}
   856 SHAD Rm, Rn {:
   857     COUNT_INST(I_SHAD);
   858     /* Annoyingly enough, not directly convertible */
   859     load_reg( R_EAX, Rn );
   860     load_reg( R_ECX, Rm );
   861     CMP_imm32_r32( 0, R_ECX );
   862     JGE_rel8(doshl);
   864     NEG_r32( R_ECX );      // 2
   865     AND_imm8_r8( 0x1F, R_CL ); // 3
   866     JE_rel8(emptysar);     // 2
   867     SAR_r32_CL( R_EAX );       // 2
   868     JMP_rel8(end);          // 2
   870     JMP_TARGET(emptysar);
   871     SAR_imm8_r32(31, R_EAX );  // 3
   872     JMP_rel8(end2);
   874     JMP_TARGET(doshl);
   875     AND_imm8_r8( 0x1F, R_CL ); // 3
   876     SHL_r32_CL( R_EAX );       // 2
   877     JMP_TARGET(end);
   878     JMP_TARGET(end2);
   879     store_reg( R_EAX, Rn );
   880     sh4_x86.tstate = TSTATE_NONE;
   881 :}
   882 SHLD Rm, Rn {:  
   883     COUNT_INST(I_SHLD);
   884     load_reg( R_EAX, Rn );
   885     load_reg( R_ECX, Rm );
   886     CMP_imm32_r32( 0, R_ECX );
   887     JGE_rel8(doshl);
   889     NEG_r32( R_ECX );      // 2
   890     AND_imm8_r8( 0x1F, R_CL ); // 3
   891     JE_rel8(emptyshr );
   892     SHR_r32_CL( R_EAX );       // 2
   893     JMP_rel8(end);          // 2
   895     JMP_TARGET(emptyshr);
   896     XOR_r32_r32( R_EAX, R_EAX );
   897     JMP_rel8(end2);
   899     JMP_TARGET(doshl);
   900     AND_imm8_r8( 0x1F, R_CL ); // 3
   901     SHL_r32_CL( R_EAX );       // 2
   902     JMP_TARGET(end);
   903     JMP_TARGET(end2);
   904     store_reg( R_EAX, Rn );
   905     sh4_x86.tstate = TSTATE_NONE;
   906 :}
   907 SHAL Rn {: 
   908     COUNT_INST(I_SHAL);
   909     load_reg( R_EAX, Rn );
   910     SHL1_r32( R_EAX );
   911     SETC_t();
   912     store_reg( R_EAX, Rn );
   913     sh4_x86.tstate = TSTATE_C;
   914 :}
   915 SHAR Rn {:  
   916     COUNT_INST(I_SHAR);
   917     load_reg( R_EAX, Rn );
   918     SAR1_r32( R_EAX );
   919     SETC_t();
   920     store_reg( R_EAX, Rn );
   921     sh4_x86.tstate = TSTATE_C;
   922 :}
   923 SHLL Rn {:  
   924     COUNT_INST(I_SHLL);
   925     load_reg( R_EAX, Rn );
   926     SHL1_r32( R_EAX );
   927     SETC_t();
   928     store_reg( R_EAX, Rn );
   929     sh4_x86.tstate = TSTATE_C;
   930 :}
   931 SHLL2 Rn {:
   932     COUNT_INST(I_SHLL);
   933     load_reg( R_EAX, Rn );
   934     SHL_imm8_r32( 2, R_EAX );
   935     store_reg( R_EAX, Rn );
   936     sh4_x86.tstate = TSTATE_NONE;
   937 :}
   938 SHLL8 Rn {:  
   939     COUNT_INST(I_SHLL);
   940     load_reg( R_EAX, Rn );
   941     SHL_imm8_r32( 8, R_EAX );
   942     store_reg( R_EAX, Rn );
   943     sh4_x86.tstate = TSTATE_NONE;
   944 :}
   945 SHLL16 Rn {:  
   946     COUNT_INST(I_SHLL);
   947     load_reg( R_EAX, Rn );
   948     SHL_imm8_r32( 16, R_EAX );
   949     store_reg( R_EAX, Rn );
   950     sh4_x86.tstate = TSTATE_NONE;
   951 :}
   952 SHLR Rn {:  
   953     COUNT_INST(I_SHLR);
   954     load_reg( R_EAX, Rn );
   955     SHR1_r32( R_EAX );
   956     SETC_t();
   957     store_reg( R_EAX, Rn );
   958     sh4_x86.tstate = TSTATE_C;
   959 :}
   960 SHLR2 Rn {:  
   961     COUNT_INST(I_SHLR);
   962     load_reg( R_EAX, Rn );
   963     SHR_imm8_r32( 2, R_EAX );
   964     store_reg( R_EAX, Rn );
   965     sh4_x86.tstate = TSTATE_NONE;
   966 :}
   967 SHLR8 Rn {:  
   968     COUNT_INST(I_SHLR);
   969     load_reg( R_EAX, Rn );
   970     SHR_imm8_r32( 8, R_EAX );
   971     store_reg( R_EAX, Rn );
   972     sh4_x86.tstate = TSTATE_NONE;
   973 :}
   974 SHLR16 Rn {:  
   975     COUNT_INST(I_SHLR);
   976     load_reg( R_EAX, Rn );
   977     SHR_imm8_r32( 16, R_EAX );
   978     store_reg( R_EAX, Rn );
   979     sh4_x86.tstate = TSTATE_NONE;
   980 :}
   981 SUB Rm, Rn {:  
   982     COUNT_INST(I_SUB);
   983     load_reg( R_EAX, Rm );
   984     load_reg( R_ECX, Rn );
   985     SUB_r32_r32( R_EAX, R_ECX );
   986     store_reg( R_ECX, Rn );
   987     sh4_x86.tstate = TSTATE_NONE;
   988 :}
   989 SUBC Rm, Rn {:  
   990     COUNT_INST(I_SUBC);
   991     load_reg( R_EAX, Rm );
   992     load_reg( R_ECX, Rn );
   993     if( sh4_x86.tstate != TSTATE_C ) {
   994 	LDC_t();
   995     }
   996     SBB_r32_r32( R_EAX, R_ECX );
   997     store_reg( R_ECX, Rn );
   998     SETC_t();
   999     sh4_x86.tstate = TSTATE_C;
  1000 :}
  1001 SUBV Rm, Rn {:  
  1002     COUNT_INST(I_SUBV);
  1003     load_reg( R_EAX, Rm );
  1004     load_reg( R_ECX, Rn );
  1005     SUB_r32_r32( R_EAX, R_ECX );
  1006     store_reg( R_ECX, Rn );
  1007     SETO_t();
  1008     sh4_x86.tstate = TSTATE_O;
  1009 :}
  1010 SWAP.B Rm, Rn {:  
  1011     COUNT_INST(I_SWAPB);
  1012     load_reg( R_EAX, Rm );
  1013     XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
  1014     store_reg( R_EAX, Rn );
  1015 :}
  1016 SWAP.W Rm, Rn {:  
  1017     COUNT_INST(I_SWAPB);
  1018     load_reg( R_EAX, Rm );
  1019     MOV_r32_r32( R_EAX, R_ECX );
  1020     SHL_imm8_r32( 16, R_ECX );
  1021     SHR_imm8_r32( 16, R_EAX );
  1022     OR_r32_r32( R_EAX, R_ECX );
  1023     store_reg( R_ECX, Rn );
  1024     sh4_x86.tstate = TSTATE_NONE;
  1025 :}
  1026 TAS.B @Rn {:  
  1027     COUNT_INST(I_TASB);
  1028     load_reg( R_EAX, Rn );
  1029     MOV_r32_esp8( R_EAX, 0 );
  1030     MEM_READ_BYTE_FOR_WRITE( R_EAX, R_EDX );
  1031     TEST_r8_r8( R_DL, R_DL );
  1032     SETE_t();
  1033     OR_imm8_r8( 0x80, R_DL );
  1034     MOV_esp8_r32( 0, R_EAX );
  1035     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1036     sh4_x86.tstate = TSTATE_NONE;
  1037 :}
  1038 TST Rm, Rn {:  
  1039     COUNT_INST(I_TST);
  1040     load_reg( R_EAX, Rm );
  1041     load_reg( R_ECX, Rn );
  1042     TEST_r32_r32( R_EAX, R_ECX );
  1043     SETE_t();
  1044     sh4_x86.tstate = TSTATE_E;
  1045 :}
  1046 TST #imm, R0 {:  
  1047     COUNT_INST(I_TSTI);
  1048     load_reg( R_EAX, 0 );
  1049     TEST_imm32_r32( imm, R_EAX );
  1050     SETE_t();
  1051     sh4_x86.tstate = TSTATE_E;
  1052 :}
  1053 TST.B #imm, @(R0, GBR) {:  
  1054     COUNT_INST(I_TSTB);
  1055     load_reg( R_EAX, 0);
  1056     ADD_sh4r_r32( R_GBR, R_EAX );
  1057     MEM_READ_BYTE( R_EAX, R_EAX );
  1058     TEST_imm8_r8( imm, R_AL );
  1059     SETE_t();
  1060     sh4_x86.tstate = TSTATE_E;
  1061 :}
  1062 XOR Rm, Rn {:  
  1063     COUNT_INST(I_XOR);
  1064     load_reg( R_EAX, Rm );
  1065     load_reg( R_ECX, Rn );
  1066     XOR_r32_r32( R_EAX, R_ECX );
  1067     store_reg( R_ECX, Rn );
  1068     sh4_x86.tstate = TSTATE_NONE;
  1069 :}
  1070 XOR #imm, R0 {:  
  1071     COUNT_INST(I_XORI);
  1072     load_reg( R_EAX, 0 );
  1073     XOR_imm32_r32( imm, R_EAX );
  1074     store_reg( R_EAX, 0 );
  1075     sh4_x86.tstate = TSTATE_NONE;
  1076 :}
  1077 XOR.B #imm, @(R0, GBR) {:  
  1078     COUNT_INST(I_XORB);
  1079     load_reg( R_EAX, 0 );
  1080     ADD_sh4r_r32( R_GBR, R_EAX ); 
  1081     MOV_r32_esp8( R_EAX, 0 );
  1082     MEM_READ_BYTE_FOR_WRITE(R_EAX, R_EDX);
  1083     MOV_esp8_r32( 0, R_EAX );
  1084     XOR_imm32_r32( imm, R_EDX );
  1085     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1086     sh4_x86.tstate = TSTATE_NONE;
  1087 :}
  1088 XTRCT Rm, Rn {:
  1089     COUNT_INST(I_XTRCT);
  1090     load_reg( R_EAX, Rm );
  1091     load_reg( R_ECX, Rn );
  1092     SHL_imm8_r32( 16, R_EAX );
  1093     SHR_imm8_r32( 16, R_ECX );
  1094     OR_r32_r32( R_EAX, R_ECX );
  1095     store_reg( R_ECX, Rn );
  1096     sh4_x86.tstate = TSTATE_NONE;
  1097 :}
  1099 /* Data move instructions */
  1100 MOV Rm, Rn {:  
  1101     COUNT_INST(I_MOV);
  1102     load_reg( R_EAX, Rm );
  1103     store_reg( R_EAX, Rn );
  1104 :}
  1105 MOV #imm, Rn {:  
  1106     COUNT_INST(I_MOVI);
  1107     load_imm32( R_EAX, imm );
  1108     store_reg( R_EAX, Rn );
  1109 :}
  1110 MOV.B Rm, @Rn {:  
  1111     COUNT_INST(I_MOVB);
  1112     load_reg( R_EAX, Rn );
  1113     load_reg( R_EDX, Rm );
  1114     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1115     sh4_x86.tstate = TSTATE_NONE;
  1116 :}
  1117 MOV.B Rm, @-Rn {:  
  1118     COUNT_INST(I_MOVB);
  1119     load_reg( R_EAX, Rn );
  1120     LEA_r32disp8_r32( R_EAX, -1, R_EAX );
  1121     load_reg( R_EDX, Rm );
  1122     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1123     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
  1124     sh4_x86.tstate = TSTATE_NONE;
  1125 :}
  1126 MOV.B Rm, @(R0, Rn) {:  
  1127     COUNT_INST(I_MOVB);
  1128     load_reg( R_EAX, 0 );
  1129     ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1130     load_reg( R_EDX, Rm );
  1131     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1132     sh4_x86.tstate = TSTATE_NONE;
  1133 :}
  1134 MOV.B R0, @(disp, GBR) {:  
  1135     COUNT_INST(I_MOVB);
  1136     load_spreg( R_EAX, R_GBR );
  1137     ADD_imm32_r32( disp, R_EAX );
  1138     load_reg( R_EDX, 0 );
  1139     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1140     sh4_x86.tstate = TSTATE_NONE;
  1141 :}
  1142 MOV.B R0, @(disp, Rn) {:  
  1143     COUNT_INST(I_MOVB);
  1144     load_reg( R_EAX, Rn );
  1145     ADD_imm32_r32( disp, R_EAX );
  1146     load_reg( R_EDX, 0 );
  1147     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1148     sh4_x86.tstate = TSTATE_NONE;
  1149 :}
  1150 MOV.B @Rm, Rn {:  
  1151     COUNT_INST(I_MOVB);
  1152     load_reg( R_EAX, Rm );
  1153     MEM_READ_BYTE( R_EAX, R_EAX );
  1154     store_reg( R_EAX, Rn );
  1155     sh4_x86.tstate = TSTATE_NONE;
  1156 :}
  1157 MOV.B @Rm+, Rn {:  
  1158     COUNT_INST(I_MOVB);
  1159     load_reg( R_EAX, Rm );
  1160     MEM_READ_BYTE( R_EAX, R_EAX );
  1161     if( Rm != Rn ) {
  1162     	ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
  1163     }
  1164     store_reg( R_EAX, Rn );
  1165     sh4_x86.tstate = TSTATE_NONE;
  1166 :}
  1167 MOV.B @(R0, Rm), Rn {:  
  1168     COUNT_INST(I_MOVB);
  1169     load_reg( R_EAX, 0 );
  1170     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
  1171     MEM_READ_BYTE( R_EAX, R_EAX );
  1172     store_reg( R_EAX, Rn );
  1173     sh4_x86.tstate = TSTATE_NONE;
  1174 :}
  1175 MOV.B @(disp, GBR), R0 {:  
  1176     COUNT_INST(I_MOVB);
  1177     load_spreg( R_EAX, R_GBR );
  1178     ADD_imm32_r32( disp, R_EAX );
  1179     MEM_READ_BYTE( R_EAX, R_EAX );
  1180     store_reg( R_EAX, 0 );
  1181     sh4_x86.tstate = TSTATE_NONE;
  1182 :}
  1183 MOV.B @(disp, Rm), R0 {:  
  1184     COUNT_INST(I_MOVB);
  1185     load_reg( R_EAX, Rm );
  1186     ADD_imm32_r32( disp, R_EAX );
  1187     MEM_READ_BYTE( R_EAX, R_EAX );
  1188     store_reg( R_EAX, 0 );
  1189     sh4_x86.tstate = TSTATE_NONE;
  1190 :}
  1191 MOV.L Rm, @Rn {:
  1192     COUNT_INST(I_MOVL);
  1193     load_reg( R_EAX, Rn );
  1194     check_walign32(R_EAX);
  1195     MOV_r32_r32( R_EAX, R_ECX );
  1196     AND_imm32_r32( 0xFC000000, R_ECX );
  1197     CMP_imm32_r32( 0xE0000000, R_ECX );
  1198     JNE_rel8( notsq );
  1199     AND_imm8s_r32( 0x3C, R_EAX );
  1200     load_reg( R_EDX, Rm );
  1201     MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
  1202     JMP_rel8(end);
  1203     JMP_TARGET(notsq);
  1204     load_reg( R_EDX, Rm );
  1205     MEM_WRITE_LONG( R_EAX, R_EDX );
  1206     JMP_TARGET(end);
  1207     sh4_x86.tstate = TSTATE_NONE;
  1208 :}
  1209 MOV.L Rm, @-Rn {:  
  1210     COUNT_INST(I_MOVL);
  1211     load_reg( R_EAX, Rn );
  1212     ADD_imm8s_r32( -4, R_EAX );
  1213     check_walign32( R_EAX );
  1214     load_reg( R_EDX, Rm );
  1215     MEM_WRITE_LONG( R_EAX, R_EDX );
  1216     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  1217     sh4_x86.tstate = TSTATE_NONE;
  1218 :}
  1219 MOV.L Rm, @(R0, Rn) {:  
  1220     COUNT_INST(I_MOVL);
  1221     load_reg( R_EAX, 0 );
  1222     ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1223     check_walign32( R_EAX );
  1224     load_reg( R_EDX, Rm );
  1225     MEM_WRITE_LONG( R_EAX, R_EDX );
  1226     sh4_x86.tstate = TSTATE_NONE;
  1227 :}
  1228 MOV.L R0, @(disp, GBR) {:  
  1229     COUNT_INST(I_MOVL);
  1230     load_spreg( R_EAX, R_GBR );
  1231     ADD_imm32_r32( disp, R_EAX );
  1232     check_walign32( R_EAX );
  1233     load_reg( R_EDX, 0 );
  1234     MEM_WRITE_LONG( R_EAX, R_EDX );
  1235     sh4_x86.tstate = TSTATE_NONE;
  1236 :}
  1237 MOV.L Rm, @(disp, Rn) {:  
  1238     COUNT_INST(I_MOVL);
  1239     load_reg( R_EAX, Rn );
  1240     ADD_imm32_r32( disp, R_EAX );
  1241     check_walign32( R_EAX );
  1242     MOV_r32_r32( R_EAX, R_ECX );
  1243     AND_imm32_r32( 0xFC000000, R_ECX );
  1244     CMP_imm32_r32( 0xE0000000, R_ECX );
  1245     JNE_rel8( notsq );
  1246     AND_imm8s_r32( 0x3C, R_EAX );
  1247     load_reg( R_EDX, Rm );
  1248     MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
  1249     JMP_rel8(end);
  1250     JMP_TARGET(notsq);
  1251     load_reg( R_EDX, Rm );
  1252     MEM_WRITE_LONG( R_EAX, R_EDX );
  1253     JMP_TARGET(end);
  1254     sh4_x86.tstate = TSTATE_NONE;
  1255 :}
  1256 MOV.L @Rm, Rn {:  
  1257     COUNT_INST(I_MOVL);
  1258     load_reg( R_EAX, Rm );
  1259     check_ralign32( R_EAX );
  1260     MEM_READ_LONG( R_EAX, R_EAX );
  1261     store_reg( R_EAX, Rn );
  1262     sh4_x86.tstate = TSTATE_NONE;
  1263 :}
  1264 MOV.L @Rm+, Rn {:  
  1265     COUNT_INST(I_MOVL);
  1266     load_reg( R_EAX, Rm );
  1267     check_ralign32( R_EAX );
  1268     MEM_READ_LONG( R_EAX, R_EAX );
  1269     if( Rm != Rn ) {
  1270     	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1271     }
  1272     store_reg( R_EAX, Rn );
  1273     sh4_x86.tstate = TSTATE_NONE;
  1274 :}
  1275 MOV.L @(R0, Rm), Rn {:  
  1276     COUNT_INST(I_MOVL);
  1277     load_reg( R_EAX, 0 );
  1278     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
  1279     check_ralign32( R_EAX );
  1280     MEM_READ_LONG( R_EAX, R_EAX );
  1281     store_reg( R_EAX, Rn );
  1282     sh4_x86.tstate = TSTATE_NONE;
  1283 :}
  1284 MOV.L @(disp, GBR), R0 {:
  1285     COUNT_INST(I_MOVL);
  1286     load_spreg( R_EAX, R_GBR );
  1287     ADD_imm32_r32( disp, R_EAX );
  1288     check_ralign32( R_EAX );
  1289     MEM_READ_LONG( R_EAX, R_EAX );
  1290     store_reg( R_EAX, 0 );
  1291     sh4_x86.tstate = TSTATE_NONE;
  1292 :}
  1293 MOV.L @(disp, PC), Rn {:  
  1294     COUNT_INST(I_MOVLPC);
  1295     if( sh4_x86.in_delay_slot ) {
  1296 	SLOTILLEGAL();
  1297     } else {
  1298 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1299 	if( IS_IN_ICACHE(target) ) {
  1300 	    // If the target address is in the same page as the code, it's
  1301 	    // pretty safe to just ref it directly and circumvent the whole
  1302 	    // memory subsystem. (this is a big performance win)
  1304 	    // FIXME: There's a corner-case that's not handled here when
  1305 	    // the current code-page is in the ITLB but not in the UTLB.
  1306 	    // (should generate a TLB miss although need to test SH4 
  1307 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1308 	    // behaviour though.
  1309 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1310 	    MOV_moff32_EAX( ptr );
  1311 	} else {
  1312 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1313 	    // different virtual address than the translation was done with,
  1314 	    // but we can safely assume that the low bits are the same.
  1315 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1316 	    ADD_sh4r_r32( R_PC, R_EAX );
  1317 	    MEM_READ_LONG( R_EAX, R_EAX );
  1318 	    sh4_x86.tstate = TSTATE_NONE;
  1319 	}
  1320 	store_reg( R_EAX, Rn );
  1321     }
  1322 :}
  1323 MOV.L @(disp, Rm), Rn {:  
  1324     COUNT_INST(I_MOVL);
  1325     load_reg( R_EAX, Rm );
  1326     ADD_imm8s_r32( disp, R_EAX );
  1327     check_ralign32( R_EAX );
  1328     MEM_READ_LONG( R_EAX, R_EAX );
  1329     store_reg( R_EAX, Rn );
  1330     sh4_x86.tstate = TSTATE_NONE;
  1331 :}
  1332 MOV.W Rm, @Rn {:  
  1333     COUNT_INST(I_MOVW);
  1334     load_reg( R_EAX, Rn );
  1335     check_walign16( R_EAX );
  1336     load_reg( R_EDX, Rm );
  1337     MEM_WRITE_WORD( R_EAX, R_EDX );
  1338     sh4_x86.tstate = TSTATE_NONE;
  1339 :}
  1340 MOV.W Rm, @-Rn {:  
  1341     COUNT_INST(I_MOVW);
  1342     load_reg( R_EAX, Rn );
  1343     check_walign16( R_EAX );
  1344     LEA_r32disp8_r32( R_EAX, -2, R_EAX );
  1345     load_reg( R_EDX, Rm );
  1346     MEM_WRITE_WORD( R_EAX, R_EDX );
  1347     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
  1348     sh4_x86.tstate = TSTATE_NONE;
  1349 :}
  1350 MOV.W Rm, @(R0, Rn) {:  
  1351     COUNT_INST(I_MOVW);
  1352     load_reg( R_EAX, 0 );
  1353     ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1354     check_walign16( R_EAX );
  1355     load_reg( R_EDX, Rm );
  1356     MEM_WRITE_WORD( R_EAX, R_EDX );
  1357     sh4_x86.tstate = TSTATE_NONE;
  1358 :}
  1359 MOV.W R0, @(disp, GBR) {:  
  1360     COUNT_INST(I_MOVW);
  1361     load_spreg( R_EAX, R_GBR );
  1362     ADD_imm32_r32( disp, R_EAX );
  1363     check_walign16( R_EAX );
  1364     load_reg( R_EDX, 0 );
  1365     MEM_WRITE_WORD( R_EAX, R_EDX );
  1366     sh4_x86.tstate = TSTATE_NONE;
  1367 :}
  1368 MOV.W R0, @(disp, Rn) {:  
  1369     COUNT_INST(I_MOVW);
  1370     load_reg( R_EAX, Rn );
  1371     ADD_imm32_r32( disp, R_EAX );
  1372     check_walign16( R_EAX );
  1373     load_reg( R_EDX, 0 );
  1374     MEM_WRITE_WORD( R_EAX, R_EDX );
  1375     sh4_x86.tstate = TSTATE_NONE;
  1376 :}
  1377 MOV.W @Rm, Rn {:  
  1378     COUNT_INST(I_MOVW);
  1379     load_reg( R_EAX, Rm );
  1380     check_ralign16( R_EAX );
  1381     MEM_READ_WORD( R_EAX, R_EAX );
  1382     store_reg( R_EAX, Rn );
  1383     sh4_x86.tstate = TSTATE_NONE;
  1384 :}
  1385 MOV.W @Rm+, Rn {:  
  1386     COUNT_INST(I_MOVW);
  1387     load_reg( R_EAX, Rm );
  1388     check_ralign16( R_EAX );
  1389     MEM_READ_WORD( R_EAX, R_EAX );
  1390     if( Rm != Rn ) {
  1391         ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
  1392     }
  1393     store_reg( R_EAX, Rn );
  1394     sh4_x86.tstate = TSTATE_NONE;
  1395 :}
  1396 MOV.W @(R0, Rm), Rn {:  
  1397     COUNT_INST(I_MOVW);
  1398     load_reg( R_EAX, 0 );
  1399     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
  1400     check_ralign16( R_EAX );
  1401     MEM_READ_WORD( R_EAX, R_EAX );
  1402     store_reg( R_EAX, Rn );
  1403     sh4_x86.tstate = TSTATE_NONE;
  1404 :}
  1405 MOV.W @(disp, GBR), R0 {:  
  1406     COUNT_INST(I_MOVW);
  1407     load_spreg( R_EAX, R_GBR );
  1408     ADD_imm32_r32( disp, R_EAX );
  1409     check_ralign16( R_EAX );
  1410     MEM_READ_WORD( R_EAX, R_EAX );
  1411     store_reg( R_EAX, 0 );
  1412     sh4_x86.tstate = TSTATE_NONE;
  1413 :}
  1414 MOV.W @(disp, PC), Rn {:  
  1415     COUNT_INST(I_MOVW);
  1416     if( sh4_x86.in_delay_slot ) {
  1417 	SLOTILLEGAL();
  1418     } else {
  1419 	// See comments for MOV.L @(disp, PC), Rn
  1420 	uint32_t target = pc + disp + 4;
  1421 	if( IS_IN_ICACHE(target) ) {
  1422 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1423 	    MOV_moff32_EAX( ptr );
  1424 	    MOVSX_r16_r32( R_EAX, R_EAX );
  1425 	} else {
  1426 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1427 	    ADD_sh4r_r32( R_PC, R_EAX );
  1428 	    MEM_READ_WORD( R_EAX, R_EAX );
  1429 	    sh4_x86.tstate = TSTATE_NONE;
  1430 	}
  1431 	store_reg( R_EAX, Rn );
  1432     }
  1433 :}
  1434 MOV.W @(disp, Rm), R0 {:  
  1435     COUNT_INST(I_MOVW);
  1436     load_reg( R_EAX, Rm );
  1437     ADD_imm32_r32( disp, R_EAX );
  1438     check_ralign16( R_EAX );
  1439     MEM_READ_WORD( R_EAX, R_EAX );
  1440     store_reg( R_EAX, 0 );
  1441     sh4_x86.tstate = TSTATE_NONE;
  1442 :}
  1443 MOVA @(disp, PC), R0 {:  
  1444     COUNT_INST(I_MOVA);
  1445     if( sh4_x86.in_delay_slot ) {
  1446 	SLOTILLEGAL();
  1447     } else {
  1448 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1449 	ADD_sh4r_r32( R_PC, R_ECX );
  1450 	store_reg( R_ECX, 0 );
  1451 	sh4_x86.tstate = TSTATE_NONE;
  1452     }
  1453 :}
  1454 MOVCA.L R0, @Rn {:  
  1455     COUNT_INST(I_MOVCA);
  1456     load_reg( R_EAX, Rn );
  1457     check_walign32( R_EAX );
  1458     load_reg( R_EDX, 0 );
  1459     MEM_WRITE_LONG( R_EAX, R_EDX );
  1460     sh4_x86.tstate = TSTATE_NONE;
  1461 :}
  1463 /* Control transfer instructions */
  1464 BF disp {:
  1465     COUNT_INST(I_BF);
  1466     if( sh4_x86.in_delay_slot ) {
  1467 	SLOTILLEGAL();
  1468     } else {
  1469 	sh4vma_t target = disp + pc + 4;
  1470 	JT_rel8( nottaken );
  1471 	exit_block_rel(target, pc+2 );
  1472 	JMP_TARGET(nottaken);
  1473 	return 2;
  1474     }
  1475 :}
  1476 BF/S disp {:
  1477     COUNT_INST(I_BFS);
  1478     if( sh4_x86.in_delay_slot ) {
  1479 	SLOTILLEGAL();
  1480     } else {
  1481 	sh4_x86.in_delay_slot = DELAY_PC;
  1482 	if( UNTRANSLATABLE(pc+2) ) {
  1483 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1484 	    JT_rel8(nottaken);
  1485 	    ADD_imm32_r32( disp, R_EAX );
  1486 	    JMP_TARGET(nottaken);
  1487 	    ADD_sh4r_r32( R_PC, R_EAX );
  1488 	    store_spreg( R_EAX, R_NEW_PC );
  1489 	    exit_block_emu(pc+2);
  1490 	    sh4_x86.branch_taken = TRUE;
  1491 	    return 2;
  1492 	} else {
  1493 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1494 		CMP_imm8s_sh4r( 1, R_T );
  1495 		sh4_x86.tstate = TSTATE_E;
  1496 	    }
  1497 	    sh4vma_t target = disp + pc + 4;
  1498 	    OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
  1499 	    int save_tstate = sh4_x86.tstate;
  1500 	    sh4_translate_instruction(pc+2);
  1501 	    exit_block_rel( target, pc+4 );
  1503 	    // not taken
  1504 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1505 	    sh4_x86.tstate = save_tstate;
  1506 	    sh4_translate_instruction(pc+2);
  1507 	    return 4;
  1508 	}
  1509     }
  1510 :}
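/* The forward-branch patching idiom used by BF/S and BT/S: emit
 * 0x0F 0x80+cc with a 4-byte placeholder, generate the taken path
 * (delay slot + block exit), then store the now-known rel32
 * displacement so the not-taken case falls through to a second copy of
 * the delay slot. A sketch over a plain byte buffer (out stands in for
 * xlat_output):
 */
#include <stdint.h>
#include <string.h>

static uint8_t *emit_jcc_rel32_sketch( uint8_t *out, int cc, uint8_t **patch )
{
    *out++ = 0x0F;
    *out++ = 0x80 + cc;          /* Jcc rel32 */
    *patch = out;                /* displacement filled in later */
    memset( out, 0, 4 );
    return out + 4;
}

static void fixup_rel32_sketch( uint8_t *patch, uint8_t *target )
{
    int32_t rel = (int32_t)(target - (patch + 4));  /* relative to insn end */
    memcpy( patch, &rel, sizeof rel );
}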
  1511 BRA disp {:  
  1512     COUNT_INST(I_BRA);
  1513     if( sh4_x86.in_delay_slot ) {
  1514 	SLOTILLEGAL();
  1515     } else {
  1516 	sh4_x86.in_delay_slot = DELAY_PC;
  1517 	sh4_x86.branch_taken = TRUE;
  1518 	if( UNTRANSLATABLE(pc+2) ) {
  1519 	    load_spreg( R_EAX, R_PC );
  1520 	    ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
  1521 	    store_spreg( R_EAX, R_NEW_PC );
  1522 	    exit_block_emu(pc+2);
  1523 	    return 2;
  1524 	} else {
  1525 	    sh4_translate_instruction( pc + 2 );
  1526 	    exit_block_rel( disp + pc + 4, pc+4 );
  1527 	    return 4;
  1528 	}
  1529     }
  1530 :}
  1531 BRAF Rn {:  
  1532     COUNT_INST(I_BRAF);
  1533     if( sh4_x86.in_delay_slot ) {
  1534 	SLOTILLEGAL();
  1535     } else {
  1536 	load_spreg( R_EAX, R_PC );
  1537 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1538 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1539 	store_spreg( R_EAX, R_NEW_PC );
  1540 	sh4_x86.in_delay_slot = DELAY_PC;
  1541 	sh4_x86.tstate = TSTATE_NONE;
  1542 	sh4_x86.branch_taken = TRUE;
  1543 	if( UNTRANSLATABLE(pc+2) ) {
  1544 	    exit_block_emu(pc+2);
  1545 	    return 2;
  1546 	} else {
  1547 	    sh4_translate_instruction( pc + 2 );
  1548 	    exit_block_newpcset(pc+4);
  1549 	    return 4;
  1550 	}
  1551     }
  1552 :}
  1553 BSR disp {:  
  1554     COUNT_INST(I_BSR);
  1555     if( sh4_x86.in_delay_slot ) {
  1556 	SLOTILLEGAL();
  1557     } else {
  1558 	load_spreg( R_EAX, R_PC );
  1559 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1560 	store_spreg( R_EAX, R_PR );
  1561 	sh4_x86.in_delay_slot = DELAY_PC;
  1562 	sh4_x86.branch_taken = TRUE;
  1563 	sh4_x86.tstate = TSTATE_NONE;
  1564 	if( UNTRANSLATABLE(pc+2) ) {
  1565 	    ADD_imm32_r32( disp, R_EAX );
  1566 	    store_spreg( R_EAX, R_NEW_PC );
  1567 	    exit_block_emu(pc+2);
  1568 	    return 2;
  1569 	} else {
  1570 	    sh4_translate_instruction( pc + 2 );
  1571 	    exit_block_rel( disp + pc + 4, pc+4 );
  1572 	    return 4;
  1573 	}
  1574     }
  1575 :}
  1576 BSRF Rn {:  
  1577     COUNT_INST(I_BSRF);
  1578     if( sh4_x86.in_delay_slot ) {
  1579 	SLOTILLEGAL();
  1580     } else {
  1581 	load_spreg( R_EAX, R_PC );
  1582 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1583 	store_spreg( R_EAX, R_PR );
  1584 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1585 	store_spreg( R_EAX, R_NEW_PC );
  1587 	sh4_x86.in_delay_slot = DELAY_PC;
  1588 	sh4_x86.tstate = TSTATE_NONE;
  1589 	sh4_x86.branch_taken = TRUE;
  1590 	if( UNTRANSLATABLE(pc+2) ) {
  1591 	    exit_block_emu(pc+2);
  1592 	    return 2;
  1593 	} else {
  1594 	    sh4_translate_instruction( pc + 2 );
  1595 	    exit_block_newpcset(pc+4);
  1596 	    return 4;
  1597 	}
  1598     }
  1599 :}
  1600 BT disp {:
  1601     COUNT_INST(I_BT);
  1602     if( sh4_x86.in_delay_slot ) {
  1603 	SLOTILLEGAL();
  1604     } else {
  1605 	sh4vma_t target = disp + pc + 4;
  1606 	JF_rel8( nottaken );
  1607 	exit_block_rel(target, pc+2 );
  1608 	JMP_TARGET(nottaken);
  1609 	return 2;
  1610     }
  1611 :}
  1612 BT/S disp {:
  1613     COUNT_INST(I_BTS);
  1614     if( sh4_x86.in_delay_slot ) {
  1615 	SLOTILLEGAL();
  1616     } else {
  1617 	sh4_x86.in_delay_slot = DELAY_PC;
  1618 	if( UNTRANSLATABLE(pc+2) ) {
  1619 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1620 	    JF_rel8(nottaken);
  1621 	    ADD_imm32_r32( disp, R_EAX );
  1622 	    JMP_TARGET(nottaken);
  1623 	    ADD_sh4r_r32( R_PC, R_EAX );
  1624 	    store_spreg( R_EAX, R_NEW_PC );
  1625 	    exit_block_emu(pc+2);
  1626 	    sh4_x86.branch_taken = TRUE;
  1627 	    return 2;
  1628 	} else {
  1629 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1630 		CMP_imm8s_sh4r( 1, R_T );
  1631 		sh4_x86.tstate = TSTATE_E;
  1632 	    }
  1633 	    OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
  1634 	    int save_tstate = sh4_x86.tstate;
  1635 	    sh4_translate_instruction(pc+2);
  1636 	    exit_block_rel( disp + pc + 4, pc+4 );
  1637 	    // not taken
  1638 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1639 	    sh4_x86.tstate = save_tstate;
  1640 	    sh4_translate_instruction(pc+2);
  1641 	    return 4;
  1642 	}
  1643     }
  1644 :}
  1645 JMP @Rn {:  
  1646     COUNT_INST(I_JMP);
  1647     if( sh4_x86.in_delay_slot ) {
  1648 	SLOTILLEGAL();
  1649     } else {
  1650 	load_reg( R_ECX, Rn );
  1651 	store_spreg( R_ECX, R_NEW_PC );
  1652 	sh4_x86.in_delay_slot = DELAY_PC;
  1653 	sh4_x86.branch_taken = TRUE;
  1654 	if( UNTRANSLATABLE(pc+2) ) {
  1655 	    exit_block_emu(pc+2);
  1656 	    return 2;
  1657 	} else {
  1658 	    sh4_translate_instruction(pc+2);
  1659 	    exit_block_newpcset(pc+4);
  1660 	    return 4;
  1661 	}
  1662     }
  1663 :}
  1664 JSR @Rn {:  
  1665     COUNT_INST(I_JSR);
  1666     if( sh4_x86.in_delay_slot ) {
  1667 	SLOTILLEGAL();
  1668     } else {
  1669 	load_spreg( R_EAX, R_PC );
  1670 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1671 	store_spreg( R_EAX, R_PR );
  1672 	load_reg( R_ECX, Rn );
  1673 	store_spreg( R_ECX, R_NEW_PC );
  1674 	sh4_x86.in_delay_slot = DELAY_PC;
  1675 	sh4_x86.branch_taken = TRUE;
  1676 	sh4_x86.tstate = TSTATE_NONE;
  1677 	if( UNTRANSLATABLE(pc+2) ) {
  1678 	    exit_block_emu(pc+2);
  1679 	    return 2;
  1680 	} else {
  1681 	    sh4_translate_instruction(pc+2);
  1682 	    exit_block_newpcset(pc+4);
  1683 	    return 4;
  1684 	}
  1685     }
  1686 :}
  1687 RTE {:  
  1688     COUNT_INST(I_RTE);
  1689     if( sh4_x86.in_delay_slot ) {
  1690 	SLOTILLEGAL();
  1691     } else {
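       	/* Privileged: restore PC from SPC and SR from SSR.  The SR write can
       	 * switch register banks and the FPU-enable bit, so the cached
       	 * fpuen_checked and tstate flags are invalidated. */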
  1692 	check_priv();
  1693 	load_spreg( R_ECX, R_SPC );
  1694 	store_spreg( R_ECX, R_NEW_PC );
  1695 	load_spreg( R_EAX, R_SSR );
  1696 	call_func1( sh4_write_sr, R_EAX );
  1697 	sh4_x86.in_delay_slot = DELAY_PC;
  1698 	sh4_x86.fpuen_checked = FALSE;
  1699 	sh4_x86.tstate = TSTATE_NONE;
  1700 	sh4_x86.branch_taken = TRUE;
  1701 	if( UNTRANSLATABLE(pc+2) ) {
  1702 	    exit_block_emu(pc+2);
  1703 	    return 2;
  1704 	} else {
  1705 	    sh4_translate_instruction(pc+2);
  1706 	    exit_block_newpcset(pc+4);
  1707 	    return 4;
  1708 	}
  1709     }
  1710 :}
  1711 RTS {:  
  1712     COUNT_INST(I_RTS);
  1713     if( sh4_x86.in_delay_slot ) {
  1714 	SLOTILLEGAL();
  1715     } else {
  1716 	load_spreg( R_ECX, R_PR );
  1717 	store_spreg( R_ECX, R_NEW_PC );
  1718 	sh4_x86.in_delay_slot = DELAY_PC;
  1719 	sh4_x86.branch_taken = TRUE;
  1720 	if( UNTRANSLATABLE(pc+2) ) {
  1721 	    exit_block_emu(pc+2);
  1722 	    return 2;
  1723 	} else {
  1724 	    sh4_translate_instruction(pc+2);
  1725 	    exit_block_newpcset(pc+4);
  1726 	    return 4;
  1727 	}
  1728     }
  1729 :}
  1730 TRAPA #imm {:  
  1731     COUNT_INST(I_TRAPA);
  1732     if( sh4_x86.in_delay_slot ) {
  1733 	SLOTILLEGAL();
  1734     } else {
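       	/* Advance sh4r.pc past the TRAPA, raise the trap through the
       	 * emulation core, then exit the block with PC already set. */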
  1735 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1736 	ADD_r32_sh4r( R_ECX, R_PC );
  1737 	load_imm32( R_EAX, imm );
  1738 	call_func1( sh4_raise_trap, R_EAX );
  1739 	sh4_x86.tstate = TSTATE_NONE;
  1740 	exit_block_pcset(pc+2);
  1741 	sh4_x86.branch_taken = TRUE;
  1742 	return 2;
  1743     }
  1744 :}
  1745 UNDEF {:  
  1746     COUNT_INST(I_UNDEF);
  1747     if( sh4_x86.in_delay_slot ) {
  1748 	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2);    
  1749     } else {
  1750 	exit_block_exc(EXC_ILLEGAL, pc);    
  1751 	return 2;
  1752     }
  1753 :}
  1755 CLRMAC {:  
  1756     COUNT_INST(I_CLRMAC);
  1757     XOR_r32_r32(R_EAX, R_EAX);
  1758     store_spreg( R_EAX, R_MACL );
  1759     store_spreg( R_EAX, R_MACH );
  1760     sh4_x86.tstate = TSTATE_NONE;
  1761 :}
  1762 CLRS {:
  1763     COUNT_INST(I_CLRS);
  1764     CLC();
  1765     SETC_sh4r(R_S);
  1766     sh4_x86.tstate = TSTATE_NONE;
  1767 :}
  1768 CLRT {:  
  1769     COUNT_INST(I_CLRT);
  1770     CLC();
  1771     SETC_t();
  1772     sh4_x86.tstate = TSTATE_C;
  1773 :}
  1774 SETS {:  
  1775     COUNT_INST(I_SETS);
  1776     STC();
  1777     SETC_sh4r(R_S);
  1778     sh4_x86.tstate = TSTATE_NONE;
  1779 :}
  1780 SETT {:  
  1781     COUNT_INST(I_SETT);
  1782     STC();
  1783     SETC_t();
  1784     sh4_x86.tstate = TSTATE_C;
  1785 :}
  1787 /* Floating point moves */
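       /* With FPSCR.SZ set (sh4_x86.double_size), FMOV transfers a 64-bit
        * register pair as two 32-bit memory operations; otherwise it moves a
        * single 32-bit register. */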
  1788 FMOV FRm, FRn {:  
  1789     COUNT_INST(I_FMOV1);
  1790     check_fpuen();
  1791     if( sh4_x86.double_size ) {
  1792         load_dr0( R_EAX, FRm );
  1793         load_dr1( R_ECX, FRm );
  1794         store_dr0( R_EAX, FRn );
  1795         store_dr1( R_ECX, FRn );
  1796     } else {
  1797         load_fr( R_EAX, FRm ); // SZ=0 branch
  1798         store_fr( R_EAX, FRn );
  1799     }
  1800 :}
  1801 FMOV FRm, @Rn {: 
  1802     COUNT_INST(I_FMOV2);
  1803     check_fpuen();
  1804     load_reg( R_EAX, Rn );
  1805     if( sh4_x86.double_size ) {
  1806         check_walign64( R_EAX );
  1807         load_dr0( R_EDX, FRm );
  1808         MEM_WRITE_LONG( R_EAX, R_EDX );
  1809         load_reg( R_EAX, Rn );
  1810         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
  1811         load_dr1( R_EDX, FRm );
  1812         MEM_WRITE_LONG( R_EAX, R_EDX );
  1813     } else {
  1814         check_walign32( R_EAX );
  1815         load_fr( R_EDX, FRm );
  1816         MEM_WRITE_LONG( R_EAX, R_EDX );
  1817     }
  1818     sh4_x86.tstate = TSTATE_NONE;
  1819 :}
  1820 FMOV @Rm, FRn {:  
  1821     COUNT_INST(I_FMOV5);
  1822     check_fpuen();
  1823     load_reg( R_EAX, Rm );
  1824     if( sh4_x86.double_size ) {
  1825         check_ralign64( R_EAX );
  1826         MEM_READ_LONG( R_EAX, R_EAX );
  1827         store_dr0( R_EAX, FRn );
  1828         load_reg( R_EAX, Rm );
  1829         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
  1830         MEM_READ_LONG( R_EAX, R_EAX );
  1831         store_dr1( R_EAX, FRn );
  1832     } else {
  1833         check_ralign32( R_EAX );
  1834         MEM_READ_LONG( R_EAX, R_EAX );
  1835         store_fr( R_EAX, FRn );
  1836     }
  1837     sh4_x86.tstate = TSTATE_NONE;
  1838 :}
  1839 FMOV FRm, @-Rn {:  
  1840     COUNT_INST(I_FMOV3);
  1841     check_fpuen();
  1842     load_reg( R_EAX, Rn );
  1843     if( sh4_x86.double_size ) {
  1844         check_walign64( R_EAX );
  1845         LEA_r32disp8_r32( R_EAX, -8, R_EAX );
  1846         load_dr0( R_EDX, FRm );
  1847         MEM_WRITE_LONG( R_EAX, R_EDX );
  1848         load_reg( R_EAX, Rn );
  1849         LEA_r32disp8_r32( R_EAX, -4, R_EAX );
  1850         load_dr1( R_EDX, FRm );
  1851         MEM_WRITE_LONG( R_EAX, R_EDX );
  1852         ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1853     } else {
  1854         check_walign32( R_EAX );
  1855         LEA_r32disp8_r32( R_EAX, -4, R_EAX );
  1856         load_fr( R_EDX, FRm );
  1857         MEM_WRITE_LONG( R_EAX, R_EDX );
  1858         ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1859     }
  1860     sh4_x86.tstate = TSTATE_NONE;
  1861 :}
  1862 FMOV @Rm+, FRn {:
  1863     COUNT_INST(I_FMOV6);
  1864     check_fpuen();
  1865     load_reg( R_EAX, Rm );
  1866     if( sh4_x86.double_size ) {
  1867         check_ralign64( R_EAX );
  1868         MEM_READ_LONG( R_EAX, R_EAX );
  1869         store_dr0( R_EAX, FRn );
  1870         load_reg( R_EAX, Rm );
  1871         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
  1872         MEM_READ_LONG( R_EAX, R_EAX );
  1873         store_dr1( R_EAX, FRn );
  1874         ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1875     } else {
  1876         check_ralign32( R_EAX );
  1877         MEM_READ_LONG( R_EAX, R_EAX );
  1878         store_fr( R_EAX, FRn );
  1879         ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1880     }
  1881     sh4_x86.tstate = TSTATE_NONE;
  1882 :}
  1883 FMOV FRm, @(R0, Rn) {:  
  1884     COUNT_INST(I_FMOV4);
  1885     check_fpuen();
  1886     load_reg( R_EAX, Rn );
  1887     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1888     if( sh4_x86.double_size ) {
  1889         check_walign64( R_EAX );
  1890         load_dr0( R_EDX, FRm );
  1891         MEM_WRITE_LONG( R_EAX, R_EDX );
  1892         load_reg( R_EAX, Rn );
  1893         ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1894         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
  1895         load_dr1( R_EDX, FRm );
  1896         MEM_WRITE_LONG( R_EAX, R_EDX );
  1897     } else {
  1898         check_walign32( R_EAX );
  1899         load_fr( R_EDX, FRm );
  1900         MEM_WRITE_LONG( R_EAX, R_EDX ); // 12
  1901     }
  1902     sh4_x86.tstate = TSTATE_NONE;
  1903 :}
  1904 FMOV @(R0, Rm), FRn {:  
  1905     COUNT_INST(I_FMOV7);
  1906     check_fpuen();
  1907     load_reg( R_EAX, Rm );
  1908     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1909     if( sh4_x86.double_size ) {
  1910         check_ralign64( R_EAX );
  1911         MEM_READ_LONG( R_EAX, R_EAX );
  1912         store_dr0( R_EAX, FRn );
  1913         load_reg( R_EAX, Rm );
  1914         ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1915         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
  1916         MEM_READ_LONG( R_EAX, R_EAX );
  1917         store_dr1( R_EAX, FRn );
  1918     } else {
  1919         check_ralign32( R_EAX );
  1920         MEM_READ_LONG( R_EAX, R_EAX );
  1921         store_fr( R_EAX, FRn );
  1922     }
  1923     sh4_x86.tstate = TSTATE_NONE;
  1924 :}
  1925 FLDI0 FRn {:  /* IFF PR=0 */
  1926     COUNT_INST(I_FLDI0);
  1927     check_fpuen();
  1928     if( sh4_x86.double_prec == 0 ) {
  1929         XOR_r32_r32( R_EAX, R_EAX );
  1930         store_fr( R_EAX, FRn );
  1931     }
  1932     sh4_x86.tstate = TSTATE_NONE;
  1933 :}
  1934 FLDI1 FRn {:  /* IFF PR=0 */
  1935     COUNT_INST(I_FLDI1);
  1936     check_fpuen();
  1937     if( sh4_x86.double_prec == 0 ) {
  1938         load_imm32(R_EAX, 0x3F800000);
  1939         store_fr( R_EAX, FRn );
  1940     }
  1941 :}
  1943 FLOAT FPUL, FRn {:  
  1944     COUNT_INST(I_FLOAT);
  1945     check_fpuen();
  1946     FILD_sh4r(R_FPUL);
  1947     if( sh4_x86.double_prec ) {
  1948         pop_dr( FRn );
  1949     } else {
  1950         pop_fr( FRn );
  1951     }
  1952 :}
  1953 FTRC FRm, FPUL {:  
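       /* Float-to-int conversion with SH4 saturation semantics, roughly:
        *   FPUL = v >= MAX_INT ? MAX_INT : v <= MIN_INT ? MIN_INT : (int32_t)v
        * In-range values are truncated toward zero by temporarily loading a
        * round-to-zero x87 control word. */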
  1954     COUNT_INST(I_FTRC);
  1955     check_fpuen();
  1956     if( sh4_x86.double_prec ) {
  1957         push_dr( FRm );
  1958     } else {
  1959         push_fr( FRm );
  1960     }
  1961     load_ptr( R_ECX, &max_int );
  1962     FILD_r32ind( R_ECX );
  1963     FCOMIP_st(1);
  1964     JNA_rel8( sat );
  1965     load_ptr( R_ECX, &min_int );  // 5
  1966     FILD_r32ind( R_ECX );           // 2
  1967     FCOMIP_st(1);                   // 2
  1968     JAE_rel8( sat2 );            // 2
  1969     load_ptr( R_EAX, &save_fcw );
  1970     FNSTCW_r32ind( R_EAX );
  1971     load_ptr( R_EDX, &trunc_fcw );
  1972     FLDCW_r32ind( R_EDX );
  1973     FISTP_sh4r(R_FPUL);             // 3
  1974     FLDCW_r32ind( R_EAX );
  1975     JMP_rel8(end);             // 2
  1977     JMP_TARGET(sat);
  1978     JMP_TARGET(sat2);
  1979     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  1980     store_spreg( R_ECX, R_FPUL );
  1981     FPOP_st();
  1982     JMP_TARGET(end);
  1983     sh4_x86.tstate = TSTATE_NONE;
  1984 :}
  1985 FLDS FRm, FPUL {:  
  1986     COUNT_INST(I_FLDS);
  1987     check_fpuen();
  1988     load_fr( R_EAX, FRm );
  1989     store_spreg( R_EAX, R_FPUL );
  1990 :}
  1991 FSTS FPUL, FRn {:  
  1992     COUNT_INST(I_FSTS);
  1993     check_fpuen();
  1994     load_spreg( R_EAX, R_FPUL );
  1995     store_fr( R_EAX, FRn );
  1996 :}
  1997 FCNVDS FRm, FPUL {:  
  1998     COUNT_INST(I_FCNVDS);
  1999     check_fpuen();
  2000     if( sh4_x86.double_prec ) {
  2001         push_dr( FRm );
  2002         pop_fpul();
  2003     }
  2004 :}
  2005 FCNVSD FPUL, FRn {:  
  2006     COUNT_INST(I_FCNVSD);
  2007     check_fpuen();
  2008     if( sh4_x86.double_prec ) {
  2009         push_fpul();
  2010         pop_dr( FRn );
  2011     }
  2012 :}
  2014 /* Floating point instructions */
  2015 FABS FRn {:  
  2016     COUNT_INST(I_FABS);
  2017     check_fpuen();
  2018     if( sh4_x86.double_prec ) {
  2019         push_dr(FRn);
  2020         FABS_st0();
  2021         pop_dr(FRn);
  2022     } else {
  2023         push_fr(FRn);
  2024         FABS_st0();
  2025         pop_fr(FRn);
  2026     }
  2027 :}
  2028 FADD FRm, FRn {:  
  2029     COUNT_INST(I_FADD);
  2030     check_fpuen();
  2031     if( sh4_x86.double_prec ) {
  2032         push_dr(FRm);
  2033         push_dr(FRn);
  2034         FADDP_st(1);
  2035         pop_dr(FRn);
  2036     } else {
  2037         push_fr(FRm);
  2038         push_fr(FRn);
  2039         FADDP_st(1);
  2040         pop_fr(FRn);
  2041     }
  2042 :}
  2043 FDIV FRm, FRn {:  
  2044     COUNT_INST(I_FDIV);
  2045     check_fpuen();
  2046     if( sh4_x86.double_prec ) {
  2047         push_dr(FRn);
  2048         push_dr(FRm);
  2049         FDIVP_st(1);
  2050         pop_dr(FRn);
  2051     } else {
  2052         push_fr(FRn);
  2053         push_fr(FRm);
  2054         FDIVP_st(1);
  2055         pop_fr(FRn);
  2056     }
  2057 :}
  2058 FMAC FR0, FRm, FRn {:  
  2059     COUNT_INST(I_FMAC);
  2060     check_fpuen();
  2061     if( sh4_x86.double_prec ) {
  2062         push_dr( 0 );
  2063         push_dr( FRm );
  2064         FMULP_st(1);
  2065         push_dr( FRn );
  2066         FADDP_st(1);
  2067         pop_dr( FRn );
  2068     } else {
  2069         push_fr( 0 );
  2070         push_fr( FRm );
  2071         FMULP_st(1);
  2072         push_fr( FRn );
  2073         FADDP_st(1);
  2074         pop_fr( FRn );
  2075     }
  2076 :}
  2078 FMUL FRm, FRn {:  
  2079     COUNT_INST(I_FMUL);
  2080     check_fpuen();
  2081     if( sh4_x86.double_prec ) {
  2082         push_dr(FRm);
  2083         push_dr(FRn);
  2084         FMULP_st(1);
  2085         pop_dr(FRn);
  2086     } else {
  2087         push_fr(FRm);
  2088         push_fr(FRn);
  2089         FMULP_st(1);
  2090         pop_fr(FRn);
  2091     }
  2092 :}
  2093 FNEG FRn {:  
  2094     COUNT_INST(I_FNEG);
  2095     check_fpuen();
  2096     if( sh4_x86.double_prec ) {
  2097         push_dr(FRn);
  2098         FCHS_st0();
  2099         pop_dr(FRn);
  2100     } else {
  2101         push_fr(FRn);
  2102         FCHS_st0();
  2103         pop_fr(FRn);
  2104     }
  2105 :}
  2106 FSRRA FRn {:  
  2107     COUNT_INST(I_FSRRA);
  2108     check_fpuen();
  2109     if( sh4_x86.double_prec == 0 ) {
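        /* Reciprocal square root: computed here exactly as 1.0f / sqrtf(FRn),
         * although the hardware instruction is only an approximation. */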
  2110         FLD1_st0();
  2111         push_fr(FRn);
  2112         FSQRT_st0();
  2113         FDIVP_st(1);
  2114         pop_fr(FRn);
  2115     }
  2116 :}
  2117 FSQRT FRn {:  
  2118     COUNT_INST(I_FSQRT);
  2119     check_fpuen();
  2120     if( sh4_x86.double_prec ) {
  2121         push_dr(FRn);
  2122         FSQRT_st0();
  2123         pop_dr(FRn);
  2124     } else {
  2125         push_fr(FRn);
  2126         FSQRT_st0();
  2127         pop_fr(FRn);
  2128     }
  2129 :}
  2130 FSUB FRm, FRn {:  
  2131     COUNT_INST(I_FSUB);
  2132     check_fpuen();
  2133     if( sh4_x86.double_prec ) {
  2134         push_dr(FRn);
  2135         push_dr(FRm);
  2136         FSUBP_st(1);
  2137         pop_dr(FRn);
  2138     } else {
  2139         push_fr(FRn);
  2140         push_fr(FRm);
  2141         FSUBP_st(1);
  2142         pop_fr(FRn);
  2143     }
  2144 :}
  2146 FCMP/EQ FRm, FRn {:  
  2147     COUNT_INST(I_FCMPEQ);
  2148     check_fpuen();
  2149     if( sh4_x86.double_prec ) {
  2150         push_dr(FRm);
  2151         push_dr(FRn);
  2152     } else {
  2153         push_fr(FRm);
  2154         push_fr(FRn);
  2155     }
  2156     FCOMIP_st(1);
  2157     SETE_t();
  2158     FPOP_st();
  2159     sh4_x86.tstate = TSTATE_E;
  2160 :}
  2161 FCMP/GT FRm, FRn {:  
  2162     COUNT_INST(I_FCMPGT);
  2163     check_fpuen();
  2164     if( sh4_x86.double_prec ) {
  2165         push_dr(FRm);
  2166         push_dr(FRn);
  2167     } else {
  2168         push_fr(FRm);
  2169         push_fr(FRn);
  2170     }
  2171     FCOMIP_st(1);
  2172     SETA_t();
  2173     FPOP_st();
  2174     sh4_x86.tstate = TSTATE_A;
  2175 :}
  2177 FSCA FPUL, FRn {:  
  2178     COUNT_INST(I_FSCA);
  2179     check_fpuen();
  2180     if( sh4_x86.double_prec == 0 ) {
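        /* sh4_fsca writes the sine and cosine of the FPUL angle into the FRn
         * register pair (the &0x0E mask forces an even register). */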
  2181         LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
  2182         load_spreg( R_EAX, R_FPUL );
  2183         call_func2( sh4_fsca, R_EAX, R_EDX );
  2184     }
  2185     sh4_x86.tstate = TSTATE_NONE;
  2186 :}
  2187 FIPR FVm, FVn {:  
  2188     COUNT_INST(I_FIPR);
  2189     check_fpuen();
  2190     if( sh4_x86.double_prec == 0 ) {
  2191         if( sh4_x86.sse3_enabled ) {
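            /* 4-element dot product FVn·FVm: element-wise multiply, then two
             * HADDPS passes fold the sum into every lane.  The +2 store offset
             * reflects the pair-swapped in-memory float layout (see the FTRV
             * register comments below). */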
  2192             MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2193             MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2194             HADDPS_xmm_xmm( 4, 4 ); 
  2195             HADDPS_xmm_xmm( 4, 4 );
  2196             MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2197         } else {
  2198             push_fr( FVm<<2 );
  2199             push_fr( FVn<<2 );
  2200             FMULP_st(1);
  2201             push_fr( (FVm<<2)+1);
  2202             push_fr( (FVn<<2)+1);
  2203             FMULP_st(1);
  2204             FADDP_st(1);
  2205             push_fr( (FVm<<2)+2);
  2206             push_fr( (FVn<<2)+2);
  2207             FMULP_st(1);
  2208             FADDP_st(1);
  2209             push_fr( (FVm<<2)+3);
  2210             push_fr( (FVn<<2)+3);
  2211             FMULP_st(1);
  2212             FADDP_st(1);
  2213             pop_fr( (FVn<<2)+3);
  2214         }
  2215     }
  2216 :}
  2217 FTRV XMTRX, FVn {:  
  2218     COUNT_INST(I_FTRV);
  2219     check_fpuen();
  2220     if( sh4_x86.double_prec == 0 ) {
  2221         if( sh4_x86.sse3_enabled ) {
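            /* Matrix-vector product FVn = XMTRX * FVn: load the four matrix
             * columns, splat each vector element across an XMM register, and
             * multiply-accumulate.  The trailing comments show the
             * pair-swapped element order. */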
  2222             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2223             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2224             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2225             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2227             MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2228             MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2229             MOVAPS_xmm_xmm( 4, 6 );
  2230             MOVAPS_xmm_xmm( 5, 7 );
  2231             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2232             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2233             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2234             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2235             MULPS_xmm_xmm( 0, 4 );
  2236             MULPS_xmm_xmm( 1, 5 );
  2237             MULPS_xmm_xmm( 2, 6 );
  2238             MULPS_xmm_xmm( 3, 7 );
  2239             ADDPS_xmm_xmm( 5, 4 );
  2240             ADDPS_xmm_xmm( 7, 6 );
  2241             ADDPS_xmm_xmm( 6, 4 );
  2242             MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2243         } else {
  2244             LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
  2245             call_func1( sh4_ftrv, R_EAX );
  2246         }
  2247     }
  2248     sh4_x86.tstate = TSTATE_NONE;
  2249 :}
  2251 FRCHG {:  
  2252     COUNT_INST(I_FRCHG);
  2253     check_fpuen();
  2254     XOR_imm32_sh4r( FPSCR_FR, R_FPSCR );
  2255     call_func0( sh4_switch_fr_banks );
  2256     sh4_x86.tstate = TSTATE_NONE;
  2257 :}
  2258 FSCHG {:  
  2259     COUNT_INST(I_FSCHG);
  2260     check_fpuen();
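       /* Toggle FPSCR.SZ in both the live FPSCR and the cached translation
        * mode word, and track the flip statically for the rest of the block. */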
  2261     XOR_imm32_sh4r( FPSCR_SZ, R_FPSCR);
  2262     XOR_imm32_sh4r( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
  2263     sh4_x86.tstate = TSTATE_NONE;
  2264     sh4_x86.double_size = !sh4_x86.double_size;
  2265 :}
  2267 /* Processor control instructions */
  2268 LDC Rm, SR {:
  2269     COUNT_INST(I_LDCSR);
  2270     if( sh4_x86.in_delay_slot ) {
  2271 	SLOTILLEGAL();
  2272     } else {
  2273 	check_priv();
  2274 	load_reg( R_EAX, Rm );
  2275 	call_func1( sh4_write_sr, R_EAX );
  2276 	sh4_x86.fpuen_checked = FALSE;
  2277 	sh4_x86.tstate = TSTATE_NONE;
  2278 	return 2;
  2279     }
  2280 :}
  2281 LDC Rm, GBR {: 
  2282     COUNT_INST(I_LDC);
  2283     load_reg( R_EAX, Rm );
  2284     store_spreg( R_EAX, R_GBR );
  2285 :}
  2286 LDC Rm, VBR {:  
  2287     COUNT_INST(I_LDC);
  2288     check_priv();
  2289     load_reg( R_EAX, Rm );
  2290     store_spreg( R_EAX, R_VBR );
  2291     sh4_x86.tstate = TSTATE_NONE;
  2292 :}
  2293 LDC Rm, SSR {:  
  2294     COUNT_INST(I_LDC);
  2295     check_priv();
  2296     load_reg( R_EAX, Rm );
  2297     store_spreg( R_EAX, R_SSR );
  2298     sh4_x86.tstate = TSTATE_NONE;
  2299 :}
  2300 LDC Rm, SGR {:  
  2301     COUNT_INST(I_LDC);
  2302     check_priv();
  2303     load_reg( R_EAX, Rm );
  2304     store_spreg( R_EAX, R_SGR );
  2305     sh4_x86.tstate = TSTATE_NONE;
  2306 :}
  2307 LDC Rm, SPC {:  
  2308     COUNT_INST(I_LDC);
  2309     check_priv();
  2310     load_reg( R_EAX, Rm );
  2311     store_spreg( R_EAX, R_SPC );
  2312     sh4_x86.tstate = TSTATE_NONE;
  2313 :}
  2314 LDC Rm, DBR {:  
  2315     COUNT_INST(I_LDC);
  2316     check_priv();
  2317     load_reg( R_EAX, Rm );
  2318     store_spreg( R_EAX, R_DBR );
  2319     sh4_x86.tstate = TSTATE_NONE;
  2320 :}
  2321 LDC Rm, Rn_BANK {:  
  2322     COUNT_INST(I_LDC);
  2323     check_priv();
  2324     load_reg( R_EAX, Rm );
  2325     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2326     sh4_x86.tstate = TSTATE_NONE;
  2327 :}
  2328 LDC.L @Rm+, GBR {:  
  2329     COUNT_INST(I_LDCM);
  2330     load_reg( R_EAX, Rm );
  2331     check_ralign32( R_EAX );
  2332     MEM_READ_LONG( R_EAX, R_EAX );
  2333     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2334     store_spreg( R_EAX, R_GBR );
  2335     sh4_x86.tstate = TSTATE_NONE;
  2336 :}
  2337 LDC.L @Rm+, SR {:
  2338     COUNT_INST(I_LDCSRM);
  2339     if( sh4_x86.in_delay_slot ) {
  2340 	SLOTILLEGAL();
  2341     } else {
  2342 	check_priv();
  2343 	load_reg( R_EAX, Rm );
  2344 	check_ralign32( R_EAX );
  2345 	MEM_READ_LONG( R_EAX, R_EAX );
  2346 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2347 	call_func1( sh4_write_sr, R_EAX );
  2348 	sh4_x86.fpuen_checked = FALSE;
  2349 	sh4_x86.tstate = TSTATE_NONE;
  2350 	return 2;
  2351     }
  2352 :}
  2353 LDC.L @Rm+, VBR {:  
  2354     COUNT_INST(I_LDCM);
  2355     check_priv();
  2356     load_reg( R_EAX, Rm );
  2357     check_ralign32( R_EAX );
  2358     MEM_READ_LONG( R_EAX, R_EAX );
  2359     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2360     store_spreg( R_EAX, R_VBR );
  2361     sh4_x86.tstate = TSTATE_NONE;
  2362 :}
  2363 LDC.L @Rm+, SSR {:
  2364     COUNT_INST(I_LDCM);
  2365     check_priv();
  2366     load_reg( R_EAX, Rm );
  2367     check_ralign32( R_EAX );
  2368     MEM_READ_LONG( R_EAX, R_EAX );
  2369     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2370     store_spreg( R_EAX, R_SSR );
  2371     sh4_x86.tstate = TSTATE_NONE;
  2372 :}
  2373 LDC.L @Rm+, SGR {:  
  2374     COUNT_INST(I_LDCM);
  2375     check_priv();
  2376     load_reg( R_EAX, Rm );
  2377     check_ralign32( R_EAX );
  2378     MEM_READ_LONG( R_EAX, R_EAX );
  2379     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2380     store_spreg( R_EAX, R_SGR );
  2381     sh4_x86.tstate = TSTATE_NONE;
  2382 :}
  2383 LDC.L @Rm+, SPC {:  
  2384     COUNT_INST(I_LDCM);
  2385     check_priv();
  2386     load_reg( R_EAX, Rm );
  2387     check_ralign32( R_EAX );
  2388     MEM_READ_LONG( R_EAX, R_EAX );
  2389     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2390     store_spreg( R_EAX, R_SPC );
  2391     sh4_x86.tstate = TSTATE_NONE;
  2392 :}
  2393 LDC.L @Rm+, DBR {:  
  2394     COUNT_INST(I_LDCM);
  2395     check_priv();
  2396     load_reg( R_EAX, Rm );
  2397     check_ralign32( R_EAX );
  2398     MEM_READ_LONG( R_EAX, R_EAX );
  2399     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2400     store_spreg( R_EAX, R_DBR );
  2401     sh4_x86.tstate = TSTATE_NONE;
  2402 :}
  2403 LDC.L @Rm+, Rn_BANK {:  
  2404     COUNT_INST(I_LDCM);
  2405     check_priv();
  2406     load_reg( R_EAX, Rm );
  2407     check_ralign32( R_EAX );
  2408     MEM_READ_LONG( R_EAX, R_EAX );
  2409     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2410     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2411     sh4_x86.tstate = TSTATE_NONE;
  2412 :}
  2413 LDS Rm, FPSCR {:
  2414     COUNT_INST(I_LDSFPSCR);
  2415     check_fpuen();
  2416     load_reg( R_EAX, Rm );
  2417     call_func1( sh4_write_fpscr, R_EAX );
  2418     sh4_x86.tstate = TSTATE_NONE;
  2419     return 2;
  2420 :}
  2421 LDS.L @Rm+, FPSCR {:  
  2422     COUNT_INST(I_LDSFPSCRM);
  2423     check_fpuen();
  2424     load_reg( R_EAX, Rm );
  2425     check_ralign32( R_EAX );
  2426     MEM_READ_LONG( R_EAX, R_EAX );
  2427     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2428     call_func1( sh4_write_fpscr, R_EAX );
  2429     sh4_x86.tstate = TSTATE_NONE;
  2430     return 2;
  2431 :}
  2432 LDS Rm, FPUL {:  
  2433     COUNT_INST(I_LDS);
  2434     check_fpuen();
  2435     load_reg( R_EAX, Rm );
  2436     store_spreg( R_EAX, R_FPUL );
  2437 :}
  2438 LDS.L @Rm+, FPUL {:  
  2439     COUNT_INST(I_LDSM);
  2440     check_fpuen();
  2441     load_reg( R_EAX, Rm );
  2442     check_ralign32( R_EAX );
  2443     MEM_READ_LONG( R_EAX, R_EAX );
  2444     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2445     store_spreg( R_EAX, R_FPUL );
  2446     sh4_x86.tstate = TSTATE_NONE;
  2447 :}
  2448 LDS Rm, MACH {: 
  2449     COUNT_INST(I_LDS);
  2450     load_reg( R_EAX, Rm );
  2451     store_spreg( R_EAX, R_MACH );
  2452 :}
  2453 LDS.L @Rm+, MACH {:  
  2454     COUNT_INST(I_LDSM);
  2455     load_reg( R_EAX, Rm );
  2456     check_ralign32( R_EAX );
  2457     MEM_READ_LONG( R_EAX, R_EAX );
  2458     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2459     store_spreg( R_EAX, R_MACH );
  2460     sh4_x86.tstate = TSTATE_NONE;
  2461 :}
  2462 LDS Rm, MACL {:  
  2463     COUNT_INST(I_LDS);
  2464     load_reg( R_EAX, Rm );
  2465     store_spreg( R_EAX, R_MACL );
  2466 :}
  2467 LDS.L @Rm+, MACL {:  
  2468     COUNT_INST(I_LDSM);
  2469     load_reg( R_EAX, Rm );
  2470     check_ralign32( R_EAX );
  2471     MEM_READ_LONG( R_EAX, R_EAX );
  2472     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2473     store_spreg( R_EAX, R_MACL );
  2474     sh4_x86.tstate = TSTATE_NONE;
  2475 :}
  2476 LDS Rm, PR {:  
  2477     COUNT_INST(I_LDS);
  2478     load_reg( R_EAX, Rm );
  2479     store_spreg( R_EAX, R_PR );
  2480 :}
  2481 LDS.L @Rm+, PR {:  
  2482     COUNT_INST(I_LDSM);
  2483     load_reg( R_EAX, Rm );
  2484     check_ralign32( R_EAX );
  2485     MEM_READ_LONG( R_EAX, R_EAX );
  2486     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2487     store_spreg( R_EAX, R_PR );
  2488     sh4_x86.tstate = TSTATE_NONE;
  2489 :}
  2490 LDTLB {:  
  2491     COUNT_INST(I_LDTLB);
  2492     call_func0( MMU_ldtlb );
  2493     sh4_x86.tstate = TSTATE_NONE;
  2494 :}
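       /* The cache-control hints below (OCBI/OCBP/OCBWB) have no observable
        * effect on emulated memory, so only the instruction count is emitted. */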
  2495 OCBI @Rn {:
  2496     COUNT_INST(I_OCBI);
  2497 :}
  2498 OCBP @Rn {:
  2499     COUNT_INST(I_OCBP);
  2500 :}
  2501 OCBWB @Rn {:
  2502     COUNT_INST(I_OCBWB);
  2503 :}
  2504 PREF @Rn {:
  2505     COUNT_INST(I_PREF);
  2506     load_reg( R_EAX, Rn );
  2507     MEM_PREFETCH( R_EAX );
  2508     sh4_x86.tstate = TSTATE_NONE;
  2509 :}
  2510 SLEEP {: 
  2511     COUNT_INST(I_SLEEP);
  2512     check_priv();
  2513     call_func0( sh4_sleep );
  2514     sh4_x86.tstate = TSTATE_NONE;
  2515     sh4_x86.in_delay_slot = DELAY_NONE;
  2516     return 2;
  2517 :}
  2518 STC SR, Rn {:
  2519     COUNT_INST(I_STCSR);
  2520     check_priv();
  2521     call_func0(sh4_read_sr);
  2522     store_reg( R_EAX, Rn );
  2523     sh4_x86.tstate = TSTATE_NONE;
  2524 :}
  2525 STC GBR, Rn {:  
  2526     COUNT_INST(I_STC);
  2527     load_spreg( R_EAX, R_GBR );
  2528     store_reg( R_EAX, Rn );
  2529 :}
  2530 STC VBR, Rn {:  
  2531     COUNT_INST(I_STC);
  2532     check_priv();
  2533     load_spreg( R_EAX, R_VBR );
  2534     store_reg( R_EAX, Rn );
  2535     sh4_x86.tstate = TSTATE_NONE;
  2536 :}
  2537 STC SSR, Rn {:  
  2538     COUNT_INST(I_STC);
  2539     check_priv();
  2540     load_spreg( R_EAX, R_SSR );
  2541     store_reg( R_EAX, Rn );
  2542     sh4_x86.tstate = TSTATE_NONE;
  2543 :}
  2544 STC SPC, Rn {:  
  2545     COUNT_INST(I_STC);
  2546     check_priv();
  2547     load_spreg( R_EAX, R_SPC );
  2548     store_reg( R_EAX, Rn );
  2549     sh4_x86.tstate = TSTATE_NONE;
  2550 :}
  2551 STC SGR, Rn {:  
  2552     COUNT_INST(I_STC);
  2553     check_priv();
  2554     load_spreg( R_EAX, R_SGR );
  2555     store_reg( R_EAX, Rn );
  2556     sh4_x86.tstate = TSTATE_NONE;
  2557 :}
  2558 STC DBR, Rn {:  
  2559     COUNT_INST(I_STC);
  2560     check_priv();
  2561     load_spreg( R_EAX, R_DBR );
  2562     store_reg( R_EAX, Rn );
  2563     sh4_x86.tstate = TSTATE_NONE;
  2564 :}
  2565 STC Rm_BANK, Rn {:
  2566     COUNT_INST(I_STC);
  2567     check_priv();
  2568     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2569     store_reg( R_EAX, Rn );
  2570     sh4_x86.tstate = TSTATE_NONE;
  2571 :}
  2572 STC.L SR, @-Rn {:
  2573     COUNT_INST(I_STCSRM);
  2574     check_priv();
  2575     call_func0( sh4_read_sr );
  2576     MOV_r32_r32( R_EAX, R_EDX );
  2577     load_reg( R_EAX, Rn );
  2578     check_walign32( R_EAX );
  2579     LEA_r32disp8_r32( R_EAX, -4, R_EAX );
  2580     MEM_WRITE_LONG( R_EAX, R_EDX );
  2581     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2582     sh4_x86.tstate = TSTATE_NONE;
  2583 :}
  2584 STC.L VBR, @-Rn {:  
  2585     COUNT_INST(I_STCM);
  2586     check_priv();
  2587     load_reg( R_EAX, Rn );
  2588     check_walign32( R_EAX );
  2589     ADD_imm8s_r32( -4, R_EAX );
  2590     load_spreg( R_EDX, R_VBR );
  2591     MEM_WRITE_LONG( R_EAX, R_EDX );
  2592     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2593     sh4_x86.tstate = TSTATE_NONE;
  2594 :}
  2595 STC.L SSR, @-Rn {:  
  2596     COUNT_INST(I_STCM);
  2597     check_priv();
  2598     load_reg( R_EAX, Rn );
  2599     check_walign32( R_EAX );
  2600     ADD_imm8s_r32( -4, R_EAX );
  2601     load_spreg( R_EDX, R_SSR );
  2602     MEM_WRITE_LONG( R_EAX, R_EDX );
  2603     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2604     sh4_x86.tstate = TSTATE_NONE;
  2605 :}
  2606 STC.L SPC, @-Rn {:
  2607     COUNT_INST(I_STCM);
  2608     check_priv();
  2609     load_reg( R_EAX, Rn );
  2610     check_walign32( R_EAX );
  2611     ADD_imm8s_r32( -4, R_EAX );
  2612     load_spreg( R_EDX, R_SPC );
  2613     MEM_WRITE_LONG( R_EAX, R_EDX );
  2614     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2615     sh4_x86.tstate = TSTATE_NONE;
  2616 :}
  2617 STC.L SGR, @-Rn {:  
  2618     COUNT_INST(I_STCM);
  2619     check_priv();
  2620     load_reg( R_EAX, Rn );
  2621     check_walign32( R_EAX );
  2622     ADD_imm8s_r32( -4, R_EAX );
  2623     load_spreg( R_EDX, R_SGR );
  2624     MEM_WRITE_LONG( R_EAX, R_EDX );
  2625     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2626     sh4_x86.tstate = TSTATE_NONE;
  2627 :}
  2628 STC.L DBR, @-Rn {:  
  2629     COUNT_INST(I_STCM);
  2630     check_priv();
  2631     load_reg( R_EAX, Rn );
  2632     check_walign32( R_EAX );
  2633     ADD_imm8s_r32( -4, R_EAX );
  2634     load_spreg( R_EDX, R_DBR );
  2635     MEM_WRITE_LONG( R_EAX, R_EDX );
  2636     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2637     sh4_x86.tstate = TSTATE_NONE;
  2638 :}
  2639 STC.L Rm_BANK, @-Rn {:  
  2640     COUNT_INST(I_STCM);
  2641     check_priv();
  2642     load_reg( R_EAX, Rn );
  2643     check_walign32( R_EAX );
  2644     ADD_imm8s_r32( -4, R_EAX );
  2645     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2646     MEM_WRITE_LONG( R_EAX, R_EDX );
  2647     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2648     sh4_x86.tstate = TSTATE_NONE;
  2649 :}
  2650 STC.L GBR, @-Rn {:  
  2651     COUNT_INST(I_STCM);
  2652     load_reg( R_EAX, Rn );
  2653     check_walign32( R_EAX );
  2654     ADD_imm8s_r32( -4, R_EAX );
  2655     load_spreg( R_EDX, R_GBR );
  2656     MEM_WRITE_LONG( R_EAX, R_EDX );
  2657     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2658     sh4_x86.tstate = TSTATE_NONE;
  2659 :}
  2660 STS FPSCR, Rn {:  
  2661     COUNT_INST(I_STSFPSCR);
  2662     check_fpuen();
  2663     load_spreg( R_EAX, R_FPSCR );
  2664     store_reg( R_EAX, Rn );
  2665 :}
  2666 STS.L FPSCR, @-Rn {:  
  2667     COUNT_INST(I_STSFPSCRM);
  2668     check_fpuen();
  2669     load_reg( R_EAX, Rn );
  2670     check_walign32( R_EAX );
  2671     ADD_imm8s_r32( -4, R_EAX );
  2672     load_spreg( R_EDX, R_FPSCR );
  2673     MEM_WRITE_LONG( R_EAX, R_EDX );
  2674     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2675     sh4_x86.tstate = TSTATE_NONE;
  2676 :}
  2677 STS FPUL, Rn {:  
  2678     COUNT_INST(I_STS);
  2679     check_fpuen();
  2680     load_spreg( R_EAX, R_FPUL );
  2681     store_reg( R_EAX, Rn );
  2682 :}
  2683 STS.L FPUL, @-Rn {:  
  2684     COUNT_INST(I_STSM);
  2685     check_fpuen();
  2686     load_reg( R_EAX, Rn );
  2687     check_walign32( R_EAX );
  2688     ADD_imm8s_r32( -4, R_EAX );
  2689     load_spreg( R_EDX, R_FPUL );
  2690     MEM_WRITE_LONG( R_EAX, R_EDX );
  2691     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2692     sh4_x86.tstate = TSTATE_NONE;
  2693 :}
  2694 STS MACH, Rn {:  
  2695     COUNT_INST(I_STS);
  2696     load_spreg( R_EAX, R_MACH );
  2697     store_reg( R_EAX, Rn );
  2698 :}
  2699 STS.L MACH, @-Rn {:  
  2700     COUNT_INST(I_STSM);
  2701     load_reg( R_EAX, Rn );
  2702     check_walign32( R_EAX );
  2703     ADD_imm8s_r32( -4, R_EAX );
  2704     load_spreg( R_EDX, R_MACH );
  2705     MEM_WRITE_LONG( R_EAX, R_EDX );
  2706     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2707     sh4_x86.tstate = TSTATE_NONE;
  2708 :}
  2709 STS MACL, Rn {:  
  2710     COUNT_INST(I_STS);
  2711     load_spreg( R_EAX, R_MACL );
  2712     store_reg( R_EAX, Rn );
  2713 :}
  2714 STS.L MACL, @-Rn {:  
  2715     COUNT_INST(I_STSM);
  2716     load_reg( R_EAX, Rn );
  2717     check_walign32( R_EAX );
  2718     ADD_imm8s_r32( -4, R_EAX );
  2719     load_spreg( R_EDX, R_MACL );
  2720     MEM_WRITE_LONG( R_EAX, R_EDX );
  2721     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2722     sh4_x86.tstate = TSTATE_NONE;
  2723 :}
  2724 STS PR, Rn {:  
  2725     COUNT_INST(I_STS);
  2726     load_spreg( R_EAX, R_PR );
  2727     store_reg( R_EAX, Rn );
  2728 :}
  2729 STS.L PR, @-Rn {:  
  2730     COUNT_INST(I_STSM);
  2731     load_reg( R_EAX, Rn );
  2732     check_walign32( R_EAX );
  2733     ADD_imm8s_r32( -4, R_EAX );
  2734     load_spreg( R_EDX, R_PR );
  2735     MEM_WRITE_LONG( R_EAX, R_EDX );
  2736     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2737     sh4_x86.tstate = TSTATE_NONE;
  2738 :}
  2740 NOP {: 
  2741     COUNT_INST(I_NOP);
  2742     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  2743 :}
  2744 %%
  2745     sh4_x86.in_delay_slot = DELAY_NONE;
  2746     return 0;