lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 941:c67574ed4355
prev 939:6f2302afeb89
next 946:d41ee7994db7
author nkeynes
date Sat Jan 03 08:55:15 2009 +0000 (14 years ago)
branch lxdream-mem
permissions -rw-r--r--
last change Implement CORE_EXIT_EXCEPTION for use when direct frame messing about doesn't work
/**
 * $Id$
 *
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif

#include "lxdream.h"
#include "sh4/xltcache.h"
#include "sh4/sh4core.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4stat.h"
#include "sh4/sh4mmio.h"
#include "sh4/x86op.h"
#include "sh4/mmu.h"
#include "clock.h"

#define DEFAULT_BACKPATCH_SIZE 4096
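
/* Record of a code location that may need an exception stub patched in:
 * fixup_offset is the byte offset of the fixup within the generated block,
 * fixup_icount the SH4 instruction count from the start of the block (used
 * to reconstruct the faulting PC), and exc_code the exception to raise
 * (see sh4_x86_add_backpatch below). */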
struct backpatch_record {
    uint32_t fixup_offset;
    uint32_t fixup_icount;
    int32_t exc_code;
};

#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2

/** 
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc;
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    int tstate;

    /* mode flags */
    gboolean tlb_on; /* True if tlb translation is active */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};

#define TSTATE_NONE -1
#define TSTATE_O    0
#define TSTATE_C    2
#define TSTATE_E    4
#define TSTATE_NE   5
#define TSTATE_G    0xF
#define TSTATE_GE   0xD
#define TSTATE_A    7
#define TSTATE_AE   3
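
/* Each TSTATE_* value is the x86 condition-code nibble of the matching
 * Jcc/SETcc encoding (O=0, C/B=2, E=4, NE=5, G=0xF, GE=0xD, A=7, AE=3),
 * so OP(0x70+tstate) in the macros below emits the corresponding Jcc rel8,
 * and XOR-ing the value with 1 inverts the condition. */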
#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)

static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
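
/* CPUID leaf 1 returns the feature flags in ECX; bit 0 indicates SSE3. */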
gboolean is_sse3_supported()
{
    uint32_t features;

    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE;
}

void sh4_translate_init(void)
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
    sh4_x86.sse3_enabled = is_sse3_supported();
}

static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
	sh4_x86.backpatch_size <<= 1;
	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
	assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
	fixup_pc -= 2;
    }
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
	((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}

/**
 * Emit an instruction to load an SH4 reg into a real register
 */
static inline void load_reg( int x86reg, int sh4reg ) 
{
    /* mov [bp+n], reg */
    OP(0x8B);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}

static inline void load_reg16s( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xBF);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}

static inline void load_reg16u( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xB7);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}

#define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
#define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )

/**
 * Emit an instruction to load an immediate value into a register
 */
static inline void load_imm32( int x86reg, uint32_t value ) {
    /* mov #value, reg */
    OP(0xB8 + x86reg);
    OP32(value);
}

/**
 * Load an immediate 64-bit quantity (note: x86-64 only)
 */
static inline void load_imm64( int x86reg, uint64_t value ) {
    /* mov #value, reg */
    REXW();
    OP(0xB8 + x86reg);
    OP64(value);
}
/**
 * Emit an instruction to store an SH4 reg (RN)
 */
static inline void store_reg( int x86reg, int sh4reg ) {
    /* mov reg, [bp+n] */
    OP(0x89);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
#define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
#define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register 
 */
#define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
#define push_fpul()  FLDF_sh4r(R_FPUL)
#define pop_fpul()   FSTPF_sh4r(R_FPUL)
#define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )

/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( (sh4r.xlat_sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            JMP_exc(EXC_SLOT_ILLEGAL); \
        } else { \
            JMP_exc(EXC_ILLEGAL ); \
        } \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
	sh4_x86.fpuen_checked = TRUE;\
	load_spreg( R_EAX, R_SR );\
	AND_imm32_r32( SR_FD, R_EAX );\
	if( sh4_x86.in_delay_slot ) {\
	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
	} else {\
	    JNE_exc(EXC_FPU_DISABLED);\
	}\
	sh4_x86.tstate = TSTATE_NONE; \
    }

#define check_ralign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TEST_imm32_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TEST_imm32_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define UNDEF(ir)
#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
#define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so 
 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
 */

#ifdef HAVE_FRAME_ADDRESS
#define _CALL_READ(addr_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
        call_func1_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg); } else { \
        call_func1_r32disp8_exc(R_ECX, MEM_REGION_PTR(fn), addr_reg, pc); } 
#define _CALL_WRITE(addr_reg, val_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
        call_func2_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg); } else { \
        call_func2_r32disp8_exc(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg, pc); }
#else 
#define _CALL_READ(addr_reg, fn) call_func1_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg)
#define _CALL_WRITE(addr_reg, val_reg, fn) call_func2_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg)
#endif
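
/* Memory access macros: decode_address() leaves the mem_region_fn table
 * pointer for the target address in R_ECX, and the _CALL_* macros dispatch
 * through the named table entry. Reads return their result in EAX, which
 * MEM_RESULT moves into value_reg if a different register was requested. */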
#define MEM_READ_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_byte); MEM_RESULT(value_reg)
#define MEM_READ_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_word); MEM_RESULT(value_reg)
#define MEM_READ_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_long); MEM_RESULT(value_reg)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_byte)
#define MEM_WRITE_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_word)
#define MEM_WRITE_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_long)

#define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 2;

/****** Import appropriate calling conventions ******/
#if SIZEOF_VOID_P == 8
#include "sh4/ia64abi.h"
#else /* 32-bit system */
#include "sh4/ia32abi.h"
#endif

void sh4_translate_begin_block( sh4addr_t pc ) 
{
    enter_block();
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_TLB_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
}
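
/* Worst-case size of the block epilogue, including the exception stubs
 * emitted for the backpatch records (the 12- and 15-byte constants appear
 * to be per-record size estimates for the generated stubs). */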
uint32_t sh4_translate_end_block_size()
{
    if( sh4_x86.backpatch_posn <= 3 ) {
        return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
    } else {
        return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
    }
}

/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    load_imm32( R_EAX, pc );
    call_func1( sh4_translate_breakpoint_hit, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
}

#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)

/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
    ADD_r32_sh4r( R_ECX, R_PC );

    load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
    store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );

    call_func0( sh4_execute_instruction );    
    load_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
	call_func1(xlat_get_code_by_vma,R_EAX);
    } else {
	call_func1(xlat_get_code,R_EAX);
    }
    exit_block();
}
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both the branch and the delayed instruction as a single
 * unit (since they must execute together).
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or an exception-raising instruction)
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    if( !sh4_x86.in_delay_slot ) {
	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
    }
%%
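/* Everything after the %% marker is input to lxdream's decoder generator:
 * each "MNEMONIC operands {: ... :}" rule below supplies the translation
 * body for one SH4 instruction form, with the operand fields (Rm, Rn, imm,
 * disp) bound by the generated instruction matcher. */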
/* ALU operations */
ADD Rm, Rn {:
    COUNT_INST(I_ADD);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADD #imm, Rn {:  
    COUNT_INST(I_ADDI);
    ADD_imm8s_sh4r( imm, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    COUNT_INST(I_ADDC);
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADC_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ADDV Rm, Rn {:
    COUNT_INST(I_ADDV);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    COUNT_INST(I_AND);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    AND_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:  
    COUNT_INST(I_ANDI);
    load_reg( R_EAX, 0 );
    AND_imm32_r32(imm, R_EAX); 
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {: 
    COUNT_INST(I_ANDB);
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( R_GBR, R_EAX );
    MOV_r32_esp8(R_EAX, 0);
    MEM_READ_BYTE( R_EAX, R_EDX );
    MOV_esp8_r32(0, R_EAX);
    AND_imm32_r32(imm, R_EDX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
CMP/EQ Rm, Rn {:  
    COUNT_INST(I_CMPEQ);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:  
    COUNT_INST(I_CMPEQI);
    load_reg( R_EAX, 0 );
    CMP_imm8s_r32(imm, R_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:  
    COUNT_INST(I_CMPGE);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {: 
    COUNT_INST(I_CMPGT);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:  
    COUNT_INST(I_CMPHI);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {: 
    COUNT_INST(I_CMPHS);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
:}
CMP/PL Rn {: 
    COUNT_INST(I_CMPPL);
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:  
    COUNT_INST(I_CMPPZ);
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:  
    COUNT_INST(I_CMPSTR);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_ECX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    JE_rel8(target1);
    TEST_r8_r8( R_AH, R_AH );
    JE_rel8(target2);
    SHR_imm8_r32( 16, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    JE_rel8(target3);
    TEST_r8_r8( R_AH, R_AH );
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
DIV0S Rm, Rn {:
    COUNT_INST(I_DIV0S);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHR_imm8_r32( 31, R_EAX );
    SHR_imm8_r32( 31, R_ECX );
    store_spreg( R_EAX, R_M );
    store_spreg( R_ECX, R_Q );
    CMP_r32_r32( R_EAX, R_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:  
    COUNT_INST(I_DIV0U);
    XOR_r32_r32( R_EAX, R_EAX );
    store_spreg( R_EAX, R_Q );
    store_spreg( R_EAX, R_M );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
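/* DIV1 performs one step of a non-restoring division: Rn is rotated left
 * through T, Rm is subtracted when M == Q (added otherwise), and Q and T
 * are updated from the result - see the SH4 programming manual for the
 * full algorithm. */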
DIV1 Rm, Rn {:
    COUNT_INST(I_DIV1);
    load_spreg( R_ECX, R_M );
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
	LDC_t();
    }
    RCL1_r32( R_EAX );
    SETC_r8( R_DL ); // Q'
    CMP_sh4r_r32( R_Q, R_ECX );
    JE_rel8(mqequal);
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_rel8(end);
    JMP_TARGET(mqequal);
    SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_TARGET(end);
    store_reg( R_EAX, Rn ); // Done with Rn now
    SETC_r8(R_AL); // tmp1
    XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
    XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
    store_spreg( R_ECX, R_Q );
    XOR_imm8s_r32( 1, R_AL );   // T = !Q'
    MOVZX_r8_r32( R_AL, R_EAX );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULS.L Rm, Rn {:  
    COUNT_INST(I_DMULS);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    IMUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:  
    COUNT_INST(I_DMULU);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );    
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:  
    COUNT_INST(I_DT);
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    store_reg( R_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:  
    COUNT_INST(I_EXTSB);
    load_reg( R_EAX, Rm );
    MOVSX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTS.W Rm, Rn {:  
    COUNT_INST(I_EXTSW);
    load_reg( R_EAX, Rm );
    MOVSX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.B Rm, Rn {:  
    COUNT_INST(I_EXTUB);
    load_reg( R_EAX, Rm );
    MOVZX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.W Rm, Rn {:  
    COUNT_INST(I_EXTUW);
    load_reg( R_EAX, Rm );
    MOVZX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
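/* MAC.L: 32x32 -> 64-bit multiply-accumulate into MACH:MACL, with
 * saturation to 48 bits (the signsat48 helper) when the S flag is set. */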
MAC.L @Rm+, @Rn+ {:
    COUNT_INST(I_MACL);
    if( Rm == Rn ) {
	load_reg( R_EAX, Rm );
	check_ralign32( R_EAX );
	MEM_READ_LONG( R_EAX, R_EAX );
	MOV_r32_esp8(R_EAX, 0);
	load_reg( R_EAX, Rm );
	LEA_r32disp8_r32( R_EAX, 4, R_EAX );
	MEM_READ_LONG( R_EAX, R_EAX );
        ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
    } else {
	load_reg( R_EAX, Rm );
	check_ralign32( R_EAX );
	MEM_READ_LONG( R_EAX, R_EAX );
	MOV_r32_esp8( R_EAX, 0 );
	load_reg( R_EAX, Rn );
	check_ralign32( R_EAX );
	MEM_READ_LONG( R_EAX, R_EAX );
	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    }

    IMUL_esp8( 0 );
    ADD_r32_sh4r( R_EAX, R_MACL );
    ADC_r32_sh4r( R_EDX, R_MACH );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32(R_ECX, R_ECX);
    JE_rel8( nosat );
    call_func0( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
MAC.W @Rm+, @Rn+ {:  
    COUNT_INST(I_MACW);
    if( Rm == Rn ) {
	load_reg( R_EAX, Rm );
	check_ralign16( R_EAX );
	MEM_READ_WORD( R_EAX, R_EAX );
        MOV_r32_esp8( R_EAX, 0 );
	load_reg( R_EAX, Rm );
	LEA_r32disp8_r32( R_EAX, 2, R_EAX );
	MEM_READ_WORD( R_EAX, R_EAX );
	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
	// Note translate twice in case of page boundaries. Maybe worth
	// adding a page-boundary check to skip the second translation
    } else {
	load_reg( R_EAX, Rm );
	check_ralign16( R_EAX );
	MEM_READ_WORD( R_EAX, R_EAX );
        MOV_r32_esp8( R_EAX, 0 );
	load_reg( R_EAX, Rn );
	check_ralign16( R_EAX );
	MEM_READ_WORD( R_EAX, R_EAX );
	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    }
    IMUL_esp8( 0 );
    load_spreg( R_ECX, R_S );
    TEST_r32_r32( R_ECX, R_ECX );
    JE_rel8( nosat );

    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    JNO_rel8( end );            // 2
    load_imm32( R_EDX, 1 );         // 5
    store_spreg( R_EDX, R_MACH );   // 6
    JS_rel8( positive );        // 2
    load_imm32( R_EAX, 0x80000000 );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8(end2);           // 2

    JMP_TARGET(positive);
    load_imm32( R_EAX, 0x7FFFFFFF );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8(end3);            // 2

    JMP_TARGET(nosat);
    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    ADC_r32_sh4r( R_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVT Rn {:  
    COUNT_INST(I_MOVT);
    load_spreg( R_EAX, R_T );
    store_reg( R_EAX, Rn );
:}
MUL.L Rm, Rn {:  
    COUNT_INST(I_MULL);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    COUNT_INST(I_MULSW);
    load_reg16s( R_EAX, Rm );
    load_reg16s( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:  
    COUNT_INST(I_MULUW);
    load_reg16u( R_EAX, Rm );
    load_reg16u( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    COUNT_INST(I_NEG);
    load_reg( R_EAX, Rm );
    NEG_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:  
    COUNT_INST(I_NEGC);
    load_reg( R_EAX, Rm );
    XOR_r32_r32( R_ECX, R_ECX );
    LDC_t();
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:  
    COUNT_INST(I_NOT);
    load_reg( R_EAX, Rm );
    NOT_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:  
    COUNT_INST(I_OR);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    COUNT_INST(I_ORI);
    load_reg( R_EAX, 0 );
    OR_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:  
    COUNT_INST(I_ORB);
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( R_GBR, R_EAX );
    MOV_r32_esp8( R_EAX, 0 );
    MEM_READ_BYTE( R_EAX, R_EDX );
    MOV_esp8_r32( 0, R_EAX );
    OR_imm32_r32(imm, R_EDX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    COUNT_INST(I_ROTCL);
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
	LDC_t();
    }
    RCL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:  
    COUNT_INST(I_ROTCR);
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
	LDC_t();
    }
    RCR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:  
    COUNT_INST(I_ROTL);
    load_reg( R_EAX, Rn );
    ROL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:  
    COUNT_INST(I_ROTR);
    load_reg( R_EAX, Rn );
    ROR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    COUNT_INST(I_SHAD);
    /* Annoyingly enough, not directly convertible */
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(doshl);

    NEG_r32( R_ECX );      // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8(emptysar);     // 2
    SAR_r32_CL( R_EAX );       // 2
    JMP_rel8(end);          // 2

    JMP_TARGET(emptysar);
    SAR_imm8_r32(31, R_EAX );  // 3
    JMP_rel8(end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLD Rm, Rn {:  
    COUNT_INST(I_SHLD);
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(doshl);

    NEG_r32( R_ECX );      // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8(emptyshr );
    SHR_r32_CL( R_EAX );       // 2
    JMP_rel8(end);          // 2

    JMP_TARGET(emptyshr);
    XOR_r32_r32( R_EAX, R_EAX );
    JMP_rel8(end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHAL Rn {: 
    COUNT_INST(I_SHAL);
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHAR Rn {:  
    COUNT_INST(I_SHAR);
    load_reg( R_EAX, Rn );
    SAR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL Rn {:  
    COUNT_INST(I_SHLL);
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL2 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL8 Rn {:  
    COUNT_INST(I_SHLL);
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL16 Rn {:  
    COUNT_INST(I_SHLL);
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR Rn {:  
    COUNT_INST(I_SHLR);
    load_reg( R_EAX, Rn );
    SHR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLR2 Rn {:  
    COUNT_INST(I_SHLR);
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR8 Rn {:  
    COUNT_INST(I_SHLR);
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR16 Rn {:  
    COUNT_INST(I_SHLR);
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUB Rm, Rn {:  
    COUNT_INST(I_SUB);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUBC Rm, Rn {:  
    COUNT_INST(I_SUBC);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
	LDC_t();
    }
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SUBV Rm, Rn {:  
    COUNT_INST(I_SUBV);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
SWAP.B Rm, Rn {:  
    COUNT_INST(I_SWAPB);
    load_reg( R_EAX, Rm );
    XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
    store_reg( R_EAX, Rn );
:}
SWAP.W Rm, Rn {:  
    COUNT_INST(I_SWAPB);
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    SHL_imm8_r32( 16, R_ECX );
    SHR_imm8_r32( 16, R_EAX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
TAS.B @Rn {:  
    COUNT_INST(I_TASB);
    load_reg( R_EAX, Rn );
    MOV_r32_esp8( R_EAX, 0 );
    MEM_READ_BYTE( R_EAX, R_EDX );
    TEST_r8_r8( R_DL, R_DL );
    SETE_t();
    OR_imm8_r8( 0x80, R_DL );
    MOV_esp8_r32( 0, R_EAX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
TST Rm, Rn {:  
    COUNT_INST(I_TST);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    TEST_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST #imm, R0 {:  
    COUNT_INST(I_TSTI);
    load_reg( R_EAX, 0 );
    TEST_imm32_r32( imm, R_EAX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST.B #imm, @(R0, GBR) {:  
    COUNT_INST(I_TSTB);
    load_reg( R_EAX, 0);
    ADD_sh4r_r32( R_GBR, R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    TEST_imm8_r8( imm, R_AL );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
XOR Rm, Rn {:  
    COUNT_INST(I_XOR);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR #imm, R0 {:  
    COUNT_INST(I_XORI);
    load_reg( R_EAX, 0 );
    XOR_imm32_r32( imm, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR.B #imm, @(R0, GBR) {:  
    COUNT_INST(I_XORB);
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( R_GBR, R_EAX ); 
    MOV_r32_esp8( R_EAX, 0 );
    MEM_READ_BYTE(R_EAX, R_EDX);
    MOV_esp8_r32( 0, R_EAX );
    XOR_imm32_r32( imm, R_EDX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
XTRCT Rm, Rn {:
    COUNT_INST(I_XTRCT);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    SHR_imm8_r32( 16, R_ECX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}

/* Data move instructions */
MOV Rm, Rn {:  
    COUNT_INST(I_MOV);
    load_reg( R_EAX, Rm );
    store_reg( R_EAX, Rn );
:}
MOV #imm, Rn {:  
    COUNT_INST(I_MOVI);
    load_imm32( R_EAX, imm );
    store_reg( R_EAX, Rn );
:}
MOV.B Rm, @Rn {:  
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, Rn );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @-Rn {:  
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, Rn );
    LEA_r32disp8_r32( R_EAX, -1, R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @(R0, Rn) {:  
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, GBR) {:  
    COUNT_INST(I_MOVB);
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, Rn) {:  
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm, Rn {:  
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, Rm );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm+, Rn {:  
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, Rm );
    MEM_READ_BYTE( R_EAX, R_EAX );
    if( Rm != Rn ) {
    	ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
    }
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(R0, Rm), Rn {:  
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, GBR), R0 {:  
    COUNT_INST(I_MOVB);
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, Rm), R0 {:  
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
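/* Long writes to the 0xE0000000-0xE3FFFFFF region target the SH4 store
 * queues; the long-store rules below detect that range inline and write
 * directly into the cached store_queue state rather than going through
 * the normal memory path. */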
MOV.L Rm, @Rn {:
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, Rn );
    check_walign32(R_EAX);
    MOV_r32_r32( R_EAX, R_ECX );
    AND_imm32_r32( 0xFC000000, R_ECX );
    CMP_imm32_r32( 0xE0000000, R_ECX );
    JNE_rel8( notsq );
    AND_imm8s_r32( 0x3C, R_EAX );
    load_reg( R_EDX, Rm );
    MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
    JMP_rel8(end);
    JMP_TARGET(notsq);
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @-Rn {:  
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -4, R_EAX );
    check_walign32( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(R0, Rn) {:  
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
    check_walign32( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L R0, @(disp, GBR) {:  
    COUNT_INST(I_MOVL);
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(disp, Rn) {:  
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    AND_imm32_r32( 0xFC000000, R_ECX );
    CMP_imm32_r32( 0xE0000000, R_ECX );
    JNE_rel8( notsq );
    AND_imm8s_r32( 0x3C, R_EAX );
    load_reg( R_EDX, Rm );
    MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
    JMP_rel8(end);
    JMP_TARGET(notsq);
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm, Rn {:  
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm+, Rn {:  
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    if( Rm != Rn ) {
    	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    }
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(R0, Rm), Rn {:  
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, GBR), R0 {:
    COUNT_INST(I_MOVL);
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, PC), Rn {:  
    COUNT_INST(I_MOVLPC);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
	if( IS_IN_ICACHE(target) ) {
	    // If the target address is in the same page as the code, it's
	    // pretty safe to just ref it directly and circumvent the whole
	    // memory subsystem. (this is a big performance win)

	    // FIXME: There's a corner-case that's not handled here when
	    // the current code-page is in the ITLB but not in the UTLB.
	    // (should generate a TLB miss although need to test SH4 
	    // behaviour to confirm) Unlikely to be anyone depending on this
	    // behaviour though.
	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
	    MOV_moff32_EAX( ptr );
	} else {
	    // Note: we use sh4r.pc for the calc as we could be running at a
	    // different virtual address than the translation was done with,
	    // but we can safely assume that the low bits are the same.
	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
	    ADD_sh4r_r32( R_PC, R_EAX );
	    MEM_READ_LONG( R_EAX, R_EAX );
	    sh4_x86.tstate = TSTATE_NONE;
	}
	store_reg( R_EAX, Rn );
    }
:}
MOV.L @(disp, Rm), Rn {:  
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, Rm );
    ADD_imm8s_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @Rn {:  
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, Rn );
    check_walign16( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @-Rn {:  
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, Rn );
    check_walign16( R_EAX );
    LEA_r32disp8_r32( R_EAX, -2, R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @(R0, Rn) {:  
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
    check_walign16( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, GBR) {:  
    COUNT_INST(I_MOVW);
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, Rn) {:  
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm, Rn {:  
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm+, Rn {:  
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    if( Rm != Rn ) {
        ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    }
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(R0, Rm), Rn {:  
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    check_ralign16( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, GBR), R0 {:  
    COUNT_INST(I_MOVW);
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, PC), Rn {:  
    COUNT_INST(I_MOVW);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	// See comments for MOV.L @(disp, PC), Rn
	uint32_t target = pc + disp + 4;
	if( IS_IN_ICACHE(target) ) {
	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
	    MOV_moff32_EAX( ptr );
	    MOVSX_r16_r32( R_EAX, R_EAX );
	} else {
	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
	    ADD_sh4r_r32( R_PC, R_EAX );
	    MEM_READ_WORD( R_EAX, R_EAX );
	    sh4_x86.tstate = TSTATE_NONE;
	}
	store_reg( R_EAX, Rn );
    }
:}
MOV.W @(disp, Rm), R0 {:  
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVA @(disp, PC), R0 {:  
    COUNT_INST(I_MOVA);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
	ADD_sh4r_r32( R_PC, R_ECX );
	store_reg( R_ECX, 0 );
	sh4_x86.tstate = TSTATE_NONE;
    }
:}
MOVCA.L R0, @Rn {:  
    COUNT_INST(I_MOVCA);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}

/* Control transfer instructions */
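/* Conditional delayed branches (BF/S, BT/S) translate the delay-slot
 * instruction twice - once on the taken path before the block exit, and
 * once on the fall-through path - with a rel32 conditional jump patched
 * between them once the taken-path length is known. */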
BF disp {:
    COUNT_INST(I_BF);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	sh4vma_t target = disp + pc + 4;
	JT_rel8( nottaken );
	exit_block_rel(target, pc+2 );
	JMP_TARGET(nottaken);
	return 2;
    }
:}
BF/S disp {:
    COUNT_INST(I_BFS);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	sh4_x86.in_delay_slot = DELAY_PC;
	if( UNTRANSLATABLE(pc+2) ) {
	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
	    JT_rel8(nottaken);
	    ADD_imm32_r32( disp, R_EAX );
	    JMP_TARGET(nottaken);
	    ADD_sh4r_r32( R_PC, R_EAX );
	    store_spreg( R_EAX, R_NEW_PC );
	    exit_block_emu(pc+2);
	    sh4_x86.branch_taken = TRUE;
	    return 2;
	} else {
	    if( sh4_x86.tstate == TSTATE_NONE ) {
		CMP_imm8s_sh4r( 1, R_T );
		sh4_x86.tstate = TSTATE_E;
	    }
	    sh4vma_t target = disp + pc + 4;
	    OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
	    int save_tstate = sh4_x86.tstate;
	    sh4_translate_instruction(pc+2);
	    exit_block_rel( target, pc+4 );

	    // not taken
	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
	    sh4_x86.tstate = save_tstate;
	    sh4_translate_instruction(pc+2);
	    return 4;
	}
    }
:}
BRA disp {:  
    COUNT_INST(I_BRA);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	sh4_x86.in_delay_slot = DELAY_PC;
	sh4_x86.branch_taken = TRUE;
	if( UNTRANSLATABLE(pc+2) ) {
	    load_spreg( R_EAX, R_PC );
	    ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
	    store_spreg( R_EAX, R_NEW_PC );
	    exit_block_emu(pc+2);
	    return 2;
	} else {
	    sh4_translate_instruction( pc + 2 );
	    exit_block_rel( disp + pc + 4, pc+4 );
	    return 4;
	}
    }
:}
BRAF Rn {:  
    COUNT_INST(I_BRAF);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_spreg( R_EAX, R_PC );
	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
	store_spreg( R_EAX, R_NEW_PC );
	sh4_x86.in_delay_slot = DELAY_PC;
	sh4_x86.tstate = TSTATE_NONE;
	sh4_x86.branch_taken = TRUE;
	if( UNTRANSLATABLE(pc+2) ) {
	    exit_block_emu(pc+2);
	    return 2;
	} else {
	    sh4_translate_instruction( pc + 2 );
	    exit_block_newpcset(pc+2);
	    return 4;
	}
    }
:}
BSR disp {:  
    COUNT_INST(I_BSR);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_spreg( R_EAX, R_PC );
	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
	store_spreg( R_EAX, R_PR );
	sh4_x86.in_delay_slot = DELAY_PC;
	sh4_x86.branch_taken = TRUE;
	sh4_x86.tstate = TSTATE_NONE;
	if( UNTRANSLATABLE(pc+2) ) {
	    ADD_imm32_r32( disp, R_EAX );
	    store_spreg( R_EAX, R_NEW_PC );
	    exit_block_emu(pc+2);
	    return 2;
	} else {
	    sh4_translate_instruction( pc + 2 );
	    exit_block_rel( disp + pc + 4, pc+4 );
	    return 4;
	}
    }
:}
BSRF Rn {:  
    COUNT_INST(I_BSRF);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_spreg( R_EAX, R_PC );
	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
	store_spreg( R_EAX, R_PR );
	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
	store_spreg( R_EAX, R_NEW_PC );

	sh4_x86.in_delay_slot = DELAY_PC;
	sh4_x86.tstate = TSTATE_NONE;
	sh4_x86.branch_taken = TRUE;
	if( UNTRANSLATABLE(pc+2) ) {
	    exit_block_emu(pc+2);
	    return 2;
	} else {
	    sh4_translate_instruction( pc + 2 );
	    exit_block_newpcset(pc+2);
	    return 4;
	}
    }
:}
BT disp {:
    COUNT_INST(I_BT);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	sh4vma_t target = disp + pc + 4;
	JF_rel8( nottaken );
	exit_block_rel(target, pc+2 );
	JMP_TARGET(nottaken);
	return 2;
    }
:}
BT/S disp {:
    COUNT_INST(I_BTS);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	sh4_x86.in_delay_slot = DELAY_PC;
	if( UNTRANSLATABLE(pc+2) ) {
	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
	    JF_rel8(nottaken);
	    ADD_imm32_r32( disp, R_EAX );
	    JMP_TARGET(nottaken);
	    ADD_sh4r_r32( R_PC, R_EAX );
	    store_spreg( R_EAX, R_NEW_PC );
	    exit_block_emu(pc+2);
	    sh4_x86.branch_taken = TRUE;
	    return 2;
	} else {
	    if( sh4_x86.tstate == TSTATE_NONE ) {
		CMP_imm8s_sh4r( 1, R_T );
		sh4_x86.tstate = TSTATE_E;
	    }
	    OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
	    int save_tstate = sh4_x86.tstate;
	    sh4_translate_instruction(pc+2);
	    exit_block_rel( disp + pc + 4, pc+4 );
	    // not taken
	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
	    sh4_x86.tstate = save_tstate;
	    sh4_translate_instruction(pc+2);
	    return 4;
	}
    }
:}
  1642 JMP @Rn {:  
  1643     COUNT_INST(I_JMP);
  1644     if( sh4_x86.in_delay_slot ) {
  1645 	SLOTILLEGAL();
  1646     } else {
  1647 	load_reg( R_ECX, Rn );
  1648 	store_spreg( R_ECX, R_NEW_PC );
  1649 	sh4_x86.in_delay_slot = DELAY_PC;
  1650 	sh4_x86.branch_taken = TRUE;
  1651 	if( UNTRANSLATABLE(pc+2) ) {
  1652 	    exit_block_emu(pc+2);
  1653 	    return 2;
  1654 	} else {
  1655 	    sh4_translate_instruction(pc+2);
  1656 	    exit_block_newpcset(pc+2);
  1657 	    return 4;
  1660 :}
  1661 JSR @Rn {:  
  1662     COUNT_INST(I_JSR);
  1663     if( sh4_x86.in_delay_slot ) {
  1664 	SLOTILLEGAL();
  1665     } else {
  1666 	load_spreg( R_EAX, R_PC );
  1667 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1668 	store_spreg( R_EAX, R_PR );
  1669 	load_reg( R_ECX, Rn );
  1670 	store_spreg( R_ECX, R_NEW_PC );
  1671 	sh4_x86.in_delay_slot = DELAY_PC;
  1672 	sh4_x86.branch_taken = TRUE;
  1673 	sh4_x86.tstate = TSTATE_NONE;
  1674 	if( UNTRANSLATABLE(pc+2) ) {
  1675 	    exit_block_emu(pc+2);
  1676 	    return 2;
  1677 	} else {
  1678 	    sh4_translate_instruction(pc+2);
  1679 	    exit_block_newpcset(pc+2);
  1680 	    return 4;
  1681 	}
  1682     }
  1683 :}
  1684 RTE {:  
  1685     COUNT_INST(I_RTE);
  1686     if( sh4_x86.in_delay_slot ) {
  1687 	SLOTILLEGAL();
  1688     } else {
  1689 	check_priv();
  1690 	load_spreg( R_ECX, R_SPC );
  1691 	store_spreg( R_ECX, R_NEW_PC );
  1692 	load_spreg( R_EAX, R_SSR );
  1693 	call_func1( sh4_write_sr, R_EAX );
  1694 	sh4_x86.in_delay_slot = DELAY_PC;
  1695 	sh4_x86.fpuen_checked = FALSE;
  1696 	sh4_x86.tstate = TSTATE_NONE;
  1697 	sh4_x86.branch_taken = TRUE;
  1698 	if( UNTRANSLATABLE(pc+2) ) {
  1699 	    exit_block_emu(pc+2);
  1700 	    return 2;
  1701 	} else {
  1702 	    sh4_translate_instruction(pc+2);
  1703 	    exit_block_newpcset(pc+2);
  1704 	    return 4;
  1705 	}
  1706     }
  1707 :}
  1708 RTS {:  
  1709     COUNT_INST(I_RTS);
  1710     if( sh4_x86.in_delay_slot ) {
  1711 	SLOTILLEGAL();
  1712     } else {
  1713 	load_spreg( R_ECX, R_PR );
  1714 	store_spreg( R_ECX, R_NEW_PC );
  1715 	sh4_x86.in_delay_slot = DELAY_PC;
  1716 	sh4_x86.branch_taken = TRUE;
  1717 	if( UNTRANSLATABLE(pc+2) ) {
  1718 	    exit_block_emu(pc+2);
  1719 	    return 2;
  1720 	} else {
  1721 	    sh4_translate_instruction(pc+2);
  1722 	    exit_block_newpcset(pc+2);
  1723 	    return 4;
  1724 	}
  1725     }
  1726 :}
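
       /* TRAPA commits the updated PC to sh4r.pc and enters the trap
        * handler through sh4_raise_trap(), so the block ends immediately
        * with exit_block_pcset(). */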
  1727 TRAPA #imm {:  
  1728     COUNT_INST(I_TRAPA);
  1729     if( sh4_x86.in_delay_slot ) {
  1730 	SLOTILLEGAL();
  1731     } else {
  1732 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // advance sh4r.pc to the instruction after the TRAPA
  1733 	ADD_r32_sh4r( R_ECX, R_PC );
  1734 	load_imm32( R_EAX, imm );
  1735 	call_func1( sh4_raise_trap, R_EAX );
  1736 	sh4_x86.tstate = TSTATE_NONE;
  1737 	exit_block_pcset(pc);
  1738 	sh4_x86.branch_taken = TRUE;
  1739 	return 2;
  1740     }
  1741 :}
  1742 UNDEF {:  
  1743     COUNT_INST(I_UNDEF);
  1744     if( sh4_x86.in_delay_slot ) {
  1745 	SLOTILLEGAL();
  1746     } else {
  1747 	JMP_exc(EXC_ILLEGAL);
  1748 	return 2;
  1749     }
  1750 :}
  1752 CLRMAC {:  
  1753     COUNT_INST(I_CLRMAC);
  1754     XOR_r32_r32(R_EAX, R_EAX);
  1755     store_spreg( R_EAX, R_MACL );
  1756     store_spreg( R_EAX, R_MACH );
  1757     sh4_x86.tstate = TSTATE_NONE;
  1758 :}
  1759 CLRS {:
  1760     COUNT_INST(I_CLRS);
  1761     CLC();
  1762     SETC_sh4r(R_S);
  1763     sh4_x86.tstate = TSTATE_NONE;
  1764 :}
  1765 CLRT {:  
  1766     COUNT_INST(I_CLRT);
  1767     CLC();
  1768     SETC_t();
  1769     sh4_x86.tstate = TSTATE_C;
  1770 :}
  1771 SETS {:  
  1772     COUNT_INST(I_SETS);
  1773     STC();
  1774     SETC_sh4r(R_S);
  1775     sh4_x86.tstate = TSTATE_NONE;
  1776 :}
  1777 SETT {:  
  1778     COUNT_INST(I_SETT);
  1779     STC();
  1780     SETC_t();
  1781     sh4_x86.tstate = TSTATE_C;
  1782 :}
  1784 /* Floating point moves */
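       /* With FPSCR.SZ set (sh4_x86.double_size), FMOV moves a 64-bit
        * register pair, implemented as two 32-bit memory accesses guarded
        * by a 64-bit alignment check; otherwise it is a single 32-bit
        * transfer. */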
  1785 FMOV FRm, FRn {:  
  1786     COUNT_INST(I_FMOV1);
  1787     check_fpuen();
  1788     if( sh4_x86.double_size ) {
  1789         load_dr0( R_EAX, FRm );
  1790         load_dr1( R_ECX, FRm );
  1791         store_dr0( R_EAX, FRn );
  1792         store_dr1( R_ECX, FRn );
  1793     } else {
  1794         load_fr( R_EAX, FRm ); // SZ=0 branch
  1795         store_fr( R_EAX, FRn );
  1796     }
  1797 :}
  1798 FMOV FRm, @Rn {: 
  1799     COUNT_INST(I_FMOV2);
  1800     check_fpuen();
  1801     load_reg( R_EAX, Rn );
  1802     if( sh4_x86.double_size ) {
  1803         check_walign64( R_EAX );
  1804         load_dr0( R_EDX, FRm );
  1805         MEM_WRITE_LONG( R_EAX, R_EDX );
  1806         load_reg( R_EAX, Rn );
  1807         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
  1808         load_dr1( R_EDX, FRm );
  1809         MEM_WRITE_LONG( R_EAX, R_EDX );
  1810     } else {
  1811         check_walign32( R_EAX );
  1812         load_fr( R_EDX, FRm );
  1813         MEM_WRITE_LONG( R_EAX, R_EDX );
  1814     }
  1815     sh4_x86.tstate = TSTATE_NONE;
  1816 :}
  1817 FMOV @Rm, FRn {:  
  1818     COUNT_INST(I_FMOV5);
  1819     check_fpuen();
  1820     load_reg( R_EAX, Rm );
  1821     if( sh4_x86.double_size ) {
  1822         check_ralign64( R_EAX );
  1823         MEM_READ_LONG( R_EAX, R_EAX );
  1824         store_dr0( R_EAX, FRn );
  1825         load_reg( R_EAX, Rm );
  1826         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
  1827         MEM_READ_LONG( R_EAX, R_EAX );
  1828         store_dr1( R_EAX, FRn );
  1829     } else {
  1830         check_ralign32( R_EAX );
  1831         MEM_READ_LONG( R_EAX, R_EAX );
  1832         store_fr( R_EAX, FRn );
  1833     }
  1834     sh4_x86.tstate = TSTATE_NONE;
  1835 :}
  1836 FMOV FRm, @-Rn {:  
  1837     COUNT_INST(I_FMOV3);
  1838     check_fpuen();
  1839     load_reg( R_EAX, Rn );
  1840     if( sh4_x86.double_size ) {
  1841         check_walign64( R_EAX );
  1842         LEA_r32disp8_r32( R_EAX, -8, R_EAX );
  1843         load_dr0( R_EDX, FRm );
  1844         MEM_WRITE_LONG( R_EAX, R_EDX );
  1845         load_reg( R_EAX, Rn );
  1846         LEA_r32disp8_r32( R_EAX, -4, R_EAX );
  1847         load_dr1( R_EDX, FRm );
  1848         MEM_WRITE_LONG( R_EAX, R_EDX );
  1849         ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1850     } else {
  1851         check_walign32( R_EAX );
  1852         LEA_r32disp8_r32( R_EAX, -4, R_EAX );
  1853         load_fr( R_EDX, FRm );
  1854         MEM_WRITE_LONG( R_EAX, R_EDX );
  1855         ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1856     }
  1857     sh4_x86.tstate = TSTATE_NONE;
  1858 :}
  1859 FMOV @Rm+, FRn {:
  1860     COUNT_INST(I_FMOV6);
  1861     check_fpuen();
  1862     load_reg( R_EAX, Rm );
  1863     if( sh4_x86.double_size ) {
  1864         check_ralign64( R_EAX );
  1865         MEM_READ_LONG( R_EAX, R_EAX );
  1866         store_dr0( R_EAX, FRn );
  1867         load_reg( R_EAX, Rm );
  1868         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
  1869         MEM_READ_LONG( R_EAX, R_EAX );
  1870         store_dr1( R_EAX, FRn );
  1871         ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1872     } else {
  1873         check_ralign32( R_EAX );
  1874         MEM_READ_LONG( R_EAX, R_EAX );
  1875         store_fr( R_EAX, FRn );
  1876         ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1877     }
  1878     sh4_x86.tstate = TSTATE_NONE;
  1879 :}
  1880 FMOV FRm, @(R0, Rn) {:  
  1881     COUNT_INST(I_FMOV4);
  1882     check_fpuen();
  1883     load_reg( R_EAX, Rn );
  1884     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1885     if( sh4_x86.double_size ) {
  1886         check_walign64( R_EAX );
  1887         load_dr0( R_EDX, FRm );
  1888         MEM_WRITE_LONG( R_EAX, R_EDX );
  1889         load_reg( R_EAX, Rn );
  1890         ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1891         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
  1892         load_dr1( R_EDX, FRm );
  1893         MEM_WRITE_LONG( R_EAX, R_EDX );
  1894     } else {
  1895         check_walign32( R_EAX );
  1896         load_fr( R_EDX, FRm );
  1897         MEM_WRITE_LONG( R_EAX, R_EDX );
  1898     }
  1899     sh4_x86.tstate = TSTATE_NONE;
  1900 :}
  1901 FMOV @(R0, Rm), FRn {:  
  1902     COUNT_INST(I_FMOV7);
  1903     check_fpuen();
  1904     load_reg( R_EAX, Rm );
  1905     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1906     if( sh4_x86.double_size ) {
  1907         check_ralign64( R_EAX );
  1908         MEM_READ_LONG( R_EAX, R_EAX );
  1909         store_dr0( R_EAX, FRn );
  1910         load_reg( R_EAX, Rm );
  1911         ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1912         LEA_r32disp8_r32( R_EAX, 4, R_EAX );
  1913         MEM_READ_LONG( R_EAX, R_EAX );
  1914         store_dr1( R_EAX, FRn );
  1915     } else {
  1916         check_ralign32( R_EAX );
  1917         MEM_READ_LONG( R_EAX, R_EAX );
  1918         store_fr( R_EAX, FRn );
  1919     }
  1920     sh4_x86.tstate = TSTATE_NONE;
  1921 :}
  1922 FLDI0 FRn {:  /* IFF PR=0 */
  1923     COUNT_INST(I_FLDI0);
  1924     check_fpuen();
  1925     if( sh4_x86.double_prec == 0 ) {
  1926         XOR_r32_r32( R_EAX, R_EAX );
  1927         store_fr( R_EAX, FRn );
  1928     }
  1929     sh4_x86.tstate = TSTATE_NONE;
  1930 :}
  1931 FLDI1 FRn {:  /* IFF PR=0 */
  1932     COUNT_INST(I_FLDI1);
  1933     check_fpuen();
  1934     if( sh4_x86.double_prec == 0 ) {
  1935         load_imm32(R_EAX, 0x3F800000);
  1936         store_fr( R_EAX, FRn );
  1937     }
  1938 :}
  1940 FLOAT FPUL, FRn {:  
  1941     COUNT_INST(I_FLOAT);
  1942     check_fpuen();
  1943     FILD_sh4r(R_FPUL);
  1944     if( sh4_x86.double_prec ) {
  1945         pop_dr( FRn );
  1946     } else {
  1947         pop_fr( FRn );
  1948     }
  1949 :}
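
       /* FTRC converts with saturation: values outside [min_int, max_int]
        * are clamped to the corresponding bound, and the x87 control word
        * is temporarily switched to trunc_fcw so FISTP rounds toward zero
        * the way the SH4 does. */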
  1950 FTRC FRm, FPUL {:  
  1951     COUNT_INST(I_FTRC);
  1952     check_fpuen();
  1953     if( sh4_x86.double_prec ) {
  1954         push_dr( FRm );
  1955     } else {
  1956         push_fr( FRm );
  1957     }
  1958     load_ptr( R_ECX, &max_int );    // Clamp to INT_MAX if the value is too large
  1959     FILD_r32ind( R_ECX );
  1960     FCOMIP_st(1);
  1961     JNA_rel8( sat );
  1962     load_ptr( R_ECX, &min_int );    // Clamp to INT_MIN if the value is too small
  1963     FILD_r32ind( R_ECX );
  1964     FCOMIP_st(1);
  1965     JAE_rel8( sat2 );
  1966     load_ptr( R_EAX, &save_fcw );   // Save the control word and switch to round-to-zero
  1967     FNSTCW_r32ind( R_EAX );
  1968     load_ptr( R_EDX, &trunc_fcw );
  1969     FLDCW_r32ind( R_EDX );
  1970     FISTP_sh4r(R_FPUL);             // Truncating conversion into FPUL
  1971     FLDCW_r32ind( R_EAX );          // Restore the saved control word
  1972     JMP_rel8(end);
  1974     JMP_TARGET(sat);
  1975     JMP_TARGET(sat2);
  1976     MOV_r32ind_r32( R_ECX, R_ECX ); // Saturated: R_ECX points at the int bound to store
  1977     store_spreg( R_ECX, R_FPUL );
  1978     FPOP_st();
  1979     JMP_TARGET(end);
  1980     sh4_x86.tstate = TSTATE_NONE;
  1981 :}
  1982 FLDS FRm, FPUL {:  
  1983     COUNT_INST(I_FLDS);
  1984     check_fpuen();
  1985     load_fr( R_EAX, FRm );
  1986     store_spreg( R_EAX, R_FPUL );
  1987 :}
  1988 FSTS FPUL, FRn {:  
  1989     COUNT_INST(I_FSTS);
  1990     check_fpuen();
  1991     load_spreg( R_EAX, R_FPUL );
  1992     store_fr( R_EAX, FRn );
  1993 :}
  1994 FCNVDS FRm, FPUL {:  
  1995     COUNT_INST(I_FCNVDS);
  1996     check_fpuen();
  1997     if( sh4_x86.double_prec ) {
  1998         push_dr( FRm );
  1999         pop_fpul();
  2000     }
  2001 :}
  2002 FCNVSD FPUL, FRn {:  
  2003     COUNT_INST(I_FCNVSD);
  2004     check_fpuen();
  2005     if( sh4_x86.double_prec ) {
  2006         push_fpul();
  2007         pop_dr( FRn );
  2008     }
  2009 :}
  2011 /* Floating point instructions */
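       /* The arithmetic ops below run on the x87 stack: push the operands
        * with push_fr/push_dr, apply a single x87 instruction, and pop the
        * result back into the SH4 register file. sh4_x86.double_prec
        * selects 32-bit or 64-bit operand loads. */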
  2012 FABS FRn {:  
  2013     COUNT_INST(I_FABS);
  2014     check_fpuen();
  2015     if( sh4_x86.double_prec ) {
  2016         push_dr(FRn);
  2017         FABS_st0();
  2018         pop_dr(FRn);
  2019     } else {
  2020         push_fr(FRn);
  2021         FABS_st0();
  2022         pop_fr(FRn);
  2023     }
  2024 :}
  2025 FADD FRm, FRn {:  
  2026     COUNT_INST(I_FADD);
  2027     check_fpuen();
  2028     if( sh4_x86.double_prec ) {
  2029         push_dr(FRm);
  2030         push_dr(FRn);
  2031         FADDP_st(1);
  2032         pop_dr(FRn);
  2033     } else {
  2034         push_fr(FRm);
  2035         push_fr(FRn);
  2036         FADDP_st(1);
  2037         pop_fr(FRn);
  2038     }
  2039 :}
  2040 FDIV FRm, FRn {:  
  2041     COUNT_INST(I_FDIV);
  2042     check_fpuen();
  2043     if( sh4_x86.double_prec ) {
  2044         push_dr(FRn);
  2045         push_dr(FRm);
  2046         FDIVP_st(1);
  2047         pop_dr(FRn);
  2048     } else {
  2049         push_fr(FRn);
  2050         push_fr(FRm);
  2051         FDIVP_st(1);
  2052         pop_fr(FRn);
  2053     }
  2054 :}
  2055 FMAC FR0, FRm, FRn {:  
  2056     COUNT_INST(I_FMAC);
  2057     check_fpuen();
  2058     if( sh4_x86.double_prec ) {
  2059         push_dr( 0 );
  2060         push_dr( FRm );
  2061         FMULP_st(1);
  2062         push_dr( FRn );
  2063         FADDP_st(1);
  2064         pop_dr( FRn );
  2065     } else {
  2066         push_fr( 0 );
  2067         push_fr( FRm );
  2068         FMULP_st(1);
  2069         push_fr( FRn );
  2070         FADDP_st(1);
  2071         pop_fr( FRn );
  2072     }
  2073 :}
  2075 FMUL FRm, FRn {:  
  2076     COUNT_INST(I_FMUL);
  2077     check_fpuen();
  2078     if( sh4_x86.double_prec ) {
  2079         push_dr(FRm);
  2080         push_dr(FRn);
  2081         FMULP_st(1);
  2082         pop_dr(FRn);
  2083     } else {
  2084         push_fr(FRm);
  2085         push_fr(FRn);
  2086         FMULP_st(1);
  2087         pop_fr(FRn);
  2088     }
  2089 :}
  2090 FNEG FRn {:  
  2091     COUNT_INST(I_FNEG);
  2092     check_fpuen();
  2093     if( sh4_x86.double_prec ) {
  2094         push_dr(FRn);
  2095         FCHS_st0();
  2096         pop_dr(FRn);
  2097     } else {
  2098         push_fr(FRn);
  2099         FCHS_st0();
  2100         pop_fr(FRn);
  2101     }
  2102 :}
  2103 FSRRA FRn {:  
  2104     COUNT_INST(I_FSRRA);
  2105     check_fpuen();
  2106     if( sh4_x86.double_prec == 0 ) {
  2107         FLD1_st0();
  2108         push_fr(FRn);
  2109         FSQRT_st0();
  2110         FDIVP_st(1);
  2111         pop_fr(FRn);
  2112     }
  2113 :}
  2114 FSQRT FRn {:  
  2115     COUNT_INST(I_FSQRT);
  2116     check_fpuen();
  2117     if( sh4_x86.double_prec ) {
  2118         push_dr(FRn);
  2119         FSQRT_st0();
  2120         pop_dr(FRn);
  2121     } else {
  2122         push_fr(FRn);
  2123         FSQRT_st0();
  2124         pop_fr(FRn);
  2125     }
  2126 :}
  2127 FSUB FRm, FRn {:  
  2128     COUNT_INST(I_FSUB);
  2129     check_fpuen();
  2130     if( sh4_x86.double_prec ) {
  2131         push_dr(FRn);
  2132         push_dr(FRm);
  2133         FSUBP_st(1);
  2134         pop_dr(FRn);
  2135     } else {
  2136         push_fr(FRn);
  2137         push_fr(FRm);
  2138         FSUBP_st(1);
  2139         pop_fr(FRn);
  2140     }
  2141 :}
  2143 FCMP/EQ FRm, FRn {:  
  2144     COUNT_INST(I_FCMPEQ);
  2145     check_fpuen();
  2146     if( sh4_x86.double_prec ) {
  2147         push_dr(FRm);
  2148         push_dr(FRn);
  2149     } else {
  2150         push_fr(FRm);
  2151         push_fr(FRn);
  2152     }
  2153     FCOMIP_st(1);
  2154     SETE_t();
  2155     FPOP_st();
  2156     sh4_x86.tstate = TSTATE_E;
  2157 :}
  2158 FCMP/GT FRm, FRn {:  
  2159     COUNT_INST(I_FCMPGT);
  2160     check_fpuen();
  2161     if( sh4_x86.double_prec ) {
  2162         push_dr(FRm);
  2163         push_dr(FRn);
  2164     } else {
  2165         push_fr(FRm);
  2166         push_fr(FRn);
  2167     }
  2168     FCOMIP_st(1);
  2169     SETA_t();
  2170     FPOP_st();
  2171     sh4_x86.tstate = TSTATE_A;
  2172 :}
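
       /* The vector ops below are defined only in single-precision mode,
        * hence the double_prec == 0 guards. FSCA delegates to the
        * sh4_fsca() helper, which writes the sine and cosine of the FPUL
        * angle into the FRn register pair. */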
  2174 FSCA FPUL, FRn {:  
  2175     COUNT_INST(I_FSCA);
  2176     check_fpuen();
  2177     if( sh4_x86.double_prec == 0 ) {
  2178         LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
  2179         load_spreg( R_EAX, R_FPUL );
  2180         call_func2( sh4_fsca, R_EAX, R_EDX );
  2181     }
  2182     sh4_x86.tstate = TSTATE_NONE;
  2183 :}
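
       /* FIPR: four-element inner product FVm.FVn. The SSE3 path uses a
        * packed multiply followed by two HADDPS reductions; the fallback
        * is a chain of x87 multiply-adds. */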
  2184 FIPR FVm, FVn {:  
  2185     COUNT_INST(I_FIPR);
  2186     check_fpuen();
  2187     if( sh4_x86.double_prec == 0 ) {
  2188         if( sh4_x86.sse3_enabled ) {
  2189             MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2190             MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2191             HADDPS_xmm_xmm( 4, 4 ); 
  2192             HADDPS_xmm_xmm( 4, 4 );
  2193             MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2194         } else {
  2195             push_fr( FVm<<2 );
  2196             push_fr( FVn<<2 );
  2197             FMULP_st(1);
  2198             push_fr( (FVm<<2)+1);
  2199             push_fr( (FVn<<2)+1);
  2200             FMULP_st(1);
  2201             FADDP_st(1);
  2202             push_fr( (FVm<<2)+2);
  2203             push_fr( (FVn<<2)+2);
  2204             FMULP_st(1);
  2205             FADDP_st(1);
  2206             push_fr( (FVm<<2)+3);
  2207             push_fr( (FVn<<2)+3);
  2208             FMULP_st(1);
  2209             FADDP_st(1);
  2210             pop_fr( (FVn<<2)+3);
  2211         }
  2212     }
  2213 :}
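
       /* FTRV: transform vector FVn by the 4x4 back-bank matrix XMTRX.
        * The SSE3 path loads the matrix into xmm0-3, splats each element
        * of FVn across xmm4-7, then multiplies and sums the partial
        * products; without SSE3 the sh4_ftrv() helper is called instead. */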
  2214 FTRV XMTRX, FVn {:  
  2215     COUNT_INST(I_FTRV);
  2216     check_fpuen();
  2217     if( sh4_x86.double_prec == 0 ) {
  2218         if( sh4_x86.sse3_enabled ) {
  2219             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2220             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2221             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2222             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2224             MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2225             MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2226             MOVAPS_xmm_xmm( 4, 6 );
  2227             MOVAPS_xmm_xmm( 5, 7 );
  2228             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2229             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2230             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2231             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2232             MULPS_xmm_xmm( 0, 4 );
  2233             MULPS_xmm_xmm( 1, 5 );
  2234             MULPS_xmm_xmm( 2, 6 );
  2235             MULPS_xmm_xmm( 3, 7 );
  2236             ADDPS_xmm_xmm( 5, 4 );
  2237             ADDPS_xmm_xmm( 7, 6 );
  2238             ADDPS_xmm_xmm( 6, 4 );
  2239             MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2240         } else {
  2241             LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
  2242             call_func1( sh4_ftrv, R_EAX );
  2243         }
  2244     }
  2245     sh4_x86.tstate = TSTATE_NONE;
  2246 :}
  2248 FRCHG {:  
  2249     COUNT_INST(I_FRCHG);
  2250     check_fpuen();
  2251     XOR_imm32_sh4r( FPSCR_FR, R_FPSCR );
  2252     call_func0( sh4_switch_fr_banks );
  2253     sh4_x86.tstate = TSTATE_NONE;
  2254 :}
  2255 FSCHG {:  
  2256     COUNT_INST(I_FSCHG);
  2257     check_fpuen();
  2258     XOR_imm32_sh4r( FPSCR_SZ, R_FPSCR);
  2259     XOR_imm32_sh4r( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
  2260     sh4_x86.tstate = TSTATE_NONE;
  2261     sh4_x86.double_size = !sh4_x86.double_size;
  2262 :}
  2264 /* Processor control instructions */
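       /* Most control-register transfers are privileged (check_priv()).
        * Writes to SR are routed through sh4_write_sr(), since they can
        * switch register banks and FPU enable state; fpuen_checked is
        * cleared afterwards so the next FP instruction re-checks. */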
  2265 LDC Rm, SR {:
  2266     COUNT_INST(I_LDCSR);
  2267     if( sh4_x86.in_delay_slot ) {
  2268 	SLOTILLEGAL();
  2269     } else {
  2270 	check_priv();
  2271 	load_reg( R_EAX, Rm );
  2272 	call_func1( sh4_write_sr, R_EAX );
  2273 	sh4_x86.fpuen_checked = FALSE;
  2274 	sh4_x86.tstate = TSTATE_NONE;
  2275 	return 2;
  2276     }
  2277 :}
  2278 LDC Rm, GBR {: 
  2279     COUNT_INST(I_LDC);
  2280     load_reg( R_EAX, Rm );
  2281     store_spreg( R_EAX, R_GBR );
  2282 :}
  2283 LDC Rm, VBR {:  
  2284     COUNT_INST(I_LDC);
  2285     check_priv();
  2286     load_reg( R_EAX, Rm );
  2287     store_spreg( R_EAX, R_VBR );
  2288     sh4_x86.tstate = TSTATE_NONE;
  2289 :}
  2290 LDC Rm, SSR {:  
  2291     COUNT_INST(I_LDC);
  2292     check_priv();
  2293     load_reg( R_EAX, Rm );
  2294     store_spreg( R_EAX, R_SSR );
  2295     sh4_x86.tstate = TSTATE_NONE;
  2296 :}
  2297 LDC Rm, SGR {:  
  2298     COUNT_INST(I_LDC);
  2299     check_priv();
  2300     load_reg( R_EAX, Rm );
  2301     store_spreg( R_EAX, R_SGR );
  2302     sh4_x86.tstate = TSTATE_NONE;
  2303 :}
  2304 LDC Rm, SPC {:  
  2305     COUNT_INST(I_LDC);
  2306     check_priv();
  2307     load_reg( R_EAX, Rm );
  2308     store_spreg( R_EAX, R_SPC );
  2309     sh4_x86.tstate = TSTATE_NONE;
  2310 :}
  2311 LDC Rm, DBR {:  
  2312     COUNT_INST(I_LDC);
  2313     check_priv();
  2314     load_reg( R_EAX, Rm );
  2315     store_spreg( R_EAX, R_DBR );
  2316     sh4_x86.tstate = TSTATE_NONE;
  2317 :}
  2318 LDC Rm, Rn_BANK {:  
  2319     COUNT_INST(I_LDC);
  2320     check_priv();
  2321     load_reg( R_EAX, Rm );
  2322     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2323     sh4_x86.tstate = TSTATE_NONE;
  2324 :}
  2325 LDC.L @Rm+, GBR {:  
  2326     COUNT_INST(I_LDCM);
  2327     load_reg( R_EAX, Rm );
  2328     check_ralign32( R_EAX );
  2329     MEM_READ_LONG( R_EAX, R_EAX );
  2330     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2331     store_spreg( R_EAX, R_GBR );
  2332     sh4_x86.tstate = TSTATE_NONE;
  2333 :}
  2334 LDC.L @Rm+, SR {:
  2335     COUNT_INST(I_LDCSRM);
  2336     if( sh4_x86.in_delay_slot ) {
  2337 	SLOTILLEGAL();
  2338     } else {
  2339 	check_priv();
  2340 	load_reg( R_EAX, Rm );
  2341 	check_ralign32( R_EAX );
  2342 	MEM_READ_LONG( R_EAX, R_EAX );
  2343 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2344 	call_func1( sh4_write_sr, R_EAX );
  2345 	sh4_x86.fpuen_checked = FALSE;
  2346 	sh4_x86.tstate = TSTATE_NONE;
  2347 	return 2;
  2348     }
  2349 :}
  2350 LDC.L @Rm+, VBR {:  
  2351     COUNT_INST(I_LDCM);
  2352     check_priv();
  2353     load_reg( R_EAX, Rm );
  2354     check_ralign32( R_EAX );
  2355     MEM_READ_LONG( R_EAX, R_EAX );
  2356     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2357     store_spreg( R_EAX, R_VBR );
  2358     sh4_x86.tstate = TSTATE_NONE;
  2359 :}
  2360 LDC.L @Rm+, SSR {:
  2361     COUNT_INST(I_LDCM);
  2362     check_priv();
  2363     load_reg( R_EAX, Rm );
  2364     check_ralign32( R_EAX );
  2365     MEM_READ_LONG( R_EAX, R_EAX );
  2366     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2367     store_spreg( R_EAX, R_SSR );
  2368     sh4_x86.tstate = TSTATE_NONE;
  2369 :}
  2370 LDC.L @Rm+, SGR {:  
  2371     COUNT_INST(I_LDCM);
  2372     check_priv();
  2373     load_reg( R_EAX, Rm );
  2374     check_ralign32( R_EAX );
  2375     MEM_READ_LONG( R_EAX, R_EAX );
  2376     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2377     store_spreg( R_EAX, R_SGR );
  2378     sh4_x86.tstate = TSTATE_NONE;
  2379 :}
  2380 LDC.L @Rm+, SPC {:  
  2381     COUNT_INST(I_LDCM);
  2382     check_priv();
  2383     load_reg( R_EAX, Rm );
  2384     check_ralign32( R_EAX );
  2385     MEM_READ_LONG( R_EAX, R_EAX );
  2386     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2387     store_spreg( R_EAX, R_SPC );
  2388     sh4_x86.tstate = TSTATE_NONE;
  2389 :}
  2390 LDC.L @Rm+, DBR {:  
  2391     COUNT_INST(I_LDCM);
  2392     check_priv();
  2393     load_reg( R_EAX, Rm );
  2394     check_ralign32( R_EAX );
  2395     MEM_READ_LONG( R_EAX, R_EAX );
  2396     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2397     store_spreg( R_EAX, R_DBR );
  2398     sh4_x86.tstate = TSTATE_NONE;
  2399 :}
  2400 LDC.L @Rm+, Rn_BANK {:  
  2401     COUNT_INST(I_LDCM);
  2402     check_priv();
  2403     load_reg( R_EAX, Rm );
  2404     check_ralign32( R_EAX );
  2405     MEM_READ_LONG( R_EAX, R_EAX );
  2406     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2407     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2408     sh4_x86.tstate = TSTATE_NONE;
  2409 :}
  2410 LDS Rm, FPSCR {:
  2411     COUNT_INST(I_LDSFPSCR);
  2412     check_fpuen();
  2413     load_reg( R_EAX, Rm );
  2414     call_func1( sh4_write_fpscr, R_EAX );
  2415     sh4_x86.tstate = TSTATE_NONE;
  2416     return 2;
  2417 :}
  2418 LDS.L @Rm+, FPSCR {:  
  2419     COUNT_INST(I_LDSFPSCRM);
  2420     check_fpuen();
  2421     load_reg( R_EAX, Rm );
  2422     check_ralign32( R_EAX );
  2423     MEM_READ_LONG( R_EAX, R_EAX );
  2424     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2425     call_func1( sh4_write_fpscr, R_EAX );
  2426     sh4_x86.tstate = TSTATE_NONE;
  2427     return 2;
  2428 :}
  2429 LDS Rm, FPUL {:  
  2430     COUNT_INST(I_LDS);
  2431     check_fpuen();
  2432     load_reg( R_EAX, Rm );
  2433     store_spreg( R_EAX, R_FPUL );
  2434 :}
  2435 LDS.L @Rm+, FPUL {:  
  2436     COUNT_INST(I_LDSM);
  2437     check_fpuen();
  2438     load_reg( R_EAX, Rm );
  2439     check_ralign32( R_EAX );
  2440     MEM_READ_LONG( R_EAX, R_EAX );
  2441     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2442     store_spreg( R_EAX, R_FPUL );
  2443     sh4_x86.tstate = TSTATE_NONE;
  2444 :}
  2445 LDS Rm, MACH {: 
  2446     COUNT_INST(I_LDS);
  2447     load_reg( R_EAX, Rm );
  2448     store_spreg( R_EAX, R_MACH );
  2449 :}
  2450 LDS.L @Rm+, MACH {:  
  2451     COUNT_INST(I_LDSM);
  2452     load_reg( R_EAX, Rm );
  2453     check_ralign32( R_EAX );
  2454     MEM_READ_LONG( R_EAX, R_EAX );
  2455     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2456     store_spreg( R_EAX, R_MACH );
  2457     sh4_x86.tstate = TSTATE_NONE;
  2458 :}
  2459 LDS Rm, MACL {:  
  2460     COUNT_INST(I_LDS);
  2461     load_reg( R_EAX, Rm );
  2462     store_spreg( R_EAX, R_MACL );
  2463 :}
  2464 LDS.L @Rm+, MACL {:  
  2465     COUNT_INST(I_LDSM);
  2466     load_reg( R_EAX, Rm );
  2467     check_ralign32( R_EAX );
  2468     MEM_READ_LONG( R_EAX, R_EAX );
  2469     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2470     store_spreg( R_EAX, R_MACL );
  2471     sh4_x86.tstate = TSTATE_NONE;
  2472 :}
  2473 LDS Rm, PR {:  
  2474     COUNT_INST(I_LDS);
  2475     load_reg( R_EAX, Rm );
  2476     store_spreg( R_EAX, R_PR );
  2477 :}
  2478 LDS.L @Rm+, PR {:  
  2479     COUNT_INST(I_LDSM);
  2480     load_reg( R_EAX, Rm );
  2481     check_ralign32( R_EAX );
  2482     MEM_READ_LONG( R_EAX, R_EAX );
  2483     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2484     store_spreg( R_EAX, R_PR );
  2485     sh4_x86.tstate = TSTATE_NONE;
  2486 :}
  2487 LDTLB {:  
  2488     COUNT_INST(I_LDTLB);
  2489     call_func0( MMU_ldtlb );
  2490     sh4_x86.tstate = TSTATE_NONE;
  2491 :}
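
       /* The cache-control ops below are no-ops beyond instruction
        * counting, presumably because the operand cache itself is not
        * modelled at this level. */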
  2492 OCBI @Rn {:
  2493     COUNT_INST(I_OCBI);
  2494 :}
  2495 OCBP @Rn {:
  2496     COUNT_INST(I_OCBP);
  2497 :}
  2498 OCBWB @Rn {:
  2499     COUNT_INST(I_OCBWB);
  2500 :}
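
       /* PREF only has a visible effect on the store-queue region
        * (0xE0000000-0xE3FFFFFF, matched via the 0xFC000000 mask): it
        * flushes the addressed queue, using the MMU-aware variant when
        * address translation is on. All other addresses fall through as
        * no-ops. */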
  2501 PREF @Rn {:
  2502     COUNT_INST(I_PREF);
  2503     load_reg( R_EAX, Rn );
  2504     MOV_r32_r32( R_EAX, R_ECX );
  2505     AND_imm32_r32( 0xFC000000, R_ECX );
  2506     CMP_imm32_r32( 0xE0000000, R_ECX );
  2507     JNE_rel8(end);
  2508     if( sh4_x86.tlb_on ) {
  2509     	call_func1( sh4_flush_store_queue_mmu, R_EAX );
  2510         TEST_r32_r32( R_EAX, R_EAX );
  2511         JE_exc(-1);
  2512     } else {
  2513     	call_func1( sh4_flush_store_queue, R_EAX );
  2514     }
  2515     JMP_TARGET(end);
  2516     sh4_x86.tstate = TSTATE_NONE;
  2517 :}
  2518 SLEEP {: 
  2519     COUNT_INST(I_SLEEP);
  2520     check_priv();
  2521     call_func0( sh4_sleep );
  2522     sh4_x86.tstate = TSTATE_NONE;
  2523     sh4_x86.in_delay_slot = DELAY_NONE;
  2524     return 2;
  2525 :}
  2526 STC SR, Rn {:
  2527     COUNT_INST(I_STCSR);
  2528     check_priv();
  2529     call_func0(sh4_read_sr);
  2530     store_reg( R_EAX, Rn );
  2531     sh4_x86.tstate = TSTATE_NONE;
  2532 :}
  2533 STC GBR, Rn {:  
  2534     COUNT_INST(I_STC);
  2535     load_spreg( R_EAX, R_GBR );
  2536     store_reg( R_EAX, Rn );
  2537 :}
  2538 STC VBR, Rn {:  
  2539     COUNT_INST(I_STC);
  2540     check_priv();
  2541     load_spreg( R_EAX, R_VBR );
  2542     store_reg( R_EAX, Rn );
  2543     sh4_x86.tstate = TSTATE_NONE;
  2544 :}
  2545 STC SSR, Rn {:  
  2546     COUNT_INST(I_STC);
  2547     check_priv();
  2548     load_spreg( R_EAX, R_SSR );
  2549     store_reg( R_EAX, Rn );
  2550     sh4_x86.tstate = TSTATE_NONE;
  2551 :}
  2552 STC SPC, Rn {:  
  2553     COUNT_INST(I_STC);
  2554     check_priv();
  2555     load_spreg( R_EAX, R_SPC );
  2556     store_reg( R_EAX, Rn );
  2557     sh4_x86.tstate = TSTATE_NONE;
  2558 :}
  2559 STC SGR, Rn {:  
  2560     COUNT_INST(I_STC);
  2561     check_priv();
  2562     load_spreg( R_EAX, R_SGR );
  2563     store_reg( R_EAX, Rn );
  2564     sh4_x86.tstate = TSTATE_NONE;
  2565 :}
  2566 STC DBR, Rn {:  
  2567     COUNT_INST(I_STC);
  2568     check_priv();
  2569     load_spreg( R_EAX, R_DBR );
  2570     store_reg( R_EAX, Rn );
  2571     sh4_x86.tstate = TSTATE_NONE;
  2572 :}
  2573 STC Rm_BANK, Rn {:
  2574     COUNT_INST(I_STC);
  2575     check_priv();
  2576     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2577     store_reg( R_EAX, Rn );
  2578     sh4_x86.tstate = TSTATE_NONE;
  2579 :}
  2580 STC.L SR, @-Rn {:
  2581     COUNT_INST(I_STCSRM);
  2582     check_priv();
  2583     call_func0( sh4_read_sr );
  2584     MOV_r32_r32( R_EAX, R_EDX );
  2585     load_reg( R_EAX, Rn );
  2586     check_walign32( R_EAX );
  2587     LEA_r32disp8_r32( R_EAX, -4, R_EAX );
  2588     MEM_WRITE_LONG( R_EAX, R_EDX );
  2589     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2590     sh4_x86.tstate = TSTATE_NONE;
  2591 :}
  2592 STC.L VBR, @-Rn {:  
  2593     COUNT_INST(I_STCM);
  2594     check_priv();
  2595     load_reg( R_EAX, Rn );
  2596     check_walign32( R_EAX );
  2597     ADD_imm8s_r32( -4, R_EAX );
  2598     load_spreg( R_EDX, R_VBR );
  2599     MEM_WRITE_LONG( R_EAX, R_EDX );
  2600     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2601     sh4_x86.tstate = TSTATE_NONE;
  2602 :}
  2603 STC.L SSR, @-Rn {:  
  2604     COUNT_INST(I_STCM);
  2605     check_priv();
  2606     load_reg( R_EAX, Rn );
  2607     check_walign32( R_EAX );
  2608     ADD_imm8s_r32( -4, R_EAX );
  2609     load_spreg( R_EDX, R_SSR );
  2610     MEM_WRITE_LONG( R_EAX, R_EDX );
  2611     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2612     sh4_x86.tstate = TSTATE_NONE;
  2613 :}
  2614 STC.L SPC, @-Rn {:
  2615     COUNT_INST(I_STCM);
  2616     check_priv();
  2617     load_reg( R_EAX, Rn );
  2618     check_walign32( R_EAX );
  2619     ADD_imm8s_r32( -4, R_EAX );
  2620     load_spreg( R_EDX, R_SPC );
  2621     MEM_WRITE_LONG( R_EAX, R_EDX );
  2622     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2623     sh4_x86.tstate = TSTATE_NONE;
  2624 :}
  2625 STC.L SGR, @-Rn {:  
  2626     COUNT_INST(I_STCM);
  2627     check_priv();
  2628     load_reg( R_EAX, Rn );
  2629     check_walign32( R_EAX );
  2630     ADD_imm8s_r32( -4, R_EAX );
  2631     load_spreg( R_EDX, R_SGR );
  2632     MEM_WRITE_LONG( R_EAX, R_EDX );
  2633     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2634     sh4_x86.tstate = TSTATE_NONE;
  2635 :}
  2636 STC.L DBR, @-Rn {:  
  2637     COUNT_INST(I_STCM);
  2638     check_priv();
  2639     load_reg( R_EAX, Rn );
  2640     check_walign32( R_EAX );
  2641     ADD_imm8s_r32( -4, R_EAX );
  2642     load_spreg( R_EDX, R_DBR );
  2643     MEM_WRITE_LONG( R_EAX, R_EDX );
  2644     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2645     sh4_x86.tstate = TSTATE_NONE;
  2646 :}
  2647 STC.L Rm_BANK, @-Rn {:  
  2648     COUNT_INST(I_STCM);
  2649     check_priv();
  2650     load_reg( R_EAX, Rn );
  2651     check_walign32( R_EAX );
  2652     ADD_imm8s_r32( -4, R_EAX );
  2653     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2654     MEM_WRITE_LONG( R_EAX, R_EDX );
  2655     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2656     sh4_x86.tstate = TSTATE_NONE;
  2657 :}
  2658 STC.L GBR, @-Rn {:  
  2659     COUNT_INST(I_STCM);
  2660     load_reg( R_EAX, Rn );
  2661     check_walign32( R_EAX );
  2662     ADD_imm8s_r32( -4, R_EAX );
  2663     load_spreg( R_EDX, R_GBR );
  2664     MEM_WRITE_LONG( R_EAX, R_EDX );
  2665     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2666     sh4_x86.tstate = TSTATE_NONE;
  2667 :}
  2668 STS FPSCR, Rn {:  
  2669     COUNT_INST(I_STSFPSCR);
  2670     check_fpuen();
  2671     load_spreg( R_EAX, R_FPSCR );
  2672     store_reg( R_EAX, Rn );
  2673 :}
  2674 STS.L FPSCR, @-Rn {:  
  2675     COUNT_INST(I_STSFPSCRM);
  2676     check_fpuen();
  2677     load_reg( R_EAX, Rn );
  2678     check_walign32( R_EAX );
  2679     ADD_imm8s_r32( -4, R_EAX );
  2680     load_spreg( R_EDX, R_FPSCR );
  2681     MEM_WRITE_LONG( R_EAX, R_EDX );
  2682     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2683     sh4_x86.tstate = TSTATE_NONE;
  2684 :}
  2685 STS FPUL, Rn {:  
  2686     COUNT_INST(I_STS);
  2687     check_fpuen();
  2688     load_spreg( R_EAX, R_FPUL );
  2689     store_reg( R_EAX, Rn );
  2690 :}
  2691 STS.L FPUL, @-Rn {:  
  2692     COUNT_INST(I_STSM);
  2693     check_fpuen();
  2694     load_reg( R_EAX, Rn );
  2695     check_walign32( R_EAX );
  2696     ADD_imm8s_r32( -4, R_EAX );
  2697     load_spreg( R_EDX, R_FPUL );
  2698     MEM_WRITE_LONG( R_EAX, R_EDX );
  2699     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2700     sh4_x86.tstate = TSTATE_NONE;
  2701 :}
  2702 STS MACH, Rn {:  
  2703     COUNT_INST(I_STS);
  2704     load_spreg( R_EAX, R_MACH );
  2705     store_reg( R_EAX, Rn );
  2706 :}
  2707 STS.L MACH, @-Rn {:  
  2708     COUNT_INST(I_STSM);
  2709     load_reg( R_EAX, Rn );
  2710     check_walign32( R_EAX );
  2711     ADD_imm8s_r32( -4, R_EAX );
  2712     load_spreg( R_EDX, R_MACH );
  2713     MEM_WRITE_LONG( R_EAX, R_EDX );
  2714     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2715     sh4_x86.tstate = TSTATE_NONE;
  2716 :}
  2717 STS MACL, Rn {:  
  2718     COUNT_INST(I_STS);
  2719     load_spreg( R_EAX, R_MACL );
  2720     store_reg( R_EAX, Rn );
  2721 :}
  2722 STS.L MACL, @-Rn {:  
  2723     COUNT_INST(I_STSM);
  2724     load_reg( R_EAX, Rn );
  2725     check_walign32( R_EAX );
  2726     ADD_imm8s_r32( -4, R_EAX );
  2727     load_spreg( R_EDX, R_MACL );
  2728     MEM_WRITE_LONG( R_EAX, R_EDX );
  2729     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2730     sh4_x86.tstate = TSTATE_NONE;
  2731 :}
  2732 STS PR, Rn {:  
  2733     COUNT_INST(I_STS);
  2734     load_spreg( R_EAX, R_PR );
  2735     store_reg( R_EAX, Rn );
  2736 :}
  2737 STS.L PR, @-Rn {:  
  2738     COUNT_INST(I_STSM);
  2739     load_reg( R_EAX, Rn );
  2740     check_walign32( R_EAX );
  2741     ADD_imm8s_r32( -4, R_EAX );
  2742     load_spreg( R_EDX, R_PR );
  2743     MEM_WRITE_LONG( R_EAX, R_EDX );
  2744     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2745     sh4_x86.tstate = TSTATE_NONE;
  2746 :}
  2748 NOP {: 
  2749     COUNT_INST(I_NOP);
  2750     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  2751 :}
  2752 %%
  2753     sh4_x86.in_delay_slot = DELAY_NONE;
  2754     return 0;