lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 926:68f3e0fe02f1
prev 911:2f6ba75b84d1
next 927:17b6b9e245d8
author nkeynes
date Sun Dec 14 07:50:48 2008 +0000
permissions -rw-r--r--
last change Set up a 'proper' stack frame in translated blocks. This doesn't affect performance
noticeably, but does ensure that:
a) the stack is aligned correctly on OS X with no extra effort, and
b) we can't mess up the stack and crash that way anymore.
Replace all PUSH/POP instructions (outside of the prologue/epilogue) with ESP-relative moves to
stack-local variables.
Finally, merge ia32mac and ia32abi together, since they're pretty much the same now anyway
(thereby simplifying maintenance a good deal).
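
To make the PUSH/POP point concrete, here is a minimal sketch of the change (illustrative only, not
code from this revision; PUSH_r32/POP_r32 are assumed emitters from x86op.h, while MOV_r32_esp8 and
MOV_esp8_r32 are the ESP-relative emitters the listing below actually uses, e.g. in AND.B):

    /* Before: save the translated address across the call by pushing it */
    PUSH_r32( R_EAX );                      /* push %eax          */
    MEM_READ_BYTE( R_EAX, R_EDX );          /* call sh4_read_byte */
    POP_r32( R_EAX );                       /* pop %eax           */

    /* After: the prologue reserves stack slots once, so the save becomes an
     * ESP-relative move and ESP (hence stack alignment) never changes */
    MOV_r32_esp8( R_EAX, 0 );               /* mov %eax, 0(%esp)  */
    MEM_READ_BYTE( R_EAX, R_EDX );          /* call sh4_read_byte */
    MOV_esp8_r32( 0, R_EAX );               /* mov 0(%esp), %eax  */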
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "lxdream.h"
    29 #include "sh4/xltcache.h"
    30 #include "sh4/sh4core.h"
    31 #include "sh4/sh4trans.h"
    32 #include "sh4/sh4stat.h"
    33 #include "sh4/sh4mmio.h"
    34 #include "sh4/x86op.h"
    35 #include "clock.h"
    37 #define DEFAULT_BACKPATCH_SIZE 4096
    39 struct backpatch_record {
    40     uint32_t fixup_offset;
    41     uint32_t fixup_icount;
    42     int32_t exc_code;
    43 };
    45 #define DELAY_NONE 0
    46 #define DELAY_PC 1
    47 #define DELAY_PC_PR 2
    49 /** 
    50  * Struct to manage internal translation state. This state is not saved -
    51  * it is only valid between calls to sh4_translate_begin_block() and
    52  * sh4_translate_end_block()
    53  */
    54 struct sh4_x86_state {
    55     int in_delay_slot;
    56     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    57     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    58     gboolean branch_taken; /* true if we branched unconditionally */
    59     gboolean double_prec; /* true if FPU is in double-precision mode */
    60     gboolean double_size; /* true if FPU is in double-size mode */
    61     gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    62     uint32_t block_start_pc;
    63     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    64     int tstate;
    66     /* mode flags */
    67     gboolean tlb_on; /* True if tlb translation is active */
    69     /* Allocated memory for the (block-wide) back-patch list */
    70     struct backpatch_record *backpatch_list;
    71     uint32_t backpatch_posn;
    72     uint32_t backpatch_size;
    73 };
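/* Illustrative usage sketch (not in the original file): the lifecycle implied
 * by the comment above. sh4_x86 is reset by sh4_translate_begin_block() and
 * never persists across blocks:
 *
 *     sh4_translate_begin_block( pc );
 *     while( sh4_translate_instruction( pc ) == 0 )
 *         pc += 2;                  // nonzero marks the end of the block
 *     // ... then finish via sh4_translate_end_block(), per the comment
 */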
    75 #define TSTATE_NONE -1
    76 #define TSTATE_O    0
    77 #define TSTATE_C    2
    78 #define TSTATE_E    4
    79 #define TSTATE_NE   5
    80 #define TSTATE_G    0xF
    81 #define TSTATE_GE   0xD
    82 #define TSTATE_A    7
    83 #define TSTATE_AE   3
    85 #ifdef ENABLE_SH4STATS
    86 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
    87 #else
    88 #define COUNT_INST(id)
    89 #endif
    91 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    92 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    93 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    94     OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
    96 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    97 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    98 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    99     OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
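/* Illustrative note (not in the original file): the TSTATE_* values above are
 * x86 condition-code numbers, so OP(0x70+sh4_x86.tstate) emits the matching
 * Jcc rel8 opcode directly. Worked example: after CMP/EQ sets tstate to
 * TSTATE_E (4), JT_rel8 emits 0x70+4 = 0x74 (JE) with no extra compare, and
 * JF_rel8 flips the low bit (4^1 = 5) to invert the condition: 0x75 (JNE). */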
   101 static struct sh4_x86_state sh4_x86;
   103 static uint32_t max_int = 0x7FFFFFFF;
   104 static uint32_t min_int = 0x80000000;
   105 static uint32_t save_fcw; /* save value for fpu control word */
   106 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
   108 gboolean is_sse3_supported()
   109 {
   110     uint32_t features;
   112     __asm__ __volatile__(
   113         "mov $0x01, %%eax\n\t"
   114         "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
   115     return (features & 1) ? TRUE : FALSE;
   116 }
   118 void sh4_translate_init(void)
   119 {
   120     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   121     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   122     sh4_x86.sse3_enabled = is_sse3_supported();
   123 }
   126 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   127 {
   128     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   129 	sh4_x86.backpatch_size <<= 1;
   130 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   131 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   132 	assert( sh4_x86.backpatch_list != NULL );
   133     }
   134     if( sh4_x86.in_delay_slot ) {
   135 	fixup_pc -= 2;
   136     }
   137     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   138 	((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
   139     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   140     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   141     sh4_x86.backpatch_posn++;
   142 }
   144 /**
   145  * Emit an instruction to load an SH4 reg into a real register
   146  */
   147 static inline void load_reg( int x86reg, int sh4reg ) 
   148 {
   149     /* mov [bp+n], reg */
   150     OP(0x8B);
   151     OP(0x45 + (x86reg<<3));
   152     OP(REG_OFFSET(r[sh4reg]));
   153 }
   155 static inline void load_reg16s( int x86reg, int sh4reg )
   156 {
   157     OP(0x0F);
   158     OP(0xBF);
   159     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   160 }
   162 static inline void load_reg16u( int x86reg, int sh4reg )
   163 {
   164     OP(0x0F);
   165     OP(0xB7);
   166     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   168 }
   170 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   171 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   172 /**
   173  * Emit an instruction to load an immediate value into a register
   174  */
   175 static inline void load_imm32( int x86reg, uint32_t value ) {
   176     /* mov #value, reg */
   177     OP(0xB8 + x86reg);
   178     OP32(value);
   179 }
   181 /**
   182  * Load an immediate 64-bit quantity (note: x86-64 only)
   183  */
   184 static inline void load_imm64( int x86reg, uint64_t value ) {
   185     /* mov #value, reg */
   186     REXW();
   187     OP(0xB8 + x86reg);
   188     OP64(value);
   189 }
   191 /**
   192  * Emit an instruction to store an SH4 reg (RN)
   193  */
    194 static inline void store_reg( int x86reg, int sh4reg ) {
   195     /* mov reg, [bp+n] */
   196     OP(0x89);
   197     OP(0x45 + (x86reg<<3));
   198     OP(REG_OFFSET(r[sh4reg]));
   199 }
   201 /**
   202  * Load an FR register (single-precision floating point) into an integer x86
   203  * register (eg for register-to-register moves)
   204  */
   205 #define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
   206 #define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
   208 /**
   209  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   210  */
   211 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   212 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   214 /**
    215  * Store an FR register (single-precision floating point) from an integer x86
   216  * register (eg for register-to-register moves)
   217  */
   218 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
   219 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
   221 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   222 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   225 #define push_fpul()  FLDF_sh4r(R_FPUL)
   226 #define pop_fpul()   FSTPF_sh4r(R_FPUL)
   227 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   228 #define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   229 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   230 #define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   231 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   232 #define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   233 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   234 #define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   238 /* Exception checks - Note that all exception checks will clobber EAX */
   240 #define check_priv( ) \
   241     if( !sh4_x86.priv_checked ) { \
   242 	sh4_x86.priv_checked = TRUE;\
   243 	load_spreg( R_EAX, R_SR );\
   244 	AND_imm32_r32( SR_MD, R_EAX );\
   245 	if( sh4_x86.in_delay_slot ) {\
   246 	    JE_exc( EXC_SLOT_ILLEGAL );\
   247 	} else {\
   248 	    JE_exc( EXC_ILLEGAL );\
   249 	}\
   250 	sh4_x86.tstate = TSTATE_NONE; \
   251     }\
   253 #define check_fpuen( ) \
   254     if( !sh4_x86.fpuen_checked ) {\
   255 	sh4_x86.fpuen_checked = TRUE;\
   256 	load_spreg( R_EAX, R_SR );\
   257 	AND_imm32_r32( SR_FD, R_EAX );\
   258 	if( sh4_x86.in_delay_slot ) {\
   259 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   260 	} else {\
   261 	    JNE_exc(EXC_FPU_DISABLED);\
   262 	}\
   263 	sh4_x86.tstate = TSTATE_NONE; \
   264     }
   266 #define check_ralign16( x86reg ) \
   267     TEST_imm32_r32( 0x00000001, x86reg ); \
   268     JNE_exc(EXC_DATA_ADDR_READ)
   270 #define check_walign16( x86reg ) \
   271     TEST_imm32_r32( 0x00000001, x86reg ); \
   272     JNE_exc(EXC_DATA_ADDR_WRITE);
   274 #define check_ralign32( x86reg ) \
   275     TEST_imm32_r32( 0x00000003, x86reg ); \
   276     JNE_exc(EXC_DATA_ADDR_READ)
   278 #define check_walign32( x86reg ) \
   279     TEST_imm32_r32( 0x00000003, x86reg ); \
   280     JNE_exc(EXC_DATA_ADDR_WRITE);
   282 #define check_ralign64( x86reg ) \
   283     TEST_imm32_r32( 0x00000007, x86reg ); \
   284     JNE_exc(EXC_DATA_ADDR_READ)
   286 #define check_walign64( x86reg ) \
   287     TEST_imm32_r32( 0x00000007, x86reg ); \
   288     JNE_exc(EXC_DATA_ADDR_WRITE);
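/* Illustrative example (not in the original file): check_ralign32(R_ECX)
 * expands to "test $3, %ecx" followed by JNE_exc, so a long read from e.g.
 * 0x8C001002 (low two bits nonzero) takes the exception path -- presumably
 * recorded via the backpatch list above -- raising EXC_DATA_ADDR_READ. */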
   290 #define UNDEF(ir)
   291 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   292 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   293 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   294 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   295 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   296 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   297 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   299 /**
   300  * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned 
   301  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   302  */
   303 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   305 #define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg) }
   306 /**
   307  * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned 
   308  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   309  */
   310 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   312 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
   314 /****** Import appropriate calling conventions ******/
   315 #if SIZEOF_VOID_P == 8
   316 #include "sh4/ia64abi.h"
   317 #else /* 32-bit system */
   318 #include "sh4/ia32abi.h"
   319 #endif
   321 void sh4_translate_begin_block( sh4addr_t pc ) 
   322 {
   323 	enter_block();
   324     sh4_x86.in_delay_slot = FALSE;
   325     sh4_x86.priv_checked = FALSE;
   326     sh4_x86.fpuen_checked = FALSE;
   327     sh4_x86.branch_taken = FALSE;
   328     sh4_x86.backpatch_posn = 0;
   329     sh4_x86.block_start_pc = pc;
   330     sh4_x86.tlb_on = IS_MMU_ENABLED();
   331     sh4_x86.tstate = TSTATE_NONE;
   332     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   333     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
   334 }
   337 uint32_t sh4_translate_end_block_size()
   338 {
   339     if( sh4_x86.backpatch_posn <= 3 ) {
   340         return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
   341     } else {
   342         return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
   343     }
   344 }
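/* Worked example (illustrative): with backpatch_posn == 5 the estimate is
 * EPILOGUE_SIZE + 48 + (5-3)*15 = EPILOGUE_SIZE + 78 bytes of epilogue. */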
   347 /**
   348  * Embed a breakpoint into the generated code
   349  */
   350 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   351 {
   352     load_imm32( R_EAX, pc );
   353     call_func1( sh4_translate_breakpoint_hit, R_EAX );
   354     sh4_x86.tstate = TSTATE_NONE;
   355 }
   358 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
   360 /**
   361  * Embed a call to sh4_execute_instruction for situations that we
   362  * can't translate (just page-crossing delay slots at the moment).
   363  * Caller is responsible for setting new_pc before calling this function.
   364  *
   365  * Performs:
   366  *   Set PC = endpc
   367  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   368  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   369  *   Call sh4_execute_instruction
   370  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   371  */
   372 void exit_block_emu( sh4vma_t endpc )
   373 {
   374     load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
   375     ADD_r32_sh4r( R_ECX, R_PC );
   377     load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
   378     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   379     load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
   380     store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
   382     call_func0( sh4_execute_instruction );    
   383     load_spreg( R_EAX, R_PC );
   384     if( sh4_x86.tlb_on ) {
   385 	call_func1(xlat_get_code_by_vma,R_EAX);
   386     } else {
   387 	call_func1(xlat_get_code,R_EAX);
   388     }
   389     exit_block();
   390 } 
   392 /**
   393  * Translate a single instruction. Delayed branches are handled specially
    394  * by translating both branch and delayed instruction as a single unit (as
    395  * the branch does not complete until its delay-slot instruction has executed).
   396  * The instruction MUST be in the icache (assert check)
   397  *
   398  * @return true if the instruction marks the end of a basic block
    399  * (eg a branch or an instruction that otherwise ends the block)
   400  */
   401 uint32_t sh4_translate_instruction( sh4vma_t pc )
   402 {
   403     uint32_t ir;
   404     /* Read instruction from icache */
   405     assert( IS_IN_ICACHE(pc) );
   406     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   408     if( !sh4_x86.in_delay_slot ) {
   409 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   410     }
   411 %%
   412 /* ALU operations */
   413 ADD Rm, Rn {:
   414     COUNT_INST(I_ADD);
   415     load_reg( R_EAX, Rm );
   416     load_reg( R_ECX, Rn );
   417     ADD_r32_r32( R_EAX, R_ECX );
   418     store_reg( R_ECX, Rn );
   419     sh4_x86.tstate = TSTATE_NONE;
   420 :}
   421 ADD #imm, Rn {:  
   422     COUNT_INST(I_ADDI);
   423     load_reg( R_EAX, Rn );
   424     ADD_imm8s_r32( imm, R_EAX );
   425     store_reg( R_EAX, Rn );
   426     sh4_x86.tstate = TSTATE_NONE;
   427 :}
   428 ADDC Rm, Rn {:
   429     COUNT_INST(I_ADDC);
   430     if( sh4_x86.tstate != TSTATE_C ) {
   431         LDC_t();
   432     }
   433     load_reg( R_EAX, Rm );
   434     load_reg( R_ECX, Rn );
   435     ADC_r32_r32( R_EAX, R_ECX );
   436     store_reg( R_ECX, Rn );
   437     SETC_t();
   438     sh4_x86.tstate = TSTATE_C;
   439 :}
   440 ADDV Rm, Rn {:
   441     COUNT_INST(I_ADDV);
   442     load_reg( R_EAX, Rm );
   443     load_reg( R_ECX, Rn );
   444     ADD_r32_r32( R_EAX, R_ECX );
   445     store_reg( R_ECX, Rn );
   446     SETO_t();
   447     sh4_x86.tstate = TSTATE_O;
   448 :}
   449 AND Rm, Rn {:
   450     COUNT_INST(I_AND);
   451     load_reg( R_EAX, Rm );
   452     load_reg( R_ECX, Rn );
   453     AND_r32_r32( R_EAX, R_ECX );
   454     store_reg( R_ECX, Rn );
   455     sh4_x86.tstate = TSTATE_NONE;
   456 :}
   457 AND #imm, R0 {:  
   458     COUNT_INST(I_ANDI);
   459     load_reg( R_EAX, 0 );
   460     AND_imm32_r32(imm, R_EAX); 
   461     store_reg( R_EAX, 0 );
   462     sh4_x86.tstate = TSTATE_NONE;
   463 :}
   464 AND.B #imm, @(R0, GBR) {: 
   465     COUNT_INST(I_ANDB);
   466     load_reg( R_EAX, 0 );
   467     load_spreg( R_ECX, R_GBR );
   468     ADD_r32_r32( R_ECX, R_EAX );
   469     MMU_TRANSLATE_WRITE( R_EAX );
   470     MOV_r32_esp8(R_EAX, 0);
   471     MEM_READ_BYTE( R_EAX, R_EDX );
   472     MOV_esp8_r32(0, R_EAX);
   473     AND_imm32_r32(imm, R_EDX );
   474     MEM_WRITE_BYTE( R_EAX, R_EDX );
   475     sh4_x86.tstate = TSTATE_NONE;
   476 :}
   477 CMP/EQ Rm, Rn {:  
   478     COUNT_INST(I_CMPEQ);
   479     load_reg( R_EAX, Rm );
   480     load_reg( R_ECX, Rn );
   481     CMP_r32_r32( R_EAX, R_ECX );
   482     SETE_t();
   483     sh4_x86.tstate = TSTATE_E;
   484 :}
   485 CMP/EQ #imm, R0 {:  
   486     COUNT_INST(I_CMPEQI);
   487     load_reg( R_EAX, 0 );
   488     CMP_imm8s_r32(imm, R_EAX);
   489     SETE_t();
   490     sh4_x86.tstate = TSTATE_E;
   491 :}
   492 CMP/GE Rm, Rn {:  
   493     COUNT_INST(I_CMPGE);
   494     load_reg( R_EAX, Rm );
   495     load_reg( R_ECX, Rn );
   496     CMP_r32_r32( R_EAX, R_ECX );
   497     SETGE_t();
   498     sh4_x86.tstate = TSTATE_GE;
   499 :}
   500 CMP/GT Rm, Rn {: 
   501     COUNT_INST(I_CMPGT);
   502     load_reg( R_EAX, Rm );
   503     load_reg( R_ECX, Rn );
   504     CMP_r32_r32( R_EAX, R_ECX );
   505     SETG_t();
   506     sh4_x86.tstate = TSTATE_G;
   507 :}
   508 CMP/HI Rm, Rn {:  
   509     COUNT_INST(I_CMPHI);
   510     load_reg( R_EAX, Rm );
   511     load_reg( R_ECX, Rn );
   512     CMP_r32_r32( R_EAX, R_ECX );
   513     SETA_t();
   514     sh4_x86.tstate = TSTATE_A;
   515 :}
   516 CMP/HS Rm, Rn {: 
   517     COUNT_INST(I_CMPHS);
   518     load_reg( R_EAX, Rm );
   519     load_reg( R_ECX, Rn );
   520     CMP_r32_r32( R_EAX, R_ECX );
   521     SETAE_t();
   522     sh4_x86.tstate = TSTATE_AE;
   523  :}
   524 CMP/PL Rn {: 
   525     COUNT_INST(I_CMPPL);
   526     load_reg( R_EAX, Rn );
   527     CMP_imm8s_r32( 0, R_EAX );
   528     SETG_t();
   529     sh4_x86.tstate = TSTATE_G;
   530 :}
   531 CMP/PZ Rn {:  
   532     COUNT_INST(I_CMPPZ);
   533     load_reg( R_EAX, Rn );
   534     CMP_imm8s_r32( 0, R_EAX );
   535     SETGE_t();
   536     sh4_x86.tstate = TSTATE_GE;
   537 :}
   538 CMP/STR Rm, Rn {:  
   539     COUNT_INST(I_CMPSTR);
   540     load_reg( R_EAX, Rm );
   541     load_reg( R_ECX, Rn );
   542     XOR_r32_r32( R_ECX, R_EAX );
   543     TEST_r8_r8( R_AL, R_AL );
   544     JE_rel8(target1);
   545     TEST_r8_r8( R_AH, R_AH );
   546     JE_rel8(target2);
   547     SHR_imm8_r32( 16, R_EAX );
   548     TEST_r8_r8( R_AL, R_AL );
   549     JE_rel8(target3);
   550     TEST_r8_r8( R_AH, R_AH );
   551     JMP_TARGET(target1);
   552     JMP_TARGET(target2);
   553     JMP_TARGET(target3);
   554     SETE_t();
   555     sh4_x86.tstate = TSTATE_E;
   556 :}
   557 DIV0S Rm, Rn {:
   558     COUNT_INST(I_DIV0S);
   559     load_reg( R_EAX, Rm );
   560     load_reg( R_ECX, Rn );
   561     SHR_imm8_r32( 31, R_EAX );
   562     SHR_imm8_r32( 31, R_ECX );
   563     store_spreg( R_EAX, R_M );
   564     store_spreg( R_ECX, R_Q );
   565     CMP_r32_r32( R_EAX, R_ECX );
   566     SETNE_t();
   567     sh4_x86.tstate = TSTATE_NE;
   568 :}
   569 DIV0U {:  
   570     COUNT_INST(I_DIV0U);
   571     XOR_r32_r32( R_EAX, R_EAX );
   572     store_spreg( R_EAX, R_Q );
   573     store_spreg( R_EAX, R_M );
   574     store_spreg( R_EAX, R_T );
   575     sh4_x86.tstate = TSTATE_C; // works for DIV1
   576 :}
   577 DIV1 Rm, Rn {:
   578     COUNT_INST(I_DIV1);
   579     load_spreg( R_ECX, R_M );
   580     load_reg( R_EAX, Rn );
   581     if( sh4_x86.tstate != TSTATE_C ) {
   582 	LDC_t();
   583     }
   584     RCL1_r32( R_EAX );
   585     SETC_r8( R_DL ); // Q'
   586     CMP_sh4r_r32( R_Q, R_ECX );
   587     JE_rel8(mqequal);
   588     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   589     JMP_rel8(end);
   590     JMP_TARGET(mqequal);
   591     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   592     JMP_TARGET(end);
   593     store_reg( R_EAX, Rn ); // Done with Rn now
   594     SETC_r8(R_AL); // tmp1
   595     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   596     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   597     store_spreg( R_ECX, R_Q );
   598     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   599     MOVZX_r8_r32( R_AL, R_EAX );
   600     store_spreg( R_EAX, R_T );
   601     sh4_x86.tstate = TSTATE_NONE;
   602 :}
   603 DMULS.L Rm, Rn {:  
   604     COUNT_INST(I_DMULS);
   605     load_reg( R_EAX, Rm );
   606     load_reg( R_ECX, Rn );
   607     IMUL_r32(R_ECX);
   608     store_spreg( R_EDX, R_MACH );
   609     store_spreg( R_EAX, R_MACL );
   610     sh4_x86.tstate = TSTATE_NONE;
   611 :}
   612 DMULU.L Rm, Rn {:  
   613     COUNT_INST(I_DMULU);
   614     load_reg( R_EAX, Rm );
   615     load_reg( R_ECX, Rn );
   616     MUL_r32(R_ECX);
   617     store_spreg( R_EDX, R_MACH );
   618     store_spreg( R_EAX, R_MACL );    
   619     sh4_x86.tstate = TSTATE_NONE;
   620 :}
   621 DT Rn {:  
   622     COUNT_INST(I_DT);
   623     load_reg( R_EAX, Rn );
   624     ADD_imm8s_r32( -1, R_EAX );
   625     store_reg( R_EAX, Rn );
   626     SETE_t();
   627     sh4_x86.tstate = TSTATE_E;
   628 :}
   629 EXTS.B Rm, Rn {:  
   630     COUNT_INST(I_EXTSB);
   631     load_reg( R_EAX, Rm );
   632     MOVSX_r8_r32( R_EAX, R_EAX );
   633     store_reg( R_EAX, Rn );
   634 :}
   635 EXTS.W Rm, Rn {:  
   636     COUNT_INST(I_EXTSW);
   637     load_reg( R_EAX, Rm );
   638     MOVSX_r16_r32( R_EAX, R_EAX );
   639     store_reg( R_EAX, Rn );
   640 :}
   641 EXTU.B Rm, Rn {:  
   642     COUNT_INST(I_EXTUB);
   643     load_reg( R_EAX, Rm );
   644     MOVZX_r8_r32( R_EAX, R_EAX );
   645     store_reg( R_EAX, Rn );
   646 :}
   647 EXTU.W Rm, Rn {:  
   648     COUNT_INST(I_EXTUW);
   649     load_reg( R_EAX, Rm );
   650     MOVZX_r16_r32( R_EAX, R_EAX );
   651     store_reg( R_EAX, Rn );
   652 :}
   653 MAC.L @Rm+, @Rn+ {:
   654     COUNT_INST(I_MACL);
   655     if( Rm == Rn ) {
   656 	load_reg( R_EAX, Rm );
   657 	check_ralign32( R_EAX );
   658 	MMU_TRANSLATE_READ( R_EAX );
   659 	MOV_r32_esp8(R_EAX, 0);
   660 	load_reg( R_EAX, Rn );
   661 	ADD_imm8s_r32( 4, R_EAX );
   662 	MMU_TRANSLATE_READ( R_EAX );
   663 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
   664 	// Note translate twice in case of page boundaries. Maybe worth
   665 	// adding a page-boundary check to skip the second translation
   666     } else {
   667 	load_reg( R_EAX, Rm );
   668 	check_ralign32( R_EAX );
   669 	MMU_TRANSLATE_READ( R_EAX );
   670 	MOV_r32_esp8( R_EAX, 0 );
   671 	load_reg( R_EAX, Rn );
   672 	check_ralign32( R_EAX );
   673 	MMU_TRANSLATE_READ( R_EAX );
   674 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   675 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   676     }
   677     MEM_READ_LONG( R_EAX, R_EAX );
   678     MOV_r32_esp8( R_EAX, 4 );
   679     MOV_esp8_r32( 0, R_EAX );
   680     MEM_READ_LONG( R_EAX, R_EAX );
   681     MOV_esp8_r32( 4, R_ECX );
   683     IMUL_r32( R_ECX );
   684     ADD_r32_sh4r( R_EAX, R_MACL );
   685     ADC_r32_sh4r( R_EDX, R_MACH );
   687     load_spreg( R_ECX, R_S );
   688     TEST_r32_r32(R_ECX, R_ECX);
   689     JE_rel8( nosat );
   690     call_func0( signsat48 );
   691     JMP_TARGET( nosat );
   692     sh4_x86.tstate = TSTATE_NONE;
   693 :}
   694 MAC.W @Rm+, @Rn+ {:  
   695     COUNT_INST(I_MACW);
   696     if( Rm == Rn ) {
   697 	load_reg( R_EAX, Rm );
   698 	check_ralign16( R_EAX );
   699 	MMU_TRANSLATE_READ( R_EAX );
   700         MOV_r32_esp8( R_EAX, 0 );
   701 	load_reg( R_EAX, Rn );
   702 	ADD_imm8s_r32( 2, R_EAX );
   703 	MMU_TRANSLATE_READ( R_EAX );
   704 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   705 	// Note translate twice in case of page boundaries. Maybe worth
   706 	// adding a page-boundary check to skip the second translation
   707     } else {
   708 	load_reg( R_EAX, Rm );
   709 	check_ralign16( R_EAX );
   710 	MMU_TRANSLATE_READ( R_EAX );
   711         MOV_r32_esp8( R_EAX, 0 );
   712 	load_reg( R_EAX, Rn );
   713 	check_ralign16( R_EAX );
   714 	MMU_TRANSLATE_READ( R_EAX );
   715 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   716 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   717     }
   718     MEM_READ_WORD( R_EAX, R_EAX );
   719     MOV_r32_esp8( R_EAX, 4 );
   720     MOV_esp8_r32( 0, R_EAX );
   721     MEM_READ_WORD( R_EAX, R_EAX );
   722     MOV_esp8_r32( 4, R_ECX );
   724     IMUL_r32( R_ECX );
   725     load_spreg( R_ECX, R_S );
   726     TEST_r32_r32( R_ECX, R_ECX );
   727     JE_rel8( nosat );
   729     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   730     JNO_rel8( end );            // 2
   731     load_imm32( R_EDX, 1 );         // 5
   732     store_spreg( R_EDX, R_MACH );   // 6
   733     JS_rel8( positive );        // 2
   734     load_imm32( R_EAX, 0x80000000 );// 5
   735     store_spreg( R_EAX, R_MACL );   // 6
   736     JMP_rel8(end2);           // 2
   738     JMP_TARGET(positive);
   739     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   740     store_spreg( R_EAX, R_MACL );   // 6
   741     JMP_rel8(end3);            // 2
   743     JMP_TARGET(nosat);
   744     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   745     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   746     JMP_TARGET(end);
   747     JMP_TARGET(end2);
   748     JMP_TARGET(end3);
   749     sh4_x86.tstate = TSTATE_NONE;
   750 :}
   751 MOVT Rn {:  
   752     COUNT_INST(I_MOVT);
   753     load_spreg( R_EAX, R_T );
   754     store_reg( R_EAX, Rn );
   755 :}
   756 MUL.L Rm, Rn {:  
   757     COUNT_INST(I_MULL);
   758     load_reg( R_EAX, Rm );
   759     load_reg( R_ECX, Rn );
   760     MUL_r32( R_ECX );
   761     store_spreg( R_EAX, R_MACL );
   762     sh4_x86.tstate = TSTATE_NONE;
   763 :}
   764 MULS.W Rm, Rn {:
   765     COUNT_INST(I_MULSW);
   766     load_reg16s( R_EAX, Rm );
   767     load_reg16s( R_ECX, Rn );
   768     MUL_r32( R_ECX );
   769     store_spreg( R_EAX, R_MACL );
   770     sh4_x86.tstate = TSTATE_NONE;
   771 :}
   772 MULU.W Rm, Rn {:  
   773     COUNT_INST(I_MULUW);
   774     load_reg16u( R_EAX, Rm );
   775     load_reg16u( R_ECX, Rn );
   776     MUL_r32( R_ECX );
   777     store_spreg( R_EAX, R_MACL );
   778     sh4_x86.tstate = TSTATE_NONE;
   779 :}
   780 NEG Rm, Rn {:
   781     COUNT_INST(I_NEG);
   782     load_reg( R_EAX, Rm );
   783     NEG_r32( R_EAX );
   784     store_reg( R_EAX, Rn );
   785     sh4_x86.tstate = TSTATE_NONE;
   786 :}
   787 NEGC Rm, Rn {:  
   788     COUNT_INST(I_NEGC);
   789     load_reg( R_EAX, Rm );
   790     XOR_r32_r32( R_ECX, R_ECX );
   791     LDC_t();
   792     SBB_r32_r32( R_EAX, R_ECX );
   793     store_reg( R_ECX, Rn );
   794     SETC_t();
   795     sh4_x86.tstate = TSTATE_C;
   796 :}
   797 NOT Rm, Rn {:  
   798     COUNT_INST(I_NOT);
   799     load_reg( R_EAX, Rm );
   800     NOT_r32( R_EAX );
   801     store_reg( R_EAX, Rn );
   802     sh4_x86.tstate = TSTATE_NONE;
   803 :}
   804 OR Rm, Rn {:  
   805     COUNT_INST(I_OR);
   806     load_reg( R_EAX, Rm );
   807     load_reg( R_ECX, Rn );
   808     OR_r32_r32( R_EAX, R_ECX );
   809     store_reg( R_ECX, Rn );
   810     sh4_x86.tstate = TSTATE_NONE;
   811 :}
   812 OR #imm, R0 {:
   813     COUNT_INST(I_ORI);
   814     load_reg( R_EAX, 0 );
   815     OR_imm32_r32(imm, R_EAX);
   816     store_reg( R_EAX, 0 );
   817     sh4_x86.tstate = TSTATE_NONE;
   818 :}
   819 OR.B #imm, @(R0, GBR) {:  
   820     COUNT_INST(I_ORB);
   821     load_reg( R_EAX, 0 );
   822     load_spreg( R_ECX, R_GBR );
   823     ADD_r32_r32( R_ECX, R_EAX );
   824     MMU_TRANSLATE_WRITE( R_EAX );
   825     MOV_r32_esp8( R_EAX, 0 );
   826     MEM_READ_BYTE( R_EAX, R_EDX );
   827     MOV_esp8_r32( 0, R_EAX );
   828     OR_imm32_r32(imm, R_EDX );
   829     MEM_WRITE_BYTE( R_EAX, R_EDX );
   830     sh4_x86.tstate = TSTATE_NONE;
   831 :}
   832 ROTCL Rn {:
   833     COUNT_INST(I_ROTCL);
   834     load_reg( R_EAX, Rn );
   835     if( sh4_x86.tstate != TSTATE_C ) {
   836 	LDC_t();
   837     }
   838     RCL1_r32( R_EAX );
   839     store_reg( R_EAX, Rn );
   840     SETC_t();
   841     sh4_x86.tstate = TSTATE_C;
   842 :}
   843 ROTCR Rn {:  
   844     COUNT_INST(I_ROTCR);
   845     load_reg( R_EAX, Rn );
   846     if( sh4_x86.tstate != TSTATE_C ) {
   847 	LDC_t();
   848     }
   849     RCR1_r32( R_EAX );
   850     store_reg( R_EAX, Rn );
   851     SETC_t();
   852     sh4_x86.tstate = TSTATE_C;
   853 :}
   854 ROTL Rn {:  
   855     COUNT_INST(I_ROTL);
   856     load_reg( R_EAX, Rn );
   857     ROL1_r32( R_EAX );
   858     store_reg( R_EAX, Rn );
   859     SETC_t();
   860     sh4_x86.tstate = TSTATE_C;
   861 :}
   862 ROTR Rn {:  
   863     COUNT_INST(I_ROTR);
   864     load_reg( R_EAX, Rn );
   865     ROR1_r32( R_EAX );
   866     store_reg( R_EAX, Rn );
   867     SETC_t();
   868     sh4_x86.tstate = TSTATE_C;
   869 :}
   870 SHAD Rm, Rn {:
   871     COUNT_INST(I_SHAD);
   872     /* Annoyingly enough, not directly convertible */
   873     load_reg( R_EAX, Rn );
   874     load_reg( R_ECX, Rm );
   875     CMP_imm32_r32( 0, R_ECX );
   876     JGE_rel8(doshl);
   878     NEG_r32( R_ECX );      // 2
   879     AND_imm8_r8( 0x1F, R_CL ); // 3
   880     JE_rel8(emptysar);     // 2
   881     SAR_r32_CL( R_EAX );       // 2
   882     JMP_rel8(end);          // 2
   884     JMP_TARGET(emptysar);
   885     SAR_imm8_r32(31, R_EAX );  // 3
   886     JMP_rel8(end2);
   888     JMP_TARGET(doshl);
   889     AND_imm8_r8( 0x1F, R_CL ); // 3
   890     SHL_r32_CL( R_EAX );       // 2
   891     JMP_TARGET(end);
   892     JMP_TARGET(end2);
   893     store_reg( R_EAX, Rn );
   894     sh4_x86.tstate = TSTATE_NONE;
   895 :}
   896 SHLD Rm, Rn {:  
   897     COUNT_INST(I_SHLD);
   898     load_reg( R_EAX, Rn );
   899     load_reg( R_ECX, Rm );
   900     CMP_imm32_r32( 0, R_ECX );
   901     JGE_rel8(doshl);
   903     NEG_r32( R_ECX );      // 2
   904     AND_imm8_r8( 0x1F, R_CL ); // 3
   905     JE_rel8(emptyshr );
   906     SHR_r32_CL( R_EAX );       // 2
   907     JMP_rel8(end);          // 2
   909     JMP_TARGET(emptyshr);
   910     XOR_r32_r32( R_EAX, R_EAX );
   911     JMP_rel8(end2);
   913     JMP_TARGET(doshl);
   914     AND_imm8_r8( 0x1F, R_CL ); // 3
   915     SHL_r32_CL( R_EAX );       // 2
   916     JMP_TARGET(end);
   917     JMP_TARGET(end2);
   918     store_reg( R_EAX, Rn );
   919     sh4_x86.tstate = TSTATE_NONE;
   920 :}
   921 SHAL Rn {: 
   922     COUNT_INST(I_SHAL);
   923     load_reg( R_EAX, Rn );
   924     SHL1_r32( R_EAX );
   925     SETC_t();
   926     store_reg( R_EAX, Rn );
   927     sh4_x86.tstate = TSTATE_C;
   928 :}
   929 SHAR Rn {:  
   930     COUNT_INST(I_SHAR);
   931     load_reg( R_EAX, Rn );
   932     SAR1_r32( R_EAX );
   933     SETC_t();
   934     store_reg( R_EAX, Rn );
   935     sh4_x86.tstate = TSTATE_C;
   936 :}
   937 SHLL Rn {:  
   938     COUNT_INST(I_SHLL);
   939     load_reg( R_EAX, Rn );
   940     SHL1_r32( R_EAX );
   941     SETC_t();
   942     store_reg( R_EAX, Rn );
   943     sh4_x86.tstate = TSTATE_C;
   944 :}
   945 SHLL2 Rn {:
   946     COUNT_INST(I_SHLL);
   947     load_reg( R_EAX, Rn );
   948     SHL_imm8_r32( 2, R_EAX );
   949     store_reg( R_EAX, Rn );
   950     sh4_x86.tstate = TSTATE_NONE;
   951 :}
   952 SHLL8 Rn {:  
   953     COUNT_INST(I_SHLL);
   954     load_reg( R_EAX, Rn );
   955     SHL_imm8_r32( 8, R_EAX );
   956     store_reg( R_EAX, Rn );
   957     sh4_x86.tstate = TSTATE_NONE;
   958 :}
   959 SHLL16 Rn {:  
   960     COUNT_INST(I_SHLL);
   961     load_reg( R_EAX, Rn );
   962     SHL_imm8_r32( 16, R_EAX );
   963     store_reg( R_EAX, Rn );
   964     sh4_x86.tstate = TSTATE_NONE;
   965 :}
   966 SHLR Rn {:  
   967     COUNT_INST(I_SHLR);
   968     load_reg( R_EAX, Rn );
   969     SHR1_r32( R_EAX );
   970     SETC_t();
   971     store_reg( R_EAX, Rn );
   972     sh4_x86.tstate = TSTATE_C;
   973 :}
   974 SHLR2 Rn {:  
   975     COUNT_INST(I_SHLR);
   976     load_reg( R_EAX, Rn );
   977     SHR_imm8_r32( 2, R_EAX );
   978     store_reg( R_EAX, Rn );
   979     sh4_x86.tstate = TSTATE_NONE;
   980 :}
   981 SHLR8 Rn {:  
   982     COUNT_INST(I_SHLR);
   983     load_reg( R_EAX, Rn );
   984     SHR_imm8_r32( 8, R_EAX );
   985     store_reg( R_EAX, Rn );
   986     sh4_x86.tstate = TSTATE_NONE;
   987 :}
   988 SHLR16 Rn {:  
   989     COUNT_INST(I_SHLR);
   990     load_reg( R_EAX, Rn );
   991     SHR_imm8_r32( 16, R_EAX );
   992     store_reg( R_EAX, Rn );
   993     sh4_x86.tstate = TSTATE_NONE;
   994 :}
   995 SUB Rm, Rn {:  
   996     COUNT_INST(I_SUB);
   997     load_reg( R_EAX, Rm );
   998     load_reg( R_ECX, Rn );
   999     SUB_r32_r32( R_EAX, R_ECX );
  1000     store_reg( R_ECX, Rn );
  1001     sh4_x86.tstate = TSTATE_NONE;
  1002 :}
  1003 SUBC Rm, Rn {:  
  1004     COUNT_INST(I_SUBC);
  1005     load_reg( R_EAX, Rm );
  1006     load_reg( R_ECX, Rn );
  1007     if( sh4_x86.tstate != TSTATE_C ) {
   1008 	LDC_t();
   1009     }
  1010     SBB_r32_r32( R_EAX, R_ECX );
  1011     store_reg( R_ECX, Rn );
  1012     SETC_t();
  1013     sh4_x86.tstate = TSTATE_C;
  1014 :}
  1015 SUBV Rm, Rn {:  
  1016     COUNT_INST(I_SUBV);
  1017     load_reg( R_EAX, Rm );
  1018     load_reg( R_ECX, Rn );
  1019     SUB_r32_r32( R_EAX, R_ECX );
  1020     store_reg( R_ECX, Rn );
  1021     SETO_t();
  1022     sh4_x86.tstate = TSTATE_O;
  1023 :}
  1024 SWAP.B Rm, Rn {:  
  1025     COUNT_INST(I_SWAPB);
  1026     load_reg( R_EAX, Rm );
  1027     XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
  1028     store_reg( R_EAX, Rn );
  1029 :}
  1030 SWAP.W Rm, Rn {:  
  1031     COUNT_INST(I_SWAPB);
  1032     load_reg( R_EAX, Rm );
  1033     MOV_r32_r32( R_EAX, R_ECX );
  1034     SHL_imm8_r32( 16, R_ECX );
  1035     SHR_imm8_r32( 16, R_EAX );
  1036     OR_r32_r32( R_EAX, R_ECX );
  1037     store_reg( R_ECX, Rn );
  1038     sh4_x86.tstate = TSTATE_NONE;
  1039 :}
  1040 TAS.B @Rn {:  
  1041     COUNT_INST(I_TASB);
  1042     load_reg( R_EAX, Rn );
  1043     MMU_TRANSLATE_WRITE( R_EAX );
  1044     MOV_r32_esp8( R_EAX, 0 );
  1045     MEM_READ_BYTE( R_EAX, R_EDX );
  1046     TEST_r8_r8( R_DL, R_DL );
  1047     SETE_t();
  1048     OR_imm8_r8( 0x80, R_DL );
  1049     MOV_esp8_r32( 0, R_EAX );
  1050     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1051     sh4_x86.tstate = TSTATE_NONE;
  1052 :}
  1053 TST Rm, Rn {:  
  1054     COUNT_INST(I_TST);
  1055     load_reg( R_EAX, Rm );
  1056     load_reg( R_ECX, Rn );
  1057     TEST_r32_r32( R_EAX, R_ECX );
  1058     SETE_t();
  1059     sh4_x86.tstate = TSTATE_E;
  1060 :}
  1061 TST #imm, R0 {:  
  1062     COUNT_INST(I_TSTI);
  1063     load_reg( R_EAX, 0 );
  1064     TEST_imm32_r32( imm, R_EAX );
  1065     SETE_t();
  1066     sh4_x86.tstate = TSTATE_E;
  1067 :}
  1068 TST.B #imm, @(R0, GBR) {:  
  1069     COUNT_INST(I_TSTB);
  1070     load_reg( R_EAX, 0);
   1071     load_spreg( R_ECX, R_GBR );
  1072     ADD_r32_r32( R_ECX, R_EAX );
  1073     MMU_TRANSLATE_READ( R_EAX );
  1074     MEM_READ_BYTE( R_EAX, R_EAX );
  1075     TEST_imm8_r8( imm, R_AL );
  1076     SETE_t();
  1077     sh4_x86.tstate = TSTATE_E;
  1078 :}
  1079 XOR Rm, Rn {:  
  1080     COUNT_INST(I_XOR);
  1081     load_reg( R_EAX, Rm );
  1082     load_reg( R_ECX, Rn );
  1083     XOR_r32_r32( R_EAX, R_ECX );
  1084     store_reg( R_ECX, Rn );
  1085     sh4_x86.tstate = TSTATE_NONE;
  1086 :}
  1087 XOR #imm, R0 {:  
  1088     COUNT_INST(I_XORI);
  1089     load_reg( R_EAX, 0 );
  1090     XOR_imm32_r32( imm, R_EAX );
  1091     store_reg( R_EAX, 0 );
  1092     sh4_x86.tstate = TSTATE_NONE;
  1093 :}
  1094 XOR.B #imm, @(R0, GBR) {:  
  1095     COUNT_INST(I_XORB);
  1096     load_reg( R_EAX, 0 );
  1097     load_spreg( R_ECX, R_GBR );
  1098     ADD_r32_r32( R_ECX, R_EAX );
  1099     MMU_TRANSLATE_WRITE( R_EAX );
  1100     MOV_r32_esp8( R_EAX, 0 );
  1101     MEM_READ_BYTE(R_EAX, R_EDX);
  1102     MOV_esp8_r32( 0, R_EAX );
  1103     XOR_imm32_r32( imm, R_EDX );
  1104     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1105     sh4_x86.tstate = TSTATE_NONE;
  1106 :}
  1107 XTRCT Rm, Rn {:
  1108     COUNT_INST(I_XTRCT);
  1109     load_reg( R_EAX, Rm );
  1110     load_reg( R_ECX, Rn );
  1111     SHL_imm8_r32( 16, R_EAX );
  1112     SHR_imm8_r32( 16, R_ECX );
  1113     OR_r32_r32( R_EAX, R_ECX );
  1114     store_reg( R_ECX, Rn );
  1115     sh4_x86.tstate = TSTATE_NONE;
  1116 :}
  1118 /* Data move instructions */
  1119 MOV Rm, Rn {:  
  1120     COUNT_INST(I_MOV);
  1121     load_reg( R_EAX, Rm );
  1122     store_reg( R_EAX, Rn );
  1123 :}
  1124 MOV #imm, Rn {:  
  1125     COUNT_INST(I_MOVI);
  1126     load_imm32( R_EAX, imm );
  1127     store_reg( R_EAX, Rn );
  1128 :}
  1129 MOV.B Rm, @Rn {:  
  1130     COUNT_INST(I_MOVB);
  1131     load_reg( R_EAX, Rn );
  1132     MMU_TRANSLATE_WRITE( R_EAX );
  1133     load_reg( R_EDX, Rm );
  1134     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1135     sh4_x86.tstate = TSTATE_NONE;
  1136 :}
  1137 MOV.B Rm, @-Rn {:  
  1138     COUNT_INST(I_MOVB);
  1139     load_reg( R_EAX, Rn );
  1140     ADD_imm8s_r32( -1, R_EAX );
  1141     MMU_TRANSLATE_WRITE( R_EAX );
  1142     load_reg( R_EDX, Rm );
  1143     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
  1144     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1145     sh4_x86.tstate = TSTATE_NONE;
  1146 :}
  1147 MOV.B Rm, @(R0, Rn) {:  
  1148     COUNT_INST(I_MOVB);
  1149     load_reg( R_EAX, 0 );
  1150     load_reg( R_ECX, Rn );
  1151     ADD_r32_r32( R_ECX, R_EAX );
  1152     MMU_TRANSLATE_WRITE( R_EAX );
  1153     load_reg( R_EDX, Rm );
  1154     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1155     sh4_x86.tstate = TSTATE_NONE;
  1156 :}
  1157 MOV.B R0, @(disp, GBR) {:  
  1158     COUNT_INST(I_MOVB);
  1159     load_spreg( R_EAX, R_GBR );
  1160     ADD_imm32_r32( disp, R_EAX );
  1161     MMU_TRANSLATE_WRITE( R_EAX );
  1162     load_reg( R_EDX, 0 );
  1163     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1164     sh4_x86.tstate = TSTATE_NONE;
  1165 :}
  1166 MOV.B R0, @(disp, Rn) {:  
  1167     COUNT_INST(I_MOVB);
  1168     load_reg( R_EAX, Rn );
  1169     ADD_imm32_r32( disp, R_EAX );
  1170     MMU_TRANSLATE_WRITE( R_EAX );
  1171     load_reg( R_EDX, 0 );
  1172     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1173     sh4_x86.tstate = TSTATE_NONE;
  1174 :}
  1175 MOV.B @Rm, Rn {:  
  1176     COUNT_INST(I_MOVB);
  1177     load_reg( R_EAX, Rm );
  1178     MMU_TRANSLATE_READ( R_EAX );
  1179     MEM_READ_BYTE( R_EAX, R_EAX );
  1180     store_reg( R_EAX, Rn );
  1181     sh4_x86.tstate = TSTATE_NONE;
  1182 :}
  1183 MOV.B @Rm+, Rn {:  
  1184     COUNT_INST(I_MOVB);
  1185     load_reg( R_EAX, Rm );
  1186     MMU_TRANSLATE_READ( R_EAX );
  1187     ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
  1188     MEM_READ_BYTE( R_EAX, R_EAX );
  1189     store_reg( R_EAX, Rn );
  1190     sh4_x86.tstate = TSTATE_NONE;
  1191 :}
  1192 MOV.B @(R0, Rm), Rn {:  
  1193     COUNT_INST(I_MOVB);
  1194     load_reg( R_EAX, 0 );
  1195     load_reg( R_ECX, Rm );
  1196     ADD_r32_r32( R_ECX, R_EAX );
   1197     MMU_TRANSLATE_READ( R_EAX );
  1198     MEM_READ_BYTE( R_EAX, R_EAX );
  1199     store_reg( R_EAX, Rn );
  1200     sh4_x86.tstate = TSTATE_NONE;
  1201 :}
  1202 MOV.B @(disp, GBR), R0 {:  
  1203     COUNT_INST(I_MOVB);
  1204     load_spreg( R_EAX, R_GBR );
  1205     ADD_imm32_r32( disp, R_EAX );
  1206     MMU_TRANSLATE_READ( R_EAX );
  1207     MEM_READ_BYTE( R_EAX, R_EAX );
  1208     store_reg( R_EAX, 0 );
  1209     sh4_x86.tstate = TSTATE_NONE;
  1210 :}
  1211 MOV.B @(disp, Rm), R0 {:  
  1212     COUNT_INST(I_MOVB);
  1213     load_reg( R_EAX, Rm );
  1214     ADD_imm32_r32( disp, R_EAX );
  1215     MMU_TRANSLATE_READ( R_EAX );
  1216     MEM_READ_BYTE( R_EAX, R_EAX );
  1217     store_reg( R_EAX, 0 );
  1218     sh4_x86.tstate = TSTATE_NONE;
  1219 :}
  1220 MOV.L Rm, @Rn {:
  1221     COUNT_INST(I_MOVL);
  1222     load_reg( R_EAX, Rn );
  1223     check_walign32(R_EAX);
  1224     MMU_TRANSLATE_WRITE( R_EAX );
  1225     load_reg( R_EDX, Rm );
  1226     MEM_WRITE_LONG( R_EAX, R_EDX );
  1227     sh4_x86.tstate = TSTATE_NONE;
  1228 :}
  1229 MOV.L Rm, @-Rn {:  
  1230     COUNT_INST(I_MOVL);
  1231     load_reg( R_EAX, Rn );
  1232     ADD_imm8s_r32( -4, R_EAX );
  1233     check_walign32( R_EAX );
  1234     MMU_TRANSLATE_WRITE( R_EAX );
  1235     load_reg( R_EDX, Rm );
  1236     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  1237     MEM_WRITE_LONG( R_EAX, R_EDX );
  1238     sh4_x86.tstate = TSTATE_NONE;
  1239 :}
  1240 MOV.L Rm, @(R0, Rn) {:  
  1241     COUNT_INST(I_MOVL);
  1242     load_reg( R_EAX, 0 );
  1243     load_reg( R_ECX, Rn );
  1244     ADD_r32_r32( R_ECX, R_EAX );
  1245     check_walign32( R_EAX );
  1246     MMU_TRANSLATE_WRITE( R_EAX );
  1247     load_reg( R_EDX, Rm );
  1248     MEM_WRITE_LONG( R_EAX, R_EDX );
  1249     sh4_x86.tstate = TSTATE_NONE;
  1250 :}
  1251 MOV.L R0, @(disp, GBR) {:  
  1252     COUNT_INST(I_MOVL);
  1253     load_spreg( R_EAX, R_GBR );
  1254     ADD_imm32_r32( disp, R_EAX );
  1255     check_walign32( R_EAX );
  1256     MMU_TRANSLATE_WRITE( R_EAX );
  1257     load_reg( R_EDX, 0 );
  1258     MEM_WRITE_LONG( R_EAX, R_EDX );
  1259     sh4_x86.tstate = TSTATE_NONE;
  1260 :}
  1261 MOV.L Rm, @(disp, Rn) {:  
  1262     COUNT_INST(I_MOVL);
  1263     load_reg( R_EAX, Rn );
  1264     ADD_imm32_r32( disp, R_EAX );
  1265     check_walign32( R_EAX );
  1266     MMU_TRANSLATE_WRITE( R_EAX );
  1267     load_reg( R_EDX, Rm );
  1268     MEM_WRITE_LONG( R_EAX, R_EDX );
  1269     sh4_x86.tstate = TSTATE_NONE;
  1270 :}
  1271 MOV.L @Rm, Rn {:  
  1272     COUNT_INST(I_MOVL);
  1273     load_reg( R_EAX, Rm );
  1274     check_ralign32( R_EAX );
  1275     MMU_TRANSLATE_READ( R_EAX );
  1276     MEM_READ_LONG( R_EAX, R_EAX );
  1277     store_reg( R_EAX, Rn );
  1278     sh4_x86.tstate = TSTATE_NONE;
  1279 :}
  1280 MOV.L @Rm+, Rn {:  
  1281     COUNT_INST(I_MOVL);
  1282     load_reg( R_EAX, Rm );
  1283     check_ralign32( R_EAX );
  1284     MMU_TRANSLATE_READ( R_EAX );
  1285     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1286     MEM_READ_LONG( R_EAX, R_EAX );
  1287     store_reg( R_EAX, Rn );
  1288     sh4_x86.tstate = TSTATE_NONE;
  1289 :}
  1290 MOV.L @(R0, Rm), Rn {:  
  1291     COUNT_INST(I_MOVL);
  1292     load_reg( R_EAX, 0 );
  1293     load_reg( R_ECX, Rm );
  1294     ADD_r32_r32( R_ECX, R_EAX );
  1295     check_ralign32( R_EAX );
  1296     MMU_TRANSLATE_READ( R_EAX );
  1297     MEM_READ_LONG( R_EAX, R_EAX );
  1298     store_reg( R_EAX, Rn );
  1299     sh4_x86.tstate = TSTATE_NONE;
  1300 :}
  1301 MOV.L @(disp, GBR), R0 {:
  1302     COUNT_INST(I_MOVL);
  1303     load_spreg( R_EAX, R_GBR );
  1304     ADD_imm32_r32( disp, R_EAX );
  1305     check_ralign32( R_EAX );
  1306     MMU_TRANSLATE_READ( R_EAX );
  1307     MEM_READ_LONG( R_EAX, R_EAX );
  1308     store_reg( R_EAX, 0 );
  1309     sh4_x86.tstate = TSTATE_NONE;
  1310 :}
  1311 MOV.L @(disp, PC), Rn {:  
  1312     COUNT_INST(I_MOVLPC);
  1313     if( sh4_x86.in_delay_slot ) {
  1314 	SLOTILLEGAL();
  1315     } else {
  1316 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1317 	if( IS_IN_ICACHE(target) ) {
  1318 	    // If the target address is in the same page as the code, it's
  1319 	    // pretty safe to just ref it directly and circumvent the whole
  1320 	    // memory subsystem. (this is a big performance win)
  1322 	    // FIXME: There's a corner-case that's not handled here when
  1323 	    // the current code-page is in the ITLB but not in the UTLB.
  1324 	    // (should generate a TLB miss although need to test SH4 
  1325 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1326 	    // behaviour though.
  1327 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1328 	    MOV_moff32_EAX( ptr );
  1329 	} else {
  1330 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1331 	    // different virtual address than the translation was done with,
  1332 	    // but we can safely assume that the low bits are the same.
  1333 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1334 	    ADD_sh4r_r32( R_PC, R_EAX );
  1335 	    MMU_TRANSLATE_READ( R_EAX );
  1336 	    MEM_READ_LONG( R_EAX, R_EAX );
  1337 	    sh4_x86.tstate = TSTATE_NONE;
   1338 	}
   1339 	store_reg( R_EAX, Rn );
   1340     }
  1341 :}
  1342 MOV.L @(disp, Rm), Rn {:  
  1343     COUNT_INST(I_MOVL);
  1344     load_reg( R_EAX, Rm );
  1345     ADD_imm8s_r32( disp, R_EAX );
  1346     check_ralign32( R_EAX );
  1347     MMU_TRANSLATE_READ( R_EAX );
  1348     MEM_READ_LONG( R_EAX, R_EAX );
  1349     store_reg( R_EAX, Rn );
  1350     sh4_x86.tstate = TSTATE_NONE;
  1351 :}
  1352 MOV.W Rm, @Rn {:  
  1353     COUNT_INST(I_MOVW);
  1354     load_reg( R_EAX, Rn );
  1355     check_walign16( R_EAX );
   1356     MMU_TRANSLATE_WRITE( R_EAX );
  1357     load_reg( R_EDX, Rm );
  1358     MEM_WRITE_WORD( R_EAX, R_EDX );
  1359     sh4_x86.tstate = TSTATE_NONE;
  1360 :}
  1361 MOV.W Rm, @-Rn {:  
  1362     COUNT_INST(I_MOVW);
  1363     load_reg( R_EAX, Rn );
  1364     ADD_imm8s_r32( -2, R_EAX );
  1365     check_walign16( R_EAX );
  1366     MMU_TRANSLATE_WRITE( R_EAX );
  1367     load_reg( R_EDX, Rm );
  1368     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
  1369     MEM_WRITE_WORD( R_EAX, R_EDX );
  1370     sh4_x86.tstate = TSTATE_NONE;
  1371 :}
  1372 MOV.W Rm, @(R0, Rn) {:  
  1373     COUNT_INST(I_MOVW);
  1374     load_reg( R_EAX, 0 );
  1375     load_reg( R_ECX, Rn );
  1376     ADD_r32_r32( R_ECX, R_EAX );
  1377     check_walign16( R_EAX );
  1378     MMU_TRANSLATE_WRITE( R_EAX );
  1379     load_reg( R_EDX, Rm );
  1380     MEM_WRITE_WORD( R_EAX, R_EDX );
  1381     sh4_x86.tstate = TSTATE_NONE;
  1382 :}
  1383 MOV.W R0, @(disp, GBR) {:  
  1384     COUNT_INST(I_MOVW);
  1385     load_spreg( R_EAX, R_GBR );
  1386     ADD_imm32_r32( disp, R_EAX );
  1387     check_walign16( R_EAX );
  1388     MMU_TRANSLATE_WRITE( R_EAX );
  1389     load_reg( R_EDX, 0 );
  1390     MEM_WRITE_WORD( R_EAX, R_EDX );
  1391     sh4_x86.tstate = TSTATE_NONE;
  1392 :}
  1393 MOV.W R0, @(disp, Rn) {:  
  1394     COUNT_INST(I_MOVW);
  1395     load_reg( R_EAX, Rn );
  1396     ADD_imm32_r32( disp, R_EAX );
  1397     check_walign16( R_EAX );
  1398     MMU_TRANSLATE_WRITE( R_EAX );
  1399     load_reg( R_EDX, 0 );
  1400     MEM_WRITE_WORD( R_EAX, R_EDX );
  1401     sh4_x86.tstate = TSTATE_NONE;
  1402 :}
  1403 MOV.W @Rm, Rn {:  
  1404     COUNT_INST(I_MOVW);
  1405     load_reg( R_EAX, Rm );
  1406     check_ralign16( R_EAX );
  1407     MMU_TRANSLATE_READ( R_EAX );
  1408     MEM_READ_WORD( R_EAX, R_EAX );
  1409     store_reg( R_EAX, Rn );
  1410     sh4_x86.tstate = TSTATE_NONE;
  1411 :}
  1412 MOV.W @Rm+, Rn {:  
  1413     COUNT_INST(I_MOVW);
  1414     load_reg( R_EAX, Rm );
  1415     check_ralign16( R_EAX );
  1416     MMU_TRANSLATE_READ( R_EAX );
  1417     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
  1418     MEM_READ_WORD( R_EAX, R_EAX );
  1419     store_reg( R_EAX, Rn );
  1420     sh4_x86.tstate = TSTATE_NONE;
  1421 :}
  1422 MOV.W @(R0, Rm), Rn {:  
  1423     COUNT_INST(I_MOVW);
  1424     load_reg( R_EAX, 0 );
  1425     load_reg( R_ECX, Rm );
  1426     ADD_r32_r32( R_ECX, R_EAX );
  1427     check_ralign16( R_EAX );
  1428     MMU_TRANSLATE_READ( R_EAX );
  1429     MEM_READ_WORD( R_EAX, R_EAX );
  1430     store_reg( R_EAX, Rn );
  1431     sh4_x86.tstate = TSTATE_NONE;
  1432 :}
  1433 MOV.W @(disp, GBR), R0 {:  
  1434     COUNT_INST(I_MOVW);
  1435     load_spreg( R_EAX, R_GBR );
  1436     ADD_imm32_r32( disp, R_EAX );
  1437     check_ralign16( R_EAX );
  1438     MMU_TRANSLATE_READ( R_EAX );
  1439     MEM_READ_WORD( R_EAX, R_EAX );
  1440     store_reg( R_EAX, 0 );
  1441     sh4_x86.tstate = TSTATE_NONE;
  1442 :}
  1443 MOV.W @(disp, PC), Rn {:  
  1444     COUNT_INST(I_MOVW);
  1445     if( sh4_x86.in_delay_slot ) {
  1446 	SLOTILLEGAL();
  1447     } else {
  1448 	// See comments for MOV.L @(disp, PC), Rn
  1449 	uint32_t target = pc + disp + 4;
  1450 	if( IS_IN_ICACHE(target) ) {
  1451 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1452 	    MOV_moff32_EAX( ptr );
  1453 	    MOVSX_r16_r32( R_EAX, R_EAX );
  1454 	} else {
  1455 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1456 	    ADD_sh4r_r32( R_PC, R_EAX );
  1457 	    MMU_TRANSLATE_READ( R_EAX );
  1458 	    MEM_READ_WORD( R_EAX, R_EAX );
  1459 	    sh4_x86.tstate = TSTATE_NONE;
   1460 	}
   1461 	store_reg( R_EAX, Rn );
   1462     }
  1463 :}
  1464 MOV.W @(disp, Rm), R0 {:  
  1465     COUNT_INST(I_MOVW);
  1466     load_reg( R_EAX, Rm );
  1467     ADD_imm32_r32( disp, R_EAX );
  1468     check_ralign16( R_EAX );
  1469     MMU_TRANSLATE_READ( R_EAX );
  1470     MEM_READ_WORD( R_EAX, R_EAX );
  1471     store_reg( R_EAX, 0 );
  1472     sh4_x86.tstate = TSTATE_NONE;
  1473 :}
  1474 MOVA @(disp, PC), R0 {:  
  1475     COUNT_INST(I_MOVA);
  1476     if( sh4_x86.in_delay_slot ) {
  1477 	SLOTILLEGAL();
  1478     } else {
  1479 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1480 	ADD_sh4r_r32( R_PC, R_ECX );
  1481 	store_reg( R_ECX, 0 );
   1482 	sh4_x86.tstate = TSTATE_NONE;
   1483     }
  1484 :}
  1485 MOVCA.L R0, @Rn {:  
  1486     COUNT_INST(I_MOVCA);
  1487     load_reg( R_EAX, Rn );
  1488     check_walign32( R_EAX );
  1489     MMU_TRANSLATE_WRITE( R_EAX );
  1490     load_reg( R_EDX, 0 );
  1491     MEM_WRITE_LONG( R_EAX, R_EDX );
  1492     sh4_x86.tstate = TSTATE_NONE;
  1493 :}
  1495 /* Control transfer instructions */
  1496 BF disp {:
  1497     COUNT_INST(I_BF);
  1498     if( sh4_x86.in_delay_slot ) {
  1499 	SLOTILLEGAL();
  1500     } else {
  1501 	sh4vma_t target = disp + pc + 4;
  1502 	JT_rel8( nottaken );
  1503 	exit_block_rel(target, pc+2 );
  1504 	JMP_TARGET(nottaken);
   1505 	return 2;
   1506     }
  1507 :}
  1508 BF/S disp {:
  1509     COUNT_INST(I_BFS);
  1510     if( sh4_x86.in_delay_slot ) {
  1511 	SLOTILLEGAL();
  1512     } else {
  1513 	sh4_x86.in_delay_slot = DELAY_PC;
  1514 	if( UNTRANSLATABLE(pc+2) ) {
  1515 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1516 	    JT_rel8(nottaken);
  1517 	    ADD_imm32_r32( disp, R_EAX );
  1518 	    JMP_TARGET(nottaken);
  1519 	    ADD_sh4r_r32( R_PC, R_EAX );
  1520 	    store_spreg( R_EAX, R_NEW_PC );
  1521 	    exit_block_emu(pc+2);
  1522 	    sh4_x86.branch_taken = TRUE;
  1523 	    return 2;
  1524 	} else {
  1525 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1526 		CMP_imm8s_sh4r( 1, R_T );
   1527 		sh4_x86.tstate = TSTATE_E;
   1528 	    }
  1529 	    sh4vma_t target = disp + pc + 4;
  1530 	    OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
  1531 	    int save_tstate = sh4_x86.tstate;
  1532 	    sh4_translate_instruction(pc+2);
  1533 	    exit_block_rel( target, pc+4 );
  1535 	    // not taken
  1536 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1537 	    sh4_x86.tstate = save_tstate;
  1538 	    sh4_translate_instruction(pc+2);
   1539 	    return 4;
   1540 	}
   1541     }
  1542 :}
  1543 BRA disp {:  
  1544     COUNT_INST(I_BRA);
  1545     if( sh4_x86.in_delay_slot ) {
  1546 	SLOTILLEGAL();
  1547     } else {
  1548 	sh4_x86.in_delay_slot = DELAY_PC;
  1549 	sh4_x86.branch_taken = TRUE;
  1550 	if( UNTRANSLATABLE(pc+2) ) {
  1551 	    load_spreg( R_EAX, R_PC );
  1552 	    ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
  1553 	    store_spreg( R_EAX, R_NEW_PC );
  1554 	    exit_block_emu(pc+2);
  1555 	    return 2;
  1556 	} else {
  1557 	    sh4_translate_instruction( pc + 2 );
  1558 	    exit_block_rel( disp + pc + 4, pc+4 );
   1559 	    return 4;
   1560 	}
   1561     }
  1562 :}
  1563 BRAF Rn {:  
  1564     COUNT_INST(I_BRAF);
  1565     if( sh4_x86.in_delay_slot ) {
  1566 	SLOTILLEGAL();
  1567     } else {
  1568 	load_spreg( R_EAX, R_PC );
  1569 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1570 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1571 	store_spreg( R_EAX, R_NEW_PC );
  1572 	sh4_x86.in_delay_slot = DELAY_PC;
  1573 	sh4_x86.tstate = TSTATE_NONE;
  1574 	sh4_x86.branch_taken = TRUE;
  1575 	if( UNTRANSLATABLE(pc+2) ) {
  1576 	    exit_block_emu(pc+2);
  1577 	    return 2;
  1578 	} else {
  1579 	    sh4_translate_instruction( pc + 2 );
  1580 	    exit_block_newpcset(pc+2);
   1581 	    return 4;
   1582 	}
   1583     }
  1584 :}
  1585 BSR disp {:  
  1586     COUNT_INST(I_BSR);
  1587     if( sh4_x86.in_delay_slot ) {
  1588 	SLOTILLEGAL();
  1589     } else {
  1590 	load_spreg( R_EAX, R_PC );
  1591 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1592 	store_spreg( R_EAX, R_PR );
  1593 	sh4_x86.in_delay_slot = DELAY_PC;
  1594 	sh4_x86.branch_taken = TRUE;
  1595 	sh4_x86.tstate = TSTATE_NONE;
  1596 	if( UNTRANSLATABLE(pc+2) ) {
  1597 	    ADD_imm32_r32( disp, R_EAX );
  1598 	    store_spreg( R_EAX, R_NEW_PC );
  1599 	    exit_block_emu(pc+2);
  1600 	    return 2;
  1601 	} else {
  1602 	    sh4_translate_instruction( pc + 2 );
  1603 	    exit_block_rel( disp + pc + 4, pc+4 );
   1604 	    return 4;
   1605 	}
   1606     }
  1607 :}
  1608 BSRF Rn {:  
  1609     COUNT_INST(I_BSRF);
  1610     if( sh4_x86.in_delay_slot ) {
  1611 	SLOTILLEGAL();
  1612     } else {
  1613 	load_spreg( R_EAX, R_PC );
  1614 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1615 	store_spreg( R_EAX, R_PR );
  1616 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1617 	store_spreg( R_EAX, R_NEW_PC );
  1619 	sh4_x86.in_delay_slot = DELAY_PC;
  1620 	sh4_x86.tstate = TSTATE_NONE;
  1621 	sh4_x86.branch_taken = TRUE;
  1622 	if( UNTRANSLATABLE(pc+2) ) {
  1623 	    exit_block_emu(pc+2);
  1624 	    return 2;
  1625 	} else {
  1626 	    sh4_translate_instruction( pc + 2 );
  1627 	    exit_block_newpcset(pc+2);
   1628 	    return 4;
   1629 	}
   1630     }
  1631 :}
  1632 BT disp {:
  1633     COUNT_INST(I_BT);
  1634     if( sh4_x86.in_delay_slot ) {
  1635 	SLOTILLEGAL();
  1636     } else {
  1637 	sh4vma_t target = disp + pc + 4;
  1638 	JF_rel8( nottaken );
  1639 	exit_block_rel(target, pc+2 );
  1640 	JMP_TARGET(nottaken);
   1641 	return 2;
   1642     }
  1643 :}
  1644 BT/S disp {:
  1645     COUNT_INST(I_BTS);
  1646     if( sh4_x86.in_delay_slot ) {
  1647 	SLOTILLEGAL();
  1648     } else {
  1649 	sh4_x86.in_delay_slot = DELAY_PC;
  1650 	if( UNTRANSLATABLE(pc+2) ) {
  1651 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1652 	    JF_rel8(nottaken);
  1653 	    ADD_imm32_r32( disp, R_EAX );
  1654 	    JMP_TARGET(nottaken);
  1655 	    ADD_sh4r_r32( R_PC, R_EAX );
  1656 	    store_spreg( R_EAX, R_NEW_PC );
  1657 	    exit_block_emu(pc+2);
  1658 	    sh4_x86.branch_taken = TRUE;
  1659 	    return 2;
  1660 	} else {
  1661 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1662 		CMP_imm8s_sh4r( 1, R_T );
  1663 		sh4_x86.tstate = TSTATE_E;
  1664 	    }
  1665 	    OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
  1666 	    int save_tstate = sh4_x86.tstate;
  1667 	    sh4_translate_instruction(pc+2);
  1668 	    exit_block_rel( disp + pc + 4, pc+4 );
  1669 	    // not taken
  1670 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1671 	    sh4_x86.tstate = save_tstate;
  1672 	    sh4_translate_instruction(pc+2);
  1673 	    return 4;
  1674 	}
  1675     }
  1676 :}
  1677 JMP @Rn {:  
  1678     COUNT_INST(I_JMP);
  1679     if( sh4_x86.in_delay_slot ) {
  1680 	SLOTILLEGAL();
  1681     } else {
  1682 	load_reg( R_ECX, Rn );
  1683 	store_spreg( R_ECX, R_NEW_PC );
  1684 	sh4_x86.in_delay_slot = DELAY_PC;
  1685 	sh4_x86.branch_taken = TRUE;
  1686 	if( UNTRANSLATABLE(pc+2) ) {
  1687 	    exit_block_emu(pc+2);
  1688 	    return 2;
  1689 	} else {
  1690 	    sh4_translate_instruction(pc+2);
  1691 	    exit_block_newpcset(pc+2);
  1692 	    return 4;
  1693 	}
  1694     }
  1695 :}
  1696 JSR @Rn {:  
  1697     COUNT_INST(I_JSR);
  1698     if( sh4_x86.in_delay_slot ) {
  1699 	SLOTILLEGAL();
  1700     } else {
  1701 	load_spreg( R_EAX, R_PC );
  1702 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1703 	store_spreg( R_EAX, R_PR );
  1704 	load_reg( R_ECX, Rn );
  1705 	store_spreg( R_ECX, R_NEW_PC );
  1706 	sh4_x86.in_delay_slot = DELAY_PC;
  1707 	sh4_x86.branch_taken = TRUE;
  1708 	sh4_x86.tstate = TSTATE_NONE;
  1709 	if( UNTRANSLATABLE(pc+2) ) {
  1710 	    exit_block_emu(pc+2);
  1711 	    return 2;
  1712 	} else {
  1713 	    sh4_translate_instruction(pc+2);
  1714 	    exit_block_newpcset(pc+2);
  1715 	    return 4;
  1716 	}
  1717     }
  1718 :}
  1719 RTE {:  
  1720     COUNT_INST(I_RTE);
  1721     if( sh4_x86.in_delay_slot ) {
  1722 	SLOTILLEGAL();
  1723     } else {
  1724 	check_priv();
  1725 	load_spreg( R_ECX, R_SPC );
  1726 	store_spreg( R_ECX, R_NEW_PC );
  1727 	load_spreg( R_EAX, R_SSR );
  1728 	call_func1( sh4_write_sr, R_EAX );
  1729 	sh4_x86.in_delay_slot = DELAY_PC;
  1730 	sh4_x86.priv_checked = FALSE;
  1731 	sh4_x86.fpuen_checked = FALSE;
  1732 	sh4_x86.tstate = TSTATE_NONE;
  1733 	sh4_x86.branch_taken = TRUE;
  1734 	if( UNTRANSLATABLE(pc+2) ) {
  1735 	    exit_block_emu(pc+2);
  1736 	    return 2;
  1737 	} else {
  1738 	    sh4_translate_instruction(pc+2);
  1739 	    exit_block_newpcset(pc+2);
  1740 	    return 4;
  1741 	}
  1742     }
  1743 :}
  1744 RTS {:  
  1745     COUNT_INST(I_RTS);
  1746     if( sh4_x86.in_delay_slot ) {
  1747 	SLOTILLEGAL();
  1748     } else {
  1749 	load_spreg( R_ECX, R_PR );
  1750 	store_spreg( R_ECX, R_NEW_PC );
  1751 	sh4_x86.in_delay_slot = DELAY_PC;
  1752 	sh4_x86.branch_taken = TRUE;
  1753 	if( UNTRANSLATABLE(pc+2) ) {
  1754 	    exit_block_emu(pc+2);
  1755 	    return 2;
  1756 	} else {
  1757 	    sh4_translate_instruction(pc+2);
  1758 	    exit_block_newpcset(pc+2);
  1759 	    return 4;
  1760 	}
  1761     }
  1762 :}
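       /* TRAPA writes the current PC back to sh4r.pc before calling
        * sh4_raise_trap(), which is presumed to set up the exception
        * state itself; the block then exits with the PC already set. */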
  1763 TRAPA #imm {:  
  1764     COUNT_INST(I_TRAPA);
  1765     if( sh4_x86.in_delay_slot ) {
  1766 	SLOTILLEGAL();
  1767     } else {
  1768 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1769 	ADD_r32_sh4r( R_ECX, R_PC );
  1770 	load_imm32( R_EAX, imm );
  1771 	call_func1( sh4_raise_trap, R_EAX );
  1772 	sh4_x86.tstate = TSTATE_NONE;
  1773 	exit_block_pcset(pc);
  1774 	sh4_x86.branch_taken = TRUE;
  1775 	return 2;
  1776     }
  1777 :}
  1778 UNDEF {:  
  1779     COUNT_INST(I_UNDEF);
  1780     if( sh4_x86.in_delay_slot ) {
  1781 	SLOTILLEGAL();
  1782     } else {
  1783 	JMP_exc(EXC_ILLEGAL);
  1784 	return 2;
  1785     }
  1786 :}
  1788 CLRMAC {:  
  1789     COUNT_INST(I_CLRMAC);
  1790     XOR_r32_r32(R_EAX, R_EAX);
  1791     store_spreg( R_EAX, R_MACL );
  1792     store_spreg( R_EAX, R_MACH );
  1793     sh4_x86.tstate = TSTATE_NONE;
  1794 :}
  1795 CLRS {:
  1796     COUNT_INST(I_CLRS);
  1797     CLC();
  1798     SETC_sh4r(R_S);
  1799     sh4_x86.tstate = TSTATE_NONE;
  1800 :}
  1801 CLRT {:  
  1802     COUNT_INST(I_CLRT);
  1803     CLC();
  1804     SETC_t();
  1805     sh4_x86.tstate = TSTATE_C;
  1806 :}
  1807 SETS {:  
  1808     COUNT_INST(I_SETS);
  1809     STC();
  1810     SETC_sh4r(R_S);
  1811     sh4_x86.tstate = TSTATE_NONE;
  1812 :}
  1813 SETT {:  
  1814     COUNT_INST(I_SETT);
  1815     STC();
  1816     SETC_t();
  1817     sh4_x86.tstate = TSTATE_C;
  1818 :}
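       /* The FMOV variants below all key off FPSCR.SZ (sh4_x86.double_size):
        * when set, each FMOV transfers a 64-bit register pair via the
        * load_dr0/load_dr1 halves, otherwise a single 32-bit float. */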
  1820 /* Floating point moves */
  1821 FMOV FRm, FRn {:  
  1822     COUNT_INST(I_FMOV1);
  1823     check_fpuen();
  1824     if( sh4_x86.double_size ) {
  1825         load_dr0( R_EAX, FRm );
  1826         load_dr1( R_ECX, FRm );
  1827         store_dr0( R_EAX, FRn );
  1828         store_dr1( R_ECX, FRn );
  1829     } else {
  1830         load_fr( R_EAX, FRm ); // SZ=0 branch
  1831         store_fr( R_EAX, FRn );
  1832     }
  1833 :}
  1834 FMOV FRm, @Rn {: 
  1835     COUNT_INST(I_FMOV2);
  1836     check_fpuen();
  1837     load_reg( R_EAX, Rn );
  1838     if( sh4_x86.double_size ) {
  1839         check_walign64( R_EAX );
  1840         MMU_TRANSLATE_WRITE( R_EAX );
  1841         load_dr0( R_EDX, FRm );
  1842         load_dr1( R_ECX, FRm );
  1843         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1844     } else {
  1845         check_walign32( R_EAX );
  1846         MMU_TRANSLATE_WRITE( R_EAX );
  1847         load_fr( R_EDX, FRm );
  1848         MEM_WRITE_LONG( R_EAX, R_EDX );
  1849     }
  1850     sh4_x86.tstate = TSTATE_NONE;
  1851 :}
  1852 FMOV @Rm, FRn {:  
  1853     COUNT_INST(I_FMOV5);
  1854     check_fpuen();
  1855     load_reg( R_EAX, Rm );
  1856     if( sh4_x86.double_size ) {
  1857         check_ralign64( R_EAX );
  1858         MMU_TRANSLATE_READ( R_EAX );
  1859         MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
  1860         store_dr0( R_EDX, FRn );
  1861         store_dr1( R_EAX, FRn );    
  1862     } else {
  1863         check_ralign32( R_EAX );
  1864         MMU_TRANSLATE_READ( R_EAX );
  1865         MEM_READ_LONG( R_EAX, R_EAX );
  1866         store_fr( R_EAX, FRn );
  1867     }
  1868     sh4_x86.tstate = TSTATE_NONE;
  1869 :}
  1870 FMOV FRm, @-Rn {:  
  1871     COUNT_INST(I_FMOV3);
  1872     check_fpuen();
  1873     load_reg( R_EAX, Rn );
  1874     if( sh4_x86.double_size ) {
  1875         check_walign64( R_EAX );
  1876         ADD_imm8s_r32(-8,R_EAX);
  1877         MMU_TRANSLATE_WRITE( R_EAX );
  1878         load_dr0( R_EDX, FRm );
  1879         load_dr1( R_ECX, FRm );
  1880         ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1881         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1882     } else {
  1883         check_walign32( R_EAX );
  1884         ADD_imm8s_r32( -4, R_EAX );
  1885         MMU_TRANSLATE_WRITE( R_EAX );
  1886         load_fr( R_EDX, FRm );
  1887         ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1888         MEM_WRITE_LONG( R_EAX, R_EDX );
  1889     }
  1890     sh4_x86.tstate = TSTATE_NONE;
  1891 :}
  1892 FMOV @Rm+, FRn {:
  1893     COUNT_INST(I_FMOV6);
  1894     check_fpuen();
  1895     load_reg( R_EAX, Rm );
  1896     if( sh4_x86.double_size ) {
  1897         check_ralign64( R_EAX );
  1898         MMU_TRANSLATE_READ( R_EAX );
  1899         ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1900         MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
  1901         store_dr0( R_EDX, FRn );
  1902         store_dr1( R_EAX, FRn );
  1903     } else {
  1904         check_ralign32( R_EAX );
  1905         MMU_TRANSLATE_READ( R_EAX );
  1906         ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1907         MEM_READ_LONG( R_EAX, R_EAX );
  1908         store_fr( R_EAX, FRn );
  1909     }
  1910     sh4_x86.tstate = TSTATE_NONE;
  1911 :}
  1912 FMOV FRm, @(R0, Rn) {:  
  1913     COUNT_INST(I_FMOV4);
  1914     check_fpuen();
  1915     load_reg( R_EAX, Rn );
  1916     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1917     if( sh4_x86.double_size ) {
  1918         check_walign64( R_EAX );
  1919         MMU_TRANSLATE_WRITE( R_EAX );
  1920         load_dr0( R_EDX, FRm );
  1921         load_dr1( R_ECX, FRm );
  1922         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1923     } else {
  1924         check_walign32( R_EAX );
  1925         MMU_TRANSLATE_WRITE( R_EAX );
  1926         load_fr( R_EDX, FRm );
  1927         MEM_WRITE_LONG( R_EAX, R_EDX ); // 12
  1928     }
  1929     sh4_x86.tstate = TSTATE_NONE;
  1930 :}
  1931 FMOV @(R0, Rm), FRn {:  
  1932     COUNT_INST(I_FMOV7);
  1933     check_fpuen();
  1934     load_reg( R_EAX, Rm );
  1935     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1936     if( sh4_x86.double_size ) {
  1937         check_ralign64( R_EAX );
  1938         MMU_TRANSLATE_READ( R_EAX );
  1939         MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1940         store_dr0( R_ECX, FRn );
  1941         store_dr1( R_EAX, FRn );
  1942     } else {
  1943         check_ralign32( R_EAX );
  1944         MMU_TRANSLATE_READ( R_EAX );
  1945         MEM_READ_LONG( R_EAX, R_EAX );
  1946         store_fr( R_EAX, FRn );
  1947     }
  1948     sh4_x86.tstate = TSTATE_NONE;
  1949 :}
  1950 FLDI0 FRn {:  /* IFF PR=0 */
  1951     COUNT_INST(I_FLDI0);
  1952     check_fpuen();
  1953     if( sh4_x86.double_prec == 0 ) {
  1954         XOR_r32_r32( R_EAX, R_EAX );
  1955         store_fr( R_EAX, FRn );
  1956     }
  1957     sh4_x86.tstate = TSTATE_NONE;
  1958 :}
  1959 FLDI1 FRn {:  /* IFF PR=0 */
  1960     COUNT_INST(I_FLDI1);
  1961     check_fpuen();
  1962     if( sh4_x86.double_prec == 0 ) {
  1963         load_imm32(R_EAX, 0x3F800000);
  1964         store_fr( R_EAX, FRn );
  1965     }
  1966 :}
  1968 FLOAT FPUL, FRn {:  
  1969     COUNT_INST(I_FLOAT);
  1970     check_fpuen();
  1971     FILD_sh4r(R_FPUL);
  1972     if( sh4_x86.double_prec ) {
  1973         pop_dr( FRn );
  1974     } else {
  1975         pop_fr( FRn );
  1976     }
  1977 :}
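       /* FTRC saturates like the hardware: the operand is first compared
        * against max_int/min_int, and in-range values are converted with
        * the x87 control word temporarily switched to truncation
        * (trunc_fcw) and then restored from save_fcw. */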
  1978 FTRC FRm, FPUL {:  
  1979     COUNT_INST(I_FTRC);
  1980     check_fpuen();
  1981     if( sh4_x86.double_prec ) {
  1982         push_dr( FRm );
  1983     } else {
  1984         push_fr( FRm );
  1985     }
  1986     load_ptr( R_ECX, &max_int );
  1987     FILD_r32ind( R_ECX );
  1988     FCOMIP_st(1);
  1989     JNA_rel8( sat );
  1990     load_ptr( R_ECX, &min_int );  // 5
  1991     FILD_r32ind( R_ECX );           // 2
  1992     FCOMIP_st(1);                   // 2
  1993     JAE_rel8( sat2 );            // 2
  1994     load_ptr( R_EAX, &save_fcw );
  1995     FNSTCW_r32ind( R_EAX );
  1996     load_ptr( R_EDX, &trunc_fcw );
  1997     FLDCW_r32ind( R_EDX );
  1998     FISTP_sh4r(R_FPUL);             // 3
  1999     FLDCW_r32ind( R_EAX );
  2000     JMP_rel8(end);             // 2
  2002     JMP_TARGET(sat);
  2003     JMP_TARGET(sat2);
  2004     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  2005     store_spreg( R_ECX, R_FPUL );
  2006     FPOP_st();
  2007     JMP_TARGET(end);
  2008     sh4_x86.tstate = TSTATE_NONE;
  2009 :}
  2010 FLDS FRm, FPUL {:  
  2011     COUNT_INST(I_FLDS);
  2012     check_fpuen();
  2013     load_fr( R_EAX, FRm );
  2014     store_spreg( R_EAX, R_FPUL );
  2015 :}
  2016 FSTS FPUL, FRn {:  
  2017     COUNT_INST(I_FSTS);
  2018     check_fpuen();
  2019     load_spreg( R_EAX, R_FPUL );
  2020     store_fr( R_EAX, FRn );
  2021 :}
  2022 FCNVDS FRm, FPUL {:  
  2023     COUNT_INST(I_FCNVDS);
  2024     check_fpuen();
  2025     if( sh4_x86.double_prec ) {
  2026         push_dr( FRm );
  2027         pop_fpul();
  2028     }
  2029 :}
  2030 FCNVSD FPUL, FRn {:  
  2031     COUNT_INST(I_FCNVSD);
  2032     check_fpuen();
  2033     if( sh4_x86.double_prec ) {
  2034         push_fpul();
  2035         pop_dr( FRn );
  2036     }
  2037 :}
  2039 /* Floating point instructions */
  2040 FABS FRn {:  
  2041     COUNT_INST(I_FABS);
  2042     check_fpuen();
  2043     if( sh4_x86.double_prec ) {
  2044         push_dr(FRn);
  2045         FABS_st0();
  2046         pop_dr(FRn);
  2047     } else {
  2048         push_fr(FRn);
  2049         FABS_st0();
  2050         pop_fr(FRn);
  2051     }
  2052 :}
  2053 FADD FRm, FRn {:  
  2054     COUNT_INST(I_FADD);
  2055     check_fpuen();
  2056     if( sh4_x86.double_prec ) {
  2057         push_dr(FRm);
  2058         push_dr(FRn);
  2059         FADDP_st(1);
  2060         pop_dr(FRn);
  2061     } else {
  2062         push_fr(FRm);
  2063         push_fr(FRn);
  2064         FADDP_st(1);
  2065         pop_fr(FRn);
  2066     }
  2067 :}
  2068 FDIV FRm, FRn {:  
  2069     COUNT_INST(I_FDIV);
  2070     check_fpuen();
  2071     if( sh4_x86.double_prec ) {
  2072         push_dr(FRn);
  2073         push_dr(FRm);
  2074         FDIVP_st(1);
  2075         pop_dr(FRn);
  2076     } else {
  2077         push_fr(FRn);
  2078         push_fr(FRm);
  2079         FDIVP_st(1);
  2080         pop_fr(FRn);
  2081     }
  2082 :}
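       /* FMAC computes FRn += FR0 * FRm on the x87 stack; since the
        * intermediate product is kept at x87 precision, rounding may
        * occasionally differ slightly from real hardware. */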
  2083 FMAC FR0, FRm, FRn {:  
  2084     COUNT_INST(I_FMAC);
  2085     check_fpuen();
  2086     if( sh4_x86.double_prec ) {
  2087         push_dr( 0 );
  2088         push_dr( FRm );
  2089         FMULP_st(1);
  2090         push_dr( FRn );
  2091         FADDP_st(1);
  2092         pop_dr( FRn );
  2093     } else {
  2094         push_fr( 0 );
  2095         push_fr( FRm );
  2096         FMULP_st(1);
  2097         push_fr( FRn );
  2098         FADDP_st(1);
  2099         pop_fr( FRn );
  2100     }
  2101 :}
  2103 FMUL FRm, FRn {:  
  2104     COUNT_INST(I_FMUL);
  2105     check_fpuen();
  2106     if( sh4_x86.double_prec ) {
  2107         push_dr(FRm);
  2108         push_dr(FRn);
  2109         FMULP_st(1);
  2110         pop_dr(FRn);
  2111     } else {
  2112         push_fr(FRm);
  2113         push_fr(FRn);
  2114         FMULP_st(1);
  2115         pop_fr(FRn);
  2116     }
  2117 :}
  2118 FNEG FRn {:  
  2119     COUNT_INST(I_FNEG);
  2120     check_fpuen();
  2121     if( sh4_x86.double_prec ) {
  2122         push_dr(FRn);
  2123         FCHS_st0();
  2124         pop_dr(FRn);
  2125     } else {
  2126         push_fr(FRn);
  2127         FCHS_st0();
  2128         pop_fr(FRn);
  2129     }
  2130 :}
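       /* FSRRA approximates the SH4 reciprocal square root as
        * 1.0 / sqrt(FRn); it is only defined for single-precision mode. */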
  2131 FSRRA FRn {:  
  2132     COUNT_INST(I_FSRRA);
  2133     check_fpuen();
  2134     if( sh4_x86.double_prec == 0 ) {
  2135         FLD1_st0();
  2136         push_fr(FRn);
  2137         FSQRT_st0();
  2138         FDIVP_st(1);
  2139         pop_fr(FRn);
  2140     }
  2141 :}
  2142 FSQRT FRn {:  
  2143     COUNT_INST(I_FSQRT);
  2144     check_fpuen();
  2145     if( sh4_x86.double_prec ) {
  2146         push_dr(FRn);
  2147         FSQRT_st0();
  2148         pop_dr(FRn);
  2149     } else {
  2150         push_fr(FRn);
  2151         FSQRT_st0();
  2152         pop_fr(FRn);
  2153     }
  2154 :}
  2155 FSUB FRm, FRn {:  
  2156     COUNT_INST(I_FSUB);
  2157     check_fpuen();
  2158     if( sh4_x86.double_prec ) {
  2159         push_dr(FRn);
  2160         push_dr(FRm);
  2161         FSUBP_st(1);
  2162         pop_dr(FRn);
  2163     } else {
  2164         push_fr(FRn);
  2165         push_fr(FRm);
  2166         FSUBP_st(1);
  2167         pop_fr(FRn);
  2168     }
  2169 :}
  2171 FCMP/EQ FRm, FRn {:  
  2172     COUNT_INST(I_FCMPEQ);
  2173     check_fpuen();
  2174     if( sh4_x86.double_prec ) {
  2175         push_dr(FRm);
  2176         push_dr(FRn);
  2177     } else {
  2178         push_fr(FRm);
  2179         push_fr(FRn);
  2180     }
  2181     FCOMIP_st(1);
  2182     SETE_t();
  2183     FPOP_st();
  2184     sh4_x86.tstate = TSTATE_E;
  2185 :}
  2186 FCMP/GT FRm, FRn {:  
  2187     COUNT_INST(I_FCMPGT);
  2188     check_fpuen();
  2189     if( sh4_x86.double_prec ) {
  2190         push_dr(FRm);
  2191         push_dr(FRn);
  2192     } else {
  2193         push_fr(FRm);
  2194         push_fr(FRn);
  2195     }
  2196     FCOMIP_st(1);
  2197     SETA_t();
  2198     FPOP_st();
  2199     sh4_x86.tstate = TSTATE_A;
  2200 :}
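       /* FSCA: FPUL is treated as a 16.16 fixed-point fraction of a full
        * turn; sh4_fsca() writes sin into FRn and cos into FRn+1, which
        * is why the pair address below is formed with FRn&0x0E. */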
  2202 FSCA FPUL, FRn {:  
  2203     COUNT_INST(I_FSCA);
  2204     check_fpuen();
  2205     if( sh4_x86.double_prec == 0 ) {
  2206         LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
  2207         load_spreg( R_EAX, R_FPUL );
  2208         call_func2( sh4_fsca, R_EAX, R_EDX );
  2209     }
  2210     sh4_x86.tstate = TSTATE_NONE;
  2211 :}
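       /* FIPR computes the inner product
        *   FVn[3] = sum( FVm[i] * FVn[i], i = 0..3 )
        * via SSE3 horizontal adds when available, else on the x87 stack.
        * The raw store offset (FVn<<2)+2 in the SSE path appears to
        * reflect the word-swapped in-memory layout of the fr banks. */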
  2212 FIPR FVm, FVn {:  
  2213     COUNT_INST(I_FIPR);
  2214     check_fpuen();
  2215     if( sh4_x86.double_prec == 0 ) {
  2216         if( sh4_x86.sse3_enabled ) {
  2217             MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2218             MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2219             HADDPS_xmm_xmm( 4, 4 ); 
  2220             HADDPS_xmm_xmm( 4, 4 );
  2221             MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2222         } else {
  2223             push_fr( FVm<<2 );
  2224             push_fr( FVn<<2 );
  2225             FMULP_st(1);
  2226             push_fr( (FVm<<2)+1);
  2227             push_fr( (FVn<<2)+1);
  2228             FMULP_st(1);
  2229             FADDP_st(1);
  2230             push_fr( (FVm<<2)+2);
  2231             push_fr( (FVn<<2)+2);
  2232             FMULP_st(1);
  2233             FADDP_st(1);
  2234             push_fr( (FVm<<2)+3);
  2235             push_fr( (FVn<<2)+3);
  2236             FMULP_st(1);
  2237             FADDP_st(1);
  2238             pop_fr( (FVn<<2)+3);
  2239         }
  2240     }
  2241 :}
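       /* FTRV multiplies vector FVn by the 4x4 back-bank matrix XMTRX.
        * The SSE path broadcasts each vector element across an xmm
        * register, multiplies it by one matrix row, and sums the partial
        * products; the shuffles account for the word-swapped pair layout
        * noted in the comments (M1 M0 M3 M2, etc). */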
  2242 FTRV XMTRX, FVn {:  
  2243     COUNT_INST(I_FTRV);
  2244     check_fpuen();
  2245     if( sh4_x86.double_prec == 0 ) {
  2246         if( sh4_x86.sse3_enabled ) {
  2247             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2248             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2249             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2250             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2252             MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2253             MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2254             MOVAPS_xmm_xmm( 4, 6 );
  2255             MOVAPS_xmm_xmm( 5, 7 );
  2256             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2257             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2258             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2259             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2260             MULPS_xmm_xmm( 0, 4 );
  2261             MULPS_xmm_xmm( 1, 5 );
  2262             MULPS_xmm_xmm( 2, 6 );
  2263             MULPS_xmm_xmm( 3, 7 );
  2264             ADDPS_xmm_xmm( 5, 4 );
  2265             ADDPS_xmm_xmm( 7, 6 );
  2266             ADDPS_xmm_xmm( 6, 4 );
  2267             MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2268         } else {
  2269             LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
  2270             call_func1( sh4_ftrv, R_EAX );
  2271         }
  2272     }
  2273     sh4_x86.tstate = TSTATE_NONE;
  2274 :}
  2276 FRCHG {:  
  2277     COUNT_INST(I_FRCHG);
  2278     check_fpuen();
  2279     load_spreg( R_ECX, R_FPSCR );
  2280     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2281     store_spreg( R_ECX, R_FPSCR );
  2282     call_func0( sh4_switch_fr_banks );
  2283     sh4_x86.tstate = TSTATE_NONE;
  2284 :}
  2285 FSCHG {:  
  2286     COUNT_INST(I_FSCHG);
  2287     check_fpuen();
  2288     load_spreg( R_ECX, R_FPSCR );
  2289     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2290     store_spreg( R_ECX, R_FPSCR );
  2291     sh4_x86.tstate = TSTATE_NONE;
  2292     sh4_x86.double_size = !sh4_x86.double_size;
  2293 :}
  2295 /* Processor control instructions */
  2296 LDC Rm, SR {:
  2297     COUNT_INST(I_LDCSR);
  2298     if( sh4_x86.in_delay_slot ) {
  2299 	SLOTILLEGAL();
  2300     } else {
  2301 	check_priv();
  2302 	load_reg( R_EAX, Rm );
  2303 	call_func1( sh4_write_sr, R_EAX );
  2304 	sh4_x86.priv_checked = FALSE;
  2305 	sh4_x86.fpuen_checked = FALSE;
  2306 	sh4_x86.tstate = TSTATE_NONE;
  2307     }
  2308 :}
  2309 LDC Rm, GBR {: 
  2310     COUNT_INST(I_LDC);
  2311     load_reg( R_EAX, Rm );
  2312     store_spreg( R_EAX, R_GBR );
  2313 :}
  2314 LDC Rm, VBR {:  
  2315     COUNT_INST(I_LDC);
  2316     check_priv();
  2317     load_reg( R_EAX, Rm );
  2318     store_spreg( R_EAX, R_VBR );
  2319     sh4_x86.tstate = TSTATE_NONE;
  2320 :}
  2321 LDC Rm, SSR {:  
  2322     COUNT_INST(I_LDC);
  2323     check_priv();
  2324     load_reg( R_EAX, Rm );
  2325     store_spreg( R_EAX, R_SSR );
  2326     sh4_x86.tstate = TSTATE_NONE;
  2327 :}
  2328 LDC Rm, SGR {:  
  2329     COUNT_INST(I_LDC);
  2330     check_priv();
  2331     load_reg( R_EAX, Rm );
  2332     store_spreg( R_EAX, R_SGR );
  2333     sh4_x86.tstate = TSTATE_NONE;
  2334 :}
  2335 LDC Rm, SPC {:  
  2336     COUNT_INST(I_LDC);
  2337     check_priv();
  2338     load_reg( R_EAX, Rm );
  2339     store_spreg( R_EAX, R_SPC );
  2340     sh4_x86.tstate = TSTATE_NONE;
  2341 :}
  2342 LDC Rm, DBR {:  
  2343     COUNT_INST(I_LDC);
  2344     check_priv();
  2345     load_reg( R_EAX, Rm );
  2346     store_spreg( R_EAX, R_DBR );
  2347     sh4_x86.tstate = TSTATE_NONE;
  2348 :}
  2349 LDC Rm, Rn_BANK {:  
  2350     COUNT_INST(I_LDC);
  2351     check_priv();
  2352     load_reg( R_EAX, Rm );
  2353     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2354     sh4_x86.tstate = TSTATE_NONE;
  2355 :}
  2356 LDC.L @Rm+, GBR {:  
  2357     COUNT_INST(I_LDCM);
  2358     load_reg( R_EAX, Rm );
  2359     check_ralign32( R_EAX );
  2360     MMU_TRANSLATE_READ( R_EAX );
  2361     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2362     MEM_READ_LONG( R_EAX, R_EAX );
  2363     store_spreg( R_EAX, R_GBR );
  2364     sh4_x86.tstate = TSTATE_NONE;
  2365 :}
  2366 LDC.L @Rm+, SR {:
  2367     COUNT_INST(I_LDCSRM);
  2368     if( sh4_x86.in_delay_slot ) {
  2369 	SLOTILLEGAL();
  2370     } else {
  2371 	check_priv();
  2372 	load_reg( R_EAX, Rm );
  2373 	check_ralign32( R_EAX );
  2374 	MMU_TRANSLATE_READ( R_EAX );
  2375 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2376 	MEM_READ_LONG( R_EAX, R_EAX );
  2377 	call_func1( sh4_write_sr, R_EAX );
  2378 	sh4_x86.priv_checked = FALSE;
  2379 	sh4_x86.fpuen_checked = FALSE;
  2380 	sh4_x86.tstate = TSTATE_NONE;
  2381     }
  2382 :}
  2383 LDC.L @Rm+, VBR {:  
  2384     COUNT_INST(I_LDCM);
  2385     check_priv();
  2386     load_reg( R_EAX, Rm );
  2387     check_ralign32( R_EAX );
  2388     MMU_TRANSLATE_READ( R_EAX );
  2389     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2390     MEM_READ_LONG( R_EAX, R_EAX );
  2391     store_spreg( R_EAX, R_VBR );
  2392     sh4_x86.tstate = TSTATE_NONE;
  2393 :}
  2394 LDC.L @Rm+, SSR {:
  2395     COUNT_INST(I_LDCM);
  2396     check_priv();
  2397     load_reg( R_EAX, Rm );
  2398     check_ralign32( R_EAX );
  2399     MMU_TRANSLATE_READ( R_EAX );
  2400     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2401     MEM_READ_LONG( R_EAX, R_EAX );
  2402     store_spreg( R_EAX, R_SSR );
  2403     sh4_x86.tstate = TSTATE_NONE;
  2404 :}
  2405 LDC.L @Rm+, SGR {:  
  2406     COUNT_INST(I_LDCM);
  2407     check_priv();
  2408     load_reg( R_EAX, Rm );
  2409     check_ralign32( R_EAX );
  2410     MMU_TRANSLATE_READ( R_EAX );
  2411     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2412     MEM_READ_LONG( R_EAX, R_EAX );
  2413     store_spreg( R_EAX, R_SGR );
  2414     sh4_x86.tstate = TSTATE_NONE;
  2415 :}
  2416 LDC.L @Rm+, SPC {:  
  2417     COUNT_INST(I_LDCM);
  2418     check_priv();
  2419     load_reg( R_EAX, Rm );
  2420     check_ralign32( R_EAX );
  2421     MMU_TRANSLATE_READ( R_EAX );
  2422     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2423     MEM_READ_LONG( R_EAX, R_EAX );
  2424     store_spreg( R_EAX, R_SPC );
  2425     sh4_x86.tstate = TSTATE_NONE;
  2426 :}
  2427 LDC.L @Rm+, DBR {:  
  2428     COUNT_INST(I_LDCM);
  2429     check_priv();
  2430     load_reg( R_EAX, Rm );
  2431     check_ralign32( R_EAX );
  2432     MMU_TRANSLATE_READ( R_EAX );
  2433     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2434     MEM_READ_LONG( R_EAX, R_EAX );
  2435     store_spreg( R_EAX, R_DBR );
  2436     sh4_x86.tstate = TSTATE_NONE;
  2437 :}
  2438 LDC.L @Rm+, Rn_BANK {:  
  2439     COUNT_INST(I_LDCM);
  2440     check_priv();
  2441     load_reg( R_EAX, Rm );
  2442     check_ralign32( R_EAX );
  2443     MMU_TRANSLATE_READ( R_EAX );
  2444     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2445     MEM_READ_LONG( R_EAX, R_EAX );
  2446     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2447     sh4_x86.tstate = TSTATE_NONE;
  2448 :}
  2449 LDS Rm, FPSCR {:
  2450     COUNT_INST(I_LDSFPSCR);
  2451     check_fpuen();
  2452     load_reg( R_EAX, Rm );
  2453     call_func1( sh4_write_fpscr, R_EAX );
  2454     sh4_x86.tstate = TSTATE_NONE;
  2455     return 2;
  2456 :}
  2457 LDS.L @Rm+, FPSCR {:  
  2458     COUNT_INST(I_LDSFPSCRM);
  2459     check_fpuen();
  2460     load_reg( R_EAX, Rm );
  2461     check_ralign32( R_EAX );
  2462     MMU_TRANSLATE_READ( R_EAX );
  2463     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2464     MEM_READ_LONG( R_EAX, R_EAX );
  2465     call_func1( sh4_write_fpscr, R_EAX );
  2466     sh4_x86.tstate = TSTATE_NONE;
  2467     return 2;
  2468 :}
  2469 LDS Rm, FPUL {:  
  2470     COUNT_INST(I_LDS);
  2471     check_fpuen();
  2472     load_reg( R_EAX, Rm );
  2473     store_spreg( R_EAX, R_FPUL );
  2474 :}
  2475 LDS.L @Rm+, FPUL {:  
  2476     COUNT_INST(I_LDSM);
  2477     check_fpuen();
  2478     load_reg( R_EAX, Rm );
  2479     check_ralign32( R_EAX );
  2480     MMU_TRANSLATE_READ( R_EAX );
  2481     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2482     MEM_READ_LONG( R_EAX, R_EAX );
  2483     store_spreg( R_EAX, R_FPUL );
  2484     sh4_x86.tstate = TSTATE_NONE;
  2485 :}
  2486 LDS Rm, MACH {: 
  2487     COUNT_INST(I_LDS);
  2488     load_reg( R_EAX, Rm );
  2489     store_spreg( R_EAX, R_MACH );
  2490 :}
  2491 LDS.L @Rm+, MACH {:  
  2492     COUNT_INST(I_LDSM);
  2493     load_reg( R_EAX, Rm );
  2494     check_ralign32( R_EAX );
  2495     MMU_TRANSLATE_READ( R_EAX );
  2496     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2497     MEM_READ_LONG( R_EAX, R_EAX );
  2498     store_spreg( R_EAX, R_MACH );
  2499     sh4_x86.tstate = TSTATE_NONE;
  2500 :}
  2501 LDS Rm, MACL {:  
  2502     COUNT_INST(I_LDS);
  2503     load_reg( R_EAX, Rm );
  2504     store_spreg( R_EAX, R_MACL );
  2505 :}
  2506 LDS.L @Rm+, MACL {:  
  2507     COUNT_INST(I_LDSM);
  2508     load_reg( R_EAX, Rm );
  2509     check_ralign32( R_EAX );
  2510     MMU_TRANSLATE_READ( R_EAX );
  2511     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2512     MEM_READ_LONG( R_EAX, R_EAX );
  2513     store_spreg( R_EAX, R_MACL );
  2514     sh4_x86.tstate = TSTATE_NONE;
  2515 :}
  2516 LDS Rm, PR {:  
  2517     COUNT_INST(I_LDS);
  2518     load_reg( R_EAX, Rm );
  2519     store_spreg( R_EAX, R_PR );
  2520 :}
  2521 LDS.L @Rm+, PR {:  
  2522     COUNT_INST(I_LDSM);
  2523     load_reg( R_EAX, Rm );
  2524     check_ralign32( R_EAX );
  2525     MMU_TRANSLATE_READ( R_EAX );
  2526     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2527     MEM_READ_LONG( R_EAX, R_EAX );
  2528     store_spreg( R_EAX, R_PR );
  2529     sh4_x86.tstate = TSTATE_NONE;
  2530 :}
  2531 LDTLB {:  
  2532     COUNT_INST(I_LDTLB);
  2533     call_func0( MMU_ldtlb );
  2534     sh4_x86.tstate = TSTATE_NONE;
  2535 :}
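       /* The cache-block operations below are no-ops here, since the
        * operand cache itself isn't modelled; only the instruction
        * counters are maintained. */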
  2536 OCBI @Rn {:
  2537     COUNT_INST(I_OCBI);
  2538 :}
  2539 OCBP @Rn {:
  2540     COUNT_INST(I_OCBP);
  2541 :}
  2542 OCBWB @Rn {:
  2543     COUNT_INST(I_OCBWB);
  2544 :}
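       /* PREF only has an observable effect for store-queue addresses:
        * the masked compare below matches 0xE0000000-0xE3FFFFFF and
        * flushes the corresponding store queue, raising an exception on
        * a failed MMU translation when the TLB is enabled. */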
  2545 PREF @Rn {:
  2546     COUNT_INST(I_PREF);
  2547     load_reg( R_EAX, Rn );
  2548     MOV_r32_r32( R_EAX, R_ECX );
  2549     AND_imm32_r32( 0xFC000000, R_ECX );
  2550     CMP_imm32_r32( 0xE0000000, R_ECX );
  2551     JNE_rel8(end);
  2552     if( sh4_x86.tlb_on ) {
  2553     	call_func1( sh4_flush_store_queue_mmu, R_EAX );
  2554         TEST_r32_r32( R_EAX, R_EAX );
  2555         JE_exc(-1);
  2556     } else {
  2557     	call_func1( sh4_flush_store_queue, R_EAX );
  2558     }
  2559     JMP_TARGET(end);
  2560     sh4_x86.tstate = TSTATE_NONE;
  2561 :}
  2562 SLEEP {: 
  2563     COUNT_INST(I_SLEEP);
  2564     check_priv();
  2565     call_func0( sh4_sleep );
  2566     sh4_x86.tstate = TSTATE_NONE;
  2567     sh4_x86.in_delay_slot = DELAY_NONE;
  2568     return 2;
  2569 :}
  2570 STC SR, Rn {:
  2571     COUNT_INST(I_STCSR);
  2572     check_priv();
  2573     call_func0(sh4_read_sr);
  2574     store_reg( R_EAX, Rn );
  2575     sh4_x86.tstate = TSTATE_NONE;
  2576 :}
  2577 STC GBR, Rn {:  
  2578     COUNT_INST(I_STC);
  2579     load_spreg( R_EAX, R_GBR );
  2580     store_reg( R_EAX, Rn );
  2581 :}
  2582 STC VBR, Rn {:  
  2583     COUNT_INST(I_STC);
  2584     check_priv();
  2585     load_spreg( R_EAX, R_VBR );
  2586     store_reg( R_EAX, Rn );
  2587     sh4_x86.tstate = TSTATE_NONE;
  2588 :}
  2589 STC SSR, Rn {:  
  2590     COUNT_INST(I_STC);
  2591     check_priv();
  2592     load_spreg( R_EAX, R_SSR );
  2593     store_reg( R_EAX, Rn );
  2594     sh4_x86.tstate = TSTATE_NONE;
  2595 :}
  2596 STC SPC, Rn {:  
  2597     COUNT_INST(I_STC);
  2598     check_priv();
  2599     load_spreg( R_EAX, R_SPC );
  2600     store_reg( R_EAX, Rn );
  2601     sh4_x86.tstate = TSTATE_NONE;
  2602 :}
  2603 STC SGR, Rn {:  
  2604     COUNT_INST(I_STC);
  2605     check_priv();
  2606     load_spreg( R_EAX, R_SGR );
  2607     store_reg( R_EAX, Rn );
  2608     sh4_x86.tstate = TSTATE_NONE;
  2609 :}
  2610 STC DBR, Rn {:  
  2611     COUNT_INST(I_STC);
  2612     check_priv();
  2613     load_spreg( R_EAX, R_DBR );
  2614     store_reg( R_EAX, Rn );
  2615     sh4_x86.tstate = TSTATE_NONE;
  2616 :}
  2617 STC Rm_BANK, Rn {:
  2618     COUNT_INST(I_STC);
  2619     check_priv();
  2620     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2621     store_reg( R_EAX, Rn );
  2622     sh4_x86.tstate = TSTATE_NONE;
  2623 :}
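       /* STC.L SR: the translated target address must survive the
        * sh4_read_sr() call, so it is parked in a stack-local slot
        * (esp+0) and reloaded for the memory write afterwards. */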
  2624 STC.L SR, @-Rn {:
  2625     COUNT_INST(I_STCSRM);
  2626     check_priv();
  2627     load_reg( R_EAX, Rn );
  2628     check_walign32( R_EAX );
  2629     ADD_imm8s_r32( -4, R_EAX );
  2630     MMU_TRANSLATE_WRITE( R_EAX );
  2631     MOV_r32_esp8( R_EAX, 0 );
  2632     call_func0( sh4_read_sr );
  2633     MOV_r32_r32( R_EAX, R_EDX );
  2634     MOV_esp8_r32( 0, R_EAX );
  2635     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2636     MEM_WRITE_LONG( R_EAX, R_EDX );
  2637     sh4_x86.tstate = TSTATE_NONE;
  2638 :}
  2639 STC.L VBR, @-Rn {:  
  2640     COUNT_INST(I_STCM);
  2641     check_priv();
  2642     load_reg( R_EAX, Rn );
  2643     check_walign32( R_EAX );
  2644     ADD_imm8s_r32( -4, R_EAX );
  2645     MMU_TRANSLATE_WRITE( R_EAX );
  2646     load_spreg( R_EDX, R_VBR );
  2647     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2648     MEM_WRITE_LONG( R_EAX, R_EDX );
  2649     sh4_x86.tstate = TSTATE_NONE;
  2650 :}
  2651 STC.L SSR, @-Rn {:  
  2652     COUNT_INST(I_STCM);
  2653     check_priv();
  2654     load_reg( R_EAX, Rn );
  2655     check_walign32( R_EAX );
  2656     ADD_imm8s_r32( -4, R_EAX );
  2657     MMU_TRANSLATE_WRITE( R_EAX );
  2658     load_spreg( R_EDX, R_SSR );
  2659     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2660     MEM_WRITE_LONG( R_EAX, R_EDX );
  2661     sh4_x86.tstate = TSTATE_NONE;
  2662 :}
  2663 STC.L SPC, @-Rn {:
  2664     COUNT_INST(I_STCM);
  2665     check_priv();
  2666     load_reg( R_EAX, Rn );
  2667     check_walign32( R_EAX );
  2668     ADD_imm8s_r32( -4, R_EAX );
  2669     MMU_TRANSLATE_WRITE( R_EAX );
  2670     load_spreg( R_EDX, R_SPC );
  2671     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2672     MEM_WRITE_LONG( R_EAX, R_EDX );
  2673     sh4_x86.tstate = TSTATE_NONE;
  2674 :}
  2675 STC.L SGR, @-Rn {:  
  2676     COUNT_INST(I_STCM);
  2677     check_priv();
  2678     load_reg( R_EAX, Rn );
  2679     check_walign32( R_EAX );
  2680     ADD_imm8s_r32( -4, R_EAX );
  2681     MMU_TRANSLATE_WRITE( R_EAX );
  2682     load_spreg( R_EDX, R_SGR );
  2683     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2684     MEM_WRITE_LONG( R_EAX, R_EDX );
  2685     sh4_x86.tstate = TSTATE_NONE;
  2686 :}
  2687 STC.L DBR, @-Rn {:  
  2688     COUNT_INST(I_STCM);
  2689     check_priv();
  2690     load_reg( R_EAX, Rn );
  2691     check_walign32( R_EAX );
  2692     ADD_imm8s_r32( -4, R_EAX );
  2693     MMU_TRANSLATE_WRITE( R_EAX );
  2694     load_spreg( R_EDX, R_DBR );
  2695     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2696     MEM_WRITE_LONG( R_EAX, R_EDX );
  2697     sh4_x86.tstate = TSTATE_NONE;
  2698 :}
  2699 STC.L Rm_BANK, @-Rn {:  
  2700     COUNT_INST(I_STCM);
  2701     check_priv();
  2702     load_reg( R_EAX, Rn );
  2703     check_walign32( R_EAX );
  2704     ADD_imm8s_r32( -4, R_EAX );
  2705     MMU_TRANSLATE_WRITE( R_EAX );
  2706     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2707     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2708     MEM_WRITE_LONG( R_EAX, R_EDX );
  2709     sh4_x86.tstate = TSTATE_NONE;
  2710 :}
  2711 STC.L GBR, @-Rn {:  
  2712     COUNT_INST(I_STCM);
  2713     load_reg( R_EAX, Rn );
  2714     check_walign32( R_EAX );
  2715     ADD_imm8s_r32( -4, R_EAX );
  2716     MMU_TRANSLATE_WRITE( R_EAX );
  2717     load_spreg( R_EDX, R_GBR );
  2718     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2719     MEM_WRITE_LONG( R_EAX, R_EDX );
  2720     sh4_x86.tstate = TSTATE_NONE;
  2721 :}
  2722 STS FPSCR, Rn {:  
  2723     COUNT_INST(I_STSFPSCR);
  2724     check_fpuen();
  2725     load_spreg( R_EAX, R_FPSCR );
  2726     store_reg( R_EAX, Rn );
  2727 :}
  2728 STS.L FPSCR, @-Rn {:  
  2729     COUNT_INST(I_STSFPSCRM);
  2730     check_fpuen();
  2731     load_reg( R_EAX, Rn );
  2732     check_walign32( R_EAX );
  2733     ADD_imm8s_r32( -4, R_EAX );
  2734     MMU_TRANSLATE_WRITE( R_EAX );
  2735     load_spreg( R_EDX, R_FPSCR );
  2736     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2737     MEM_WRITE_LONG( R_EAX, R_EDX );
  2738     sh4_x86.tstate = TSTATE_NONE;
  2739 :}
  2740 STS FPUL, Rn {:  
  2741     COUNT_INST(I_STS);
  2742     check_fpuen();
  2743     load_spreg( R_EAX, R_FPUL );
  2744     store_reg( R_EAX, Rn );
  2745 :}
  2746 STS.L FPUL, @-Rn {:  
  2747     COUNT_INST(I_STSM);
  2748     check_fpuen();
  2749     load_reg( R_EAX, Rn );
  2750     check_walign32( R_EAX );
  2751     ADD_imm8s_r32( -4, R_EAX );
  2752     MMU_TRANSLATE_WRITE( R_EAX );
  2753     load_spreg( R_EDX, R_FPUL );
  2754     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2755     MEM_WRITE_LONG( R_EAX, R_EDX );
  2756     sh4_x86.tstate = TSTATE_NONE;
  2757 :}
  2758 STS MACH, Rn {:  
  2759     COUNT_INST(I_STS);
  2760     load_spreg( R_EAX, R_MACH );
  2761     store_reg( R_EAX, Rn );
  2762 :}
  2763 STS.L MACH, @-Rn {:  
  2764     COUNT_INST(I_STSM);
  2765     load_reg( R_EAX, Rn );
  2766     check_walign32( R_EAX );
  2767     ADD_imm8s_r32( -4, R_EAX );
  2768     MMU_TRANSLATE_WRITE( R_EAX );
  2769     load_spreg( R_EDX, R_MACH );
  2770     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2771     MEM_WRITE_LONG( R_EAX, R_EDX );
  2772     sh4_x86.tstate = TSTATE_NONE;
  2773 :}
  2774 STS MACL, Rn {:  
  2775     COUNT_INST(I_STS);
  2776     load_spreg( R_EAX, R_MACL );
  2777     store_reg( R_EAX, Rn );
  2778 :}
  2779 STS.L MACL, @-Rn {:  
  2780     COUNT_INST(I_STSM);
  2781     load_reg( R_EAX, Rn );
  2782     check_walign32( R_EAX );
  2783     ADD_imm8s_r32( -4, R_EAX );
  2784     MMU_TRANSLATE_WRITE( R_EAX );
  2785     load_spreg( R_EDX, R_MACL );
  2786     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2787     MEM_WRITE_LONG( R_EAX, R_EDX );
  2788     sh4_x86.tstate = TSTATE_NONE;
  2789 :}
  2790 STS PR, Rn {:  
  2791     COUNT_INST(I_STS);
  2792     load_spreg( R_EAX, R_PR );
  2793     store_reg( R_EAX, Rn );
  2794 :}
  2795 STS.L PR, @-Rn {:  
  2796     COUNT_INST(I_STSM);
  2797     load_reg( R_EAX, Rn );
  2798     check_walign32( R_EAX );
  2799     ADD_imm8s_r32( -4, R_EAX );
  2800     MMU_TRANSLATE_WRITE( R_EAX );
  2801     load_spreg( R_EDX, R_PR );
  2802     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2803     MEM_WRITE_LONG( R_EAX, R_EDX );
  2804     sh4_x86.tstate = TSTATE_NONE;
  2805 :}
  2807 NOP {: 
  2808     COUNT_INST(I_NOP);
  2809     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  2810 :}
  2811 %%
  2812     sh4_x86.in_delay_slot = DELAY_NONE;
  2813     return 0;