lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 901:32c5cf5e206f
prev 879:a07af43e03c4
next 903:1337c7a7dd6b
author nkeynes
date Sun Oct 26 02:28:29 2008 +0000
permissions -rw-r--r--
last change Move the precision/size tests to translation-time rather than execution-time,
and flush/retranslate on a mismatch. Shaves a few percent off the core runtime
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/xltcache.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4trans.h"
    31 #include "sh4/sh4stat.h"
    32 #include "sh4/sh4mmio.h"
    33 #include "sh4/x86op.h"
    34 #include "clock.h"
    36 #define DEFAULT_BACKPATCH_SIZE 4096
    38 struct backpatch_record {
    39     uint32_t fixup_offset;
    40     uint32_t fixup_icount;
    41     int32_t exc_code;
    42 };
    44 #define DELAY_NONE 0
    45 #define DELAY_PC 1
    46 #define DELAY_PC_PR 2
    48 /** 
    49  * Struct to manage internal translation state. This state is not saved -
    50  * it is only valid between calls to sh4_translate_begin_block() and
    51  * sh4_translate_end_block()
    52  */
    53 struct sh4_x86_state {
    54     int in_delay_slot;
    55     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    56     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    57     gboolean branch_taken; /* true if we branched unconditionally */
    58     gboolean double_prec; /* true if FPU is in double-precision mode */
    59     gboolean double_size; /* true if FPU is in double-size mode */ 
    60     uint32_t block_start_pc;
    61     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    62     int tstate;
    64     /* mode flags */
    65     gboolean tlb_on; /* True if tlb translation is active */
    67     /* Allocated memory for the (block-wide) back-patch list */
    68     struct backpatch_record *backpatch_list;
    69     uint32_t backpatch_posn;
    70     uint32_t backpatch_size;
    71 };
    73 #define TSTATE_NONE -1
    74 #define TSTATE_O    0
    75 #define TSTATE_C    2
    76 #define TSTATE_E    4
    77 #define TSTATE_NE   5
    78 #define TSTATE_G    0xF
    79 #define TSTATE_GE   0xD
    80 #define TSTATE_A    7
    81 #define TSTATE_AE   3
    83 #ifdef ENABLE_SH4STATS
    84 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
    85 #else
    86 #define COUNT_INST(id)
    87 #endif
    89 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    90 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    91 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    92     OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
    94 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    95 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    96 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    97     OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
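
Why the bare OP(0x70+...) works: the TSTATE_* values above are exactly x86 condition-code numbers, and the one-byte conditional jumps are encoded as 0x70+cc (0x74 = JE, 0x75 = JNE, 0x7F = JG, and so on), with cc^1 always being the inverted condition - which is why JF_rel8 just XORs with 1. The same emission as a plain function, purely for illustration (not part of the source):

    static void emit_jcc_rel8( int cc, int8_t disp )
    {
        OP( 0x70 + cc );  /* e.g. cc == TSTATE_E (4) gives 0x74, JE rel8 */
        OP( disp );       /* signed 8-bit displacement from the next insn */
    }
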
    99 static struct sh4_x86_state sh4_x86;
   101 static uint32_t max_int = 0x7FFFFFFF;
   102 static uint32_t min_int = 0x80000000;
   103 static uint32_t save_fcw; /* save value for fpu control word */
   104 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
   106 void sh4_translate_init(void)
   107 {
   108     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   109     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   110 }
   113 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   114 {
   115     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   116 	sh4_x86.backpatch_size <<= 1;
   117 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   118 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   119 	assert( sh4_x86.backpatch_list != NULL );
   120     }
   121     if( sh4_x86.in_delay_slot ) {
   122 	fixup_pc -= 2;
   123     }
   124     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   125 	((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
   126     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   127     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   128     sh4_x86.backpatch_posn++;
   129 }
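
The list built here is consumed by the end-of-block epilogue, which lives outside this section. Conceptually the consumer walks the records and patches each rel32 placeholder so it lands on a stub that raises exc_code with the recorded instruction count - a hypothetical sketch (stub_for_record is invented for illustration):

    int i;
    for( i = 0; i < sh4_x86.backpatch_posn; i++ ) {
        struct backpatch_record *r = &sh4_x86.backpatch_list[i];
        uint8_t *fixup = (uint8_t *)xlat_current_block->code + r->fixup_offset;
        /* rel32 is relative to the end of the 4-byte displacement */
        *(uint32_t *)fixup = (uint32_t)(stub_for_record(r) - (fixup + 4));
    }
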
   131 /**
   132  * Emit an instruction to load an SH4 reg into a real register
   133  */
   134 static inline void load_reg( int x86reg, int sh4reg ) 
   135 {
   136     /* mov [bp+n], reg */
   137     OP(0x8B);
   138     OP(0x45 + (x86reg<<3));
   139     OP(REG_OFFSET(r[sh4reg]));
   140 }
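
A note on the encoding: 0x8B is mov r32,r/m32, and 0x45 + (x86reg<<3) is the ModRM byte with the target register in bits 3-5, assuming (as all these macros do) that %ebp points at the sh4r structure:

    /* ModRM = mod(01) | reg<<3 | rm(101 = EBP) = 0x40 | (reg<<3) | 0x05  */
    /* so e.g. x86reg=1 (ECX), sh4reg=3 emits: 8B 4D <offsetof(sh4r,r[3])> */
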
   142 static inline void load_reg16s( int x86reg, int sh4reg )
   143 {
   144     OP(0x0F);
   145     OP(0xBF);
   146     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   147 }
   149 static inline void load_reg16u( int x86reg, int sh4reg )
   150 {
   151     OP(0x0F);
   152     OP(0xB7);
   153     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   155 }
   157 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   158 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   159 /**
   160  * Emit an instruction to load an immediate value into a register
   161  */
   162 static inline void load_imm32( int x86reg, uint32_t value ) {
   163     /* mov #value, reg */
   164     OP(0xB8 + x86reg);
   165     OP32(value);
   166 }
   168 /**
   169  * Load an immediate 64-bit quantity (note: x86-64 only)
   170  */
   171 static inline void load_imm64( int x86reg, uint64_t value ) {
   172     /* mov #value, reg */
   173     REXW();
   174     OP(0xB8 + x86reg);
   175     OP64(value);
   176 }
   178 /**
   179  * Emit an instruction to store an SH4 reg (RN)
   180  */
    181 static inline void store_reg( int x86reg, int sh4reg ) {
   182     /* mov reg, [bp+n] */
   183     OP(0x89);
   184     OP(0x45 + (x86reg<<3));
   185     OP(REG_OFFSET(r[sh4reg]));
   186 }
   188 /**
   189  * Load an FR register (single-precision floating point) into an integer x86
   190  * register (eg for register-to-register moves)
   191  */
   192 #define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
   193 #define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
   195 /**
   196  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   197  */
   198 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   199 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   201 /**
    202  * Store an FR register (single-precision floating point) from an integer x86
   203  * register (eg for register-to-register moves)
   204  */
   205 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
   206 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
   208 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   209 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   212 #define push_fpul()  FLDF_sh4r(R_FPUL)
   213 #define pop_fpul()   FSTPF_sh4r(R_FPUL)
   214 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   215 #define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   216 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   217 #define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   218 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   219 #define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   220 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   221 #define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
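
The recurring ^1 and &0x0E index games assume sh4r.fr stores each register pair word-swapped, so that an aligned pair reads directly as a native little-endian double on the x86 host: singles live at fr[bank][n^1], while the DR/XD macros address the pair at frm&0x0E. An illustrative mapping, derived from the macros above rather than stated anywhere in this section:

    /* FR0 -> fr[0][1], FR1 -> fr[0][0], FR2 -> fr[0][3], ...          */
    /* DR0 occupies fr[0][0..1] and is readable as one 64-bit double   */
    /* XF/XD use the same layout in bank fr[1][]                       */
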
   225 /* Exception checks - Note that all exception checks will clobber EAX */
   227 #define check_priv( ) \
   228     if( !sh4_x86.priv_checked ) { \
   229 	sh4_x86.priv_checked = TRUE;\
   230 	load_spreg( R_EAX, R_SR );\
   231 	AND_imm32_r32( SR_MD, R_EAX );\
   232 	if( sh4_x86.in_delay_slot ) {\
   233 	    JE_exc( EXC_SLOT_ILLEGAL );\
   234 	} else {\
   235 	    JE_exc( EXC_ILLEGAL );\
   236 	}\
   237 	sh4_x86.tstate = TSTATE_NONE; \
   238     }\
   240 #define check_fpuen( ) \
   241     if( !sh4_x86.fpuen_checked ) {\
   242 	sh4_x86.fpuen_checked = TRUE;\
   243 	load_spreg( R_EAX, R_SR );\
   244 	AND_imm32_r32( SR_FD, R_EAX );\
   245 	if( sh4_x86.in_delay_slot ) {\
   246 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   247 	} else {\
   248 	    JNE_exc(EXC_FPU_DISABLED);\
   249 	}\
   250 	sh4_x86.tstate = TSTATE_NONE; \
   251     }
   253 #define check_ralign16( x86reg ) \
   254     TEST_imm32_r32( 0x00000001, x86reg ); \
   255     JNE_exc(EXC_DATA_ADDR_READ)
   257 #define check_walign16( x86reg ) \
   258     TEST_imm32_r32( 0x00000001, x86reg ); \
   259     JNE_exc(EXC_DATA_ADDR_WRITE);
   261 #define check_ralign32( x86reg ) \
   262     TEST_imm32_r32( 0x00000003, x86reg ); \
   263     JNE_exc(EXC_DATA_ADDR_READ)
   265 #define check_walign32( x86reg ) \
   266     TEST_imm32_r32( 0x00000003, x86reg ); \
   267     JNE_exc(EXC_DATA_ADDR_WRITE);
   269 #define check_ralign64( x86reg ) \
   270     TEST_imm32_r32( 0x00000007, x86reg ); \
   271     JNE_exc(EXC_DATA_ADDR_READ)
   273 #define check_walign64( x86reg ) \
   274     TEST_imm32_r32( 0x00000007, x86reg ); \
   275     JNE_exc(EXC_DATA_ADDR_WRITE);
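
Each alignment check is just a test of the low address bits, with the failure branch recorded in the backpatch list. check_ralign32, for instance, effectively emits:

    /*  test <x86reg>, 0x00000003                                      */
    /*  jne  <stub raising EXC_DATA_ADDR_READ>   ; rel32, backpatched  */
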
   277 #define UNDEF(ir)
   278 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   279 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   280 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   281 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   282 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   283 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   284 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   286 /**
   287  * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned 
   288  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   289  */
   290 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
    292 #define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg); }
   293 /**
   294  * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned 
   295  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   296  */
   297 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   299 #define MEM_READ_SIZE (CALL_FUNC1_SIZE)
   300 #define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
   301 #define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )
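
For reference, when sh4_x86.tlb_on is set MMU_TRANSLATE_READ expands to roughly the following sequence, which is what the CALL_FUNC1_SIZE + 12 budget in MMU_TRANSLATE_SIZE accounts for:

    /*  call mmu_vma_to_phys_read      ; argument = addr_reg              */
    /*  cmp  eax, MMU_VMA_ERROR                                           */
    /*  je   <exception stub>          ; exc_code -1 appears to mean      */
    /*                                 ; "callee already raised it"       */
    /*  mov  <addr_reg>, eax           ; MEM_RESULT, elided when EAX      */
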
   303 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
   305 /****** Import appropriate calling conventions ******/
   306 #if SIZEOF_VOID_P == 8
   307 #include "sh4/ia64abi.h"
   308 #else /* 32-bit system */
   309 #ifdef APPLE_BUILD
   310 #include "sh4/ia32mac.h"
   311 #else
   312 #include "sh4/ia32abi.h"
   313 #endif
   314 #endif
   316 void sh4_translate_begin_block( sh4addr_t pc ) 
   317 {
    318     enter_block();
   319     sh4_x86.in_delay_slot = FALSE;
   320     sh4_x86.priv_checked = FALSE;
   321     sh4_x86.fpuen_checked = FALSE;
   322     sh4_x86.branch_taken = FALSE;
   323     sh4_x86.backpatch_posn = 0;
   324     sh4_x86.block_start_pc = pc;
   325     sh4_x86.tlb_on = IS_MMU_ENABLED();
   326     sh4_x86.tstate = TSTATE_NONE;
   327     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   328     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;    
   329 }
   332 uint32_t sh4_translate_end_block_size()
   333 {
   334     if( sh4_x86.backpatch_posn <= 3 ) {
   335         return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
   336     } else {
   337         return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
   338     }
   339 }
   342 /**
   343  * Embed a breakpoint into the generated code
   344  */
   345 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   346 {
   347     load_imm32( R_EAX, pc );
   348     call_func1( sh4_translate_breakpoint_hit, R_EAX );
   349     sh4_x86.tstate = TSTATE_NONE;
   350 }
    353 #define UNTRANSLATABLE(pc) (!IS_IN_ICACHE(pc))
   355 /**
   356  * Embed a call to sh4_execute_instruction for situations that we
   357  * can't translate (just page-crossing delay slots at the moment).
   358  * Caller is responsible for setting new_pc before calling this function.
   359  *
   360  * Performs:
   361  *   Set PC = endpc
   362  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   363  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   364  *   Call sh4_execute_instruction
   365  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   366  */
   367 void exit_block_emu( sh4vma_t endpc )
   368 {
   369     load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
   370     ADD_r32_sh4r( R_ECX, R_PC );
   372     load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
   373     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   374     load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
   375     store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
   377     call_func0( sh4_execute_instruction );    
   378     load_spreg( R_EAX, R_PC );
   379     if( sh4_x86.tlb_on ) {
   380 	call_func1(xlat_get_code_by_vma,R_EAX);
   381     } else {
   382 	call_func1(xlat_get_code,R_EAX);
   383     }
   384     AND_imm8s_rptr( 0xFC, R_EAX );
   385     POP_r32(R_EBP);
   386     RET();
   387 } 
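
A quick example of the cycle arithmetic above: for a delay slot two instructions into the block, endpc - block_start_pc = 4, so ((4>>1)+1) = 3 instruction periods are charged - the two instructions already translated plus the one about to be emulated.
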
    389 /**
    390  * Translate a single instruction. Delayed branches are handled specially
    391  * by translating both the branch and the delayed instruction as a single
    392  * unit, since the delay-slot instruction executes before the branch takes effect.
    393  * The instruction MUST be in the icache (assert check)
    394  *
    395  * @return true if the instruction marks the end of a basic block
    396  * (eg a branch or any other instruction that sets a new PC)
    397  */
   398 uint32_t sh4_translate_instruction( sh4vma_t pc )
   399 {
   400     uint32_t ir;
   401     /* Read instruction from icache */
   402     assert( IS_IN_ICACHE(pc) );
   403     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
    405 	/* Note: if PC were not in the current icache, it would usually mean
    406 	 * we're running with the MMU on and have gone past the end of the page.
    407 	 * Since sh4_translate_block is pretty careful about this, that case is
    408 	 * almost certainly a delay slot.
    409 	 *
    410 	 * Since we can't assume the page is present (and we can't fault it in
    411 	 * at this point), it is handled by inlining a call to
    412 	 * sh4_execute_instruction instead - see exit_block_emu() above.
    413 	 */
   415     if( !sh4_x86.in_delay_slot ) {
   416 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   417     }
   418 %%
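
Everything after the %% marker is input to lxdream's decoder generator rather than plain C: each `MNEMONIC operands {: ... :}` block supplies the action body, and the generator wraps it in the opcode decode. A hypothetical sketch of the generated shape for the first rule (ADD Rm, Rn is encoded 0011nnnnmmmm1100 on the SH4):

    switch( ir & 0xF00F ) {
    case 0x300C: {                          /* ADD Rm, Rn */
        uint32_t Rm = (ir>>4) & 0xF, Rn = (ir>>8) & 0xF;
        /* ...action body from the block below... */
        break;
    }
    /* ... */
    }
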
   419 /* ALU operations */
   420 ADD Rm, Rn {:
   421     COUNT_INST(I_ADD);
   422     load_reg( R_EAX, Rm );
   423     load_reg( R_ECX, Rn );
   424     ADD_r32_r32( R_EAX, R_ECX );
   425     store_reg( R_ECX, Rn );
   426     sh4_x86.tstate = TSTATE_NONE;
   427 :}
   428 ADD #imm, Rn {:  
   429     COUNT_INST(I_ADDI);
   430     load_reg( R_EAX, Rn );
   431     ADD_imm8s_r32( imm, R_EAX );
   432     store_reg( R_EAX, Rn );
   433     sh4_x86.tstate = TSTATE_NONE;
   434 :}
   435 ADDC Rm, Rn {:
   436     COUNT_INST(I_ADDC);
   437     if( sh4_x86.tstate != TSTATE_C ) {
   438 	LDC_t();
   439     }
   440     load_reg( R_EAX, Rm );
   441     load_reg( R_ECX, Rn );
   442     ADC_r32_r32( R_EAX, R_ECX );
   443     store_reg( R_ECX, Rn );
   444     SETC_t();
   445     sh4_x86.tstate = TSTATE_C;
   446 :}
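
ADDC shows the tstate contract at work: LDC_t() reloads sh4r.t into the host carry flag only when the previous instruction didn't already leave it there, and SETC_t() plus tstate = TSTATE_C records that the host flags now mirror T. A chain like

    /*  CLRT; ADDC R0,R1; ADDC R2,R3  */

therefore pays for at most one flag reload: the second ADDC's carry-in comes straight from the host carry left by the first, since SETC and the register moves in between do not clobber EFLAGS.
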
   447 ADDV Rm, Rn {:
   448     COUNT_INST(I_ADDV);
   449     load_reg( R_EAX, Rm );
   450     load_reg( R_ECX, Rn );
   451     ADD_r32_r32( R_EAX, R_ECX );
   452     store_reg( R_ECX, Rn );
   453     SETO_t();
   454     sh4_x86.tstate = TSTATE_O;
   455 :}
   456 AND Rm, Rn {:
   457     COUNT_INST(I_AND);
   458     load_reg( R_EAX, Rm );
   459     load_reg( R_ECX, Rn );
   460     AND_r32_r32( R_EAX, R_ECX );
   461     store_reg( R_ECX, Rn );
   462     sh4_x86.tstate = TSTATE_NONE;
   463 :}
   464 AND #imm, R0 {:  
   465     COUNT_INST(I_ANDI);
   466     load_reg( R_EAX, 0 );
   467     AND_imm32_r32(imm, R_EAX); 
   468     store_reg( R_EAX, 0 );
   469     sh4_x86.tstate = TSTATE_NONE;
   470 :}
   471 AND.B #imm, @(R0, GBR) {: 
   472     COUNT_INST(I_ANDB);
   473     load_reg( R_EAX, 0 );
   474     load_spreg( R_ECX, R_GBR );
   475     ADD_r32_r32( R_ECX, R_EAX );
   476     MMU_TRANSLATE_WRITE( R_EAX );
   477     PUSH_realigned_r32(R_EAX);
   478     MEM_READ_BYTE( R_EAX, R_EAX );
   479     POP_realigned_r32(R_ECX);
   480     AND_imm32_r32(imm, R_EAX );
   481     MEM_WRITE_BYTE( R_ECX, R_EAX );
   482     sh4_x86.tstate = TSTATE_NONE;
   483 :}
   484 CMP/EQ Rm, Rn {:  
   485     COUNT_INST(I_CMPEQ);
   486     load_reg( R_EAX, Rm );
   487     load_reg( R_ECX, Rn );
   488     CMP_r32_r32( R_EAX, R_ECX );
   489     SETE_t();
   490     sh4_x86.tstate = TSTATE_E;
   491 :}
   492 CMP/EQ #imm, R0 {:  
   493     COUNT_INST(I_CMPEQI);
   494     load_reg( R_EAX, 0 );
   495     CMP_imm8s_r32(imm, R_EAX);
   496     SETE_t();
   497     sh4_x86.tstate = TSTATE_E;
   498 :}
   499 CMP/GE Rm, Rn {:  
   500     COUNT_INST(I_CMPGE);
   501     load_reg( R_EAX, Rm );
   502     load_reg( R_ECX, Rn );
   503     CMP_r32_r32( R_EAX, R_ECX );
   504     SETGE_t();
   505     sh4_x86.tstate = TSTATE_GE;
   506 :}
   507 CMP/GT Rm, Rn {: 
   508     COUNT_INST(I_CMPGT);
   509     load_reg( R_EAX, Rm );
   510     load_reg( R_ECX, Rn );
   511     CMP_r32_r32( R_EAX, R_ECX );
   512     SETG_t();
   513     sh4_x86.tstate = TSTATE_G;
   514 :}
   515 CMP/HI Rm, Rn {:  
   516     COUNT_INST(I_CMPHI);
   517     load_reg( R_EAX, Rm );
   518     load_reg( R_ECX, Rn );
   519     CMP_r32_r32( R_EAX, R_ECX );
   520     SETA_t();
   521     sh4_x86.tstate = TSTATE_A;
   522 :}
   523 CMP/HS Rm, Rn {: 
   524     COUNT_INST(I_CMPHS);
   525     load_reg( R_EAX, Rm );
   526     load_reg( R_ECX, Rn );
   527     CMP_r32_r32( R_EAX, R_ECX );
   528     SETAE_t();
   529     sh4_x86.tstate = TSTATE_AE;
   530  :}
   531 CMP/PL Rn {: 
   532     COUNT_INST(I_CMPPL);
   533     load_reg( R_EAX, Rn );
   534     CMP_imm8s_r32( 0, R_EAX );
   535     SETG_t();
   536     sh4_x86.tstate = TSTATE_G;
   537 :}
   538 CMP/PZ Rn {:  
   539     COUNT_INST(I_CMPPZ);
   540     load_reg( R_EAX, Rn );
   541     CMP_imm8s_r32( 0, R_EAX );
   542     SETGE_t();
   543     sh4_x86.tstate = TSTATE_GE;
   544 :}
   545 CMP/STR Rm, Rn {:  
   546     COUNT_INST(I_CMPSTR);
   547     load_reg( R_EAX, Rm );
   548     load_reg( R_ECX, Rn );
   549     XOR_r32_r32( R_ECX, R_EAX );
   550     TEST_r8_r8( R_AL, R_AL );
   551     JE_rel8(target1);
   552     TEST_r8_r8( R_AH, R_AH );
   553     JE_rel8(target2);
   554     SHR_imm8_r32( 16, R_EAX );
   555     TEST_r8_r8( R_AL, R_AL );
   556     JE_rel8(target3);
   557     TEST_r8_r8( R_AH, R_AH );
   558     JMP_TARGET(target1);
   559     JMP_TARGET(target2);
   560     JMP_TARGET(target3);
   561     SETE_t();
   562     sh4_x86.tstate = TSTATE_E;
   563 :}
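
The jump chain above implements the SH4 "compare string" semantics: T is set when any of the four corresponding bytes of Rm and Rn are equal. A reference version (illustrative helper only, not used by the translator):

    static int cmpstr_ref( uint32_t rm, uint32_t rn )
    {
        uint32_t x = rm ^ rn;              /* equal bytes become 0x00 */
        return (x & 0x000000FF) == 0 || (x & 0x0000FF00) == 0 ||
               (x & 0x00FF0000) == 0 || (x & 0xFF000000) == 0;
    }
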
   564 DIV0S Rm, Rn {:
   565     COUNT_INST(I_DIV0S);
   566     load_reg( R_EAX, Rm );
   567     load_reg( R_ECX, Rn );
   568     SHR_imm8_r32( 31, R_EAX );
   569     SHR_imm8_r32( 31, R_ECX );
   570     store_spreg( R_EAX, R_M );
   571     store_spreg( R_ECX, R_Q );
   572     CMP_r32_r32( R_EAX, R_ECX );
   573     SETNE_t();
   574     sh4_x86.tstate = TSTATE_NE;
   575 :}
   576 DIV0U {:  
   577     COUNT_INST(I_DIV0U);
   578     XOR_r32_r32( R_EAX, R_EAX );
   579     store_spreg( R_EAX, R_Q );
   580     store_spreg( R_EAX, R_M );
   581     store_spreg( R_EAX, R_T );
   582     sh4_x86.tstate = TSTATE_C; // works for DIV1
   583 :}
   584 DIV1 Rm, Rn {:
   585     COUNT_INST(I_DIV1);
   586     load_spreg( R_ECX, R_M );
   587     load_reg( R_EAX, Rn );
   588     if( sh4_x86.tstate != TSTATE_C ) {
   589 	LDC_t();
   590     }
   591     RCL1_r32( R_EAX );
   592     SETC_r8( R_DL ); // Q'
   593     CMP_sh4r_r32( R_Q, R_ECX );
   594     JE_rel8(mqequal);
   595     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   596     JMP_rel8(end);
   597     JMP_TARGET(mqequal);
   598     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   599     JMP_TARGET(end);
   600     store_reg( R_EAX, Rn ); // Done with Rn now
   601     SETC_r8(R_AL); // tmp1
   602     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   603     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   604     store_spreg( R_ECX, R_Q );
   605     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   606     MOVZX_r8_r32( R_AL, R_EAX );
   607     store_spreg( R_EAX, R_T );
   608     sh4_x86.tstate = TSTATE_NONE;
   609 :}
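
For readers following the flag juggling above: this is one step of the SH4's 1-bit non-restoring division. The reference semantics, condensed from the SH-4 software manual's sample code (illustrative helper, not part of the source):

    static void div1_ref( uint32_t *Rn, uint32_t Rm,
                          uint32_t *Q, uint32_t M, uint32_t *T )
    {
        uint32_t msb = *Rn >> 31;            /* bit shifted out of Rn     */
        uint32_t old_q = *Q, tmp;
        *Rn = (*Rn << 1) | *T;               /* shift T in from the right */
        tmp = *Rn;
        if( old_q == M ) {
            *Rn -= Rm;
            *Q = msb ^ (*Rn > tmp) ^ M;      /* fold in the borrow        */
        } else {
            *Rn += Rm;
            *Q = msb ^ (*Rn < tmp) ^ M;      /* fold in the carry         */
        }
        *T = (*Q == M);
    }
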
   610 DMULS.L Rm, Rn {:  
   611     COUNT_INST(I_DMULS);
   612     load_reg( R_EAX, Rm );
   613     load_reg( R_ECX, Rn );
   614     IMUL_r32(R_ECX);
   615     store_spreg( R_EDX, R_MACH );
   616     store_spreg( R_EAX, R_MACL );
   617     sh4_x86.tstate = TSTATE_NONE;
   618 :}
   619 DMULU.L Rm, Rn {:  
   620     COUNT_INST(I_DMULU);
   621     load_reg( R_EAX, Rm );
   622     load_reg( R_ECX, Rn );
   623     MUL_r32(R_ECX);
   624     store_spreg( R_EDX, R_MACH );
   625     store_spreg( R_EAX, R_MACL );    
   626     sh4_x86.tstate = TSTATE_NONE;
   627 :}
   628 DT Rn {:  
   629     COUNT_INST(I_DT);
   630     load_reg( R_EAX, Rn );
   631     ADD_imm8s_r32( -1, R_EAX );
   632     store_reg( R_EAX, Rn );
   633     SETE_t();
   634     sh4_x86.tstate = TSTATE_E;
   635 :}
   636 EXTS.B Rm, Rn {:  
   637     COUNT_INST(I_EXTSB);
   638     load_reg( R_EAX, Rm );
   639     MOVSX_r8_r32( R_EAX, R_EAX );
   640     store_reg( R_EAX, Rn );
   641 :}
   642 EXTS.W Rm, Rn {:  
   643     COUNT_INST(I_EXTSW);
   644     load_reg( R_EAX, Rm );
   645     MOVSX_r16_r32( R_EAX, R_EAX );
   646     store_reg( R_EAX, Rn );
   647 :}
   648 EXTU.B Rm, Rn {:  
   649     COUNT_INST(I_EXTUB);
   650     load_reg( R_EAX, Rm );
   651     MOVZX_r8_r32( R_EAX, R_EAX );
   652     store_reg( R_EAX, Rn );
   653 :}
   654 EXTU.W Rm, Rn {:  
   655     COUNT_INST(I_EXTUW);
   656     load_reg( R_EAX, Rm );
   657     MOVZX_r16_r32( R_EAX, R_EAX );
   658     store_reg( R_EAX, Rn );
   659 :}
   660 MAC.L @Rm+, @Rn+ {:
   661     COUNT_INST(I_MACL);
   662     if( Rm == Rn ) {
   663 	load_reg( R_EAX, Rm );
   664 	check_ralign32( R_EAX );
   665 	MMU_TRANSLATE_READ( R_EAX );
   666 	PUSH_realigned_r32( R_EAX );
   667 	load_reg( R_EAX, Rn );
   668 	ADD_imm8s_r32( 4, R_EAX );
   669 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   670 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
   671 	// Note translate twice in case of page boundaries. Maybe worth
   672 	// adding a page-boundary check to skip the second translation
   673     } else {
   674 	load_reg( R_EAX, Rm );
   675 	check_ralign32( R_EAX );
   676 	MMU_TRANSLATE_READ( R_EAX );
   677 	load_reg( R_ECX, Rn );
   678 	check_ralign32( R_ECX );
   679 	PUSH_realigned_r32( R_EAX );
   680 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   681 	MOV_r32_r32( R_ECX, R_EAX );
   682 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   683 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   684     }
   685     MEM_READ_LONG( R_EAX, R_EAX );
   686     POP_r32( R_ECX );
   687     PUSH_r32( R_EAX );
   688     MEM_READ_LONG( R_ECX, R_EAX );
   689     POP_realigned_r32( R_ECX );
   691     IMUL_r32( R_ECX );
   692     ADD_r32_sh4r( R_EAX, R_MACL );
   693     ADC_r32_sh4r( R_EDX, R_MACH );
   695     load_spreg( R_ECX, R_S );
   696     TEST_r32_r32(R_ECX, R_ECX);
   697     JE_rel8( nosat );
   698     call_func0( signsat48 );
   699     JMP_TARGET( nosat );
   700     sh4_x86.tstate = TSTATE_NONE;
   701 :}
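
MAC.L is a 64-bit multiply-accumulate into MACH:MACL; when the S flag is set, the signsat48 path clamps the accumulator to 48 bits. Reference semantics as a sketch (field and variable names here are hypothetical):

    int64_t acc = ((int64_t)mach << 32) | macl;
    acc += (int64_t)(int32_t)op1 * (int32_t)(int32_t)op2;
    if( s_flag ) {
        if( acc >  0x00007FFFFFFFFFFFLL ) acc =  0x00007FFFFFFFFFFFLL;
        if( acc < -0x0000800000000000LL ) acc = -0x0000800000000000LL;
    }
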
   702 MAC.W @Rm+, @Rn+ {:  
   703     COUNT_INST(I_MACW);
   704     if( Rm == Rn ) {
   705 	load_reg( R_EAX, Rm );
   706 	check_ralign16( R_EAX );
   707 	MMU_TRANSLATE_READ( R_EAX );
   708 	PUSH_realigned_r32( R_EAX );
   709 	load_reg( R_EAX, Rn );
   710 	ADD_imm8s_r32( 2, R_EAX );
   711 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   712 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   713 	// Note translate twice in case of page boundaries. Maybe worth
   714 	// adding a page-boundary check to skip the second translation
   715     } else {
   716 	load_reg( R_EAX, Rm );
   717 	check_ralign16( R_EAX );
   718 	MMU_TRANSLATE_READ( R_EAX );
   719 	load_reg( R_ECX, Rn );
   720 	check_ralign16( R_ECX );
   721 	PUSH_realigned_r32( R_EAX );
   722 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   723 	MOV_r32_r32( R_ECX, R_EAX );
   724 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   725 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   726     }
   727     MEM_READ_WORD( R_EAX, R_EAX );
   728     POP_r32( R_ECX );
   729     PUSH_r32( R_EAX );
   730     MEM_READ_WORD( R_ECX, R_EAX );
   731     POP_realigned_r32( R_ECX );
   732     IMUL_r32( R_ECX );
   734     load_spreg( R_ECX, R_S );
   735     TEST_r32_r32( R_ECX, R_ECX );
   736     JE_rel8( nosat );
   738     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   739     JNO_rel8( end );            // 2
   740     load_imm32( R_EDX, 1 );         // 5
   741     store_spreg( R_EDX, R_MACH );   // 6
   742     JS_rel8( positive );        // 2
   743     load_imm32( R_EAX, 0x80000000 );// 5
   744     store_spreg( R_EAX, R_MACL );   // 6
   745     JMP_rel8(end2);           // 2
   747     JMP_TARGET(positive);
   748     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   749     store_spreg( R_EAX, R_MACL );   // 6
   750     JMP_rel8(end3);            // 2
   752     JMP_TARGET(nosat);
   753     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   754     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   755     JMP_TARGET(end);
   756     JMP_TARGET(end2);
   757     JMP_TARGET(end3);
   758     sh4_x86.tstate = TSTATE_NONE;
   759 :}
   760 MOVT Rn {:  
   761     COUNT_INST(I_MOVT);
   762     load_spreg( R_EAX, R_T );
   763     store_reg( R_EAX, Rn );
   764 :}
   765 MUL.L Rm, Rn {:  
   766     COUNT_INST(I_MULL);
   767     load_reg( R_EAX, Rm );
   768     load_reg( R_ECX, Rn );
   769     MUL_r32( R_ECX );
   770     store_spreg( R_EAX, R_MACL );
   771     sh4_x86.tstate = TSTATE_NONE;
   772 :}
   773 MULS.W Rm, Rn {:
   774     COUNT_INST(I_MULSW);
   775     load_reg16s( R_EAX, Rm );
   776     load_reg16s( R_ECX, Rn );
   777     MUL_r32( R_ECX );
   778     store_spreg( R_EAX, R_MACL );
   779     sh4_x86.tstate = TSTATE_NONE;
   780 :}
   781 MULU.W Rm, Rn {:  
   782     COUNT_INST(I_MULUW);
   783     load_reg16u( R_EAX, Rm );
   784     load_reg16u( R_ECX, Rn );
   785     MUL_r32( R_ECX );
   786     store_spreg( R_EAX, R_MACL );
   787     sh4_x86.tstate = TSTATE_NONE;
   788 :}
   789 NEG Rm, Rn {:
   790     COUNT_INST(I_NEG);
   791     load_reg( R_EAX, Rm );
   792     NEG_r32( R_EAX );
   793     store_reg( R_EAX, Rn );
   794     sh4_x86.tstate = TSTATE_NONE;
   795 :}
   796 NEGC Rm, Rn {:  
   797     COUNT_INST(I_NEGC);
   798     load_reg( R_EAX, Rm );
   799     XOR_r32_r32( R_ECX, R_ECX );
   800     LDC_t();
   801     SBB_r32_r32( R_EAX, R_ECX );
   802     store_reg( R_ECX, Rn );
   803     SETC_t();
   804     sh4_x86.tstate = TSTATE_C;
   805 :}
   806 NOT Rm, Rn {:  
   807     COUNT_INST(I_NOT);
   808     load_reg( R_EAX, Rm );
   809     NOT_r32( R_EAX );
   810     store_reg( R_EAX, Rn );
   811     sh4_x86.tstate = TSTATE_NONE;
   812 :}
   813 OR Rm, Rn {:  
   814     COUNT_INST(I_OR);
   815     load_reg( R_EAX, Rm );
   816     load_reg( R_ECX, Rn );
   817     OR_r32_r32( R_EAX, R_ECX );
   818     store_reg( R_ECX, Rn );
   819     sh4_x86.tstate = TSTATE_NONE;
   820 :}
   821 OR #imm, R0 {:
   822     COUNT_INST(I_ORI);
   823     load_reg( R_EAX, 0 );
   824     OR_imm32_r32(imm, R_EAX);
   825     store_reg( R_EAX, 0 );
   826     sh4_x86.tstate = TSTATE_NONE;
   827 :}
   828 OR.B #imm, @(R0, GBR) {:  
   829     COUNT_INST(I_ORB);
   830     load_reg( R_EAX, 0 );
   831     load_spreg( R_ECX, R_GBR );
   832     ADD_r32_r32( R_ECX, R_EAX );
   833     MMU_TRANSLATE_WRITE( R_EAX );
   834     PUSH_realigned_r32(R_EAX);
   835     MEM_READ_BYTE( R_EAX, R_EAX );
   836     POP_realigned_r32(R_ECX);
   837     OR_imm32_r32(imm, R_EAX );
   838     MEM_WRITE_BYTE( R_ECX, R_EAX );
   839     sh4_x86.tstate = TSTATE_NONE;
   840 :}
   841 ROTCL Rn {:
   842     COUNT_INST(I_ROTCL);
   843     load_reg( R_EAX, Rn );
   844     if( sh4_x86.tstate != TSTATE_C ) {
   845 	LDC_t();
   846     }
   847     RCL1_r32( R_EAX );
   848     store_reg( R_EAX, Rn );
   849     SETC_t();
   850     sh4_x86.tstate = TSTATE_C;
   851 :}
   852 ROTCR Rn {:  
   853     COUNT_INST(I_ROTCR);
   854     load_reg( R_EAX, Rn );
   855     if( sh4_x86.tstate != TSTATE_C ) {
   856 	LDC_t();
   857     }
   858     RCR1_r32( R_EAX );
   859     store_reg( R_EAX, Rn );
   860     SETC_t();
   861     sh4_x86.tstate = TSTATE_C;
   862 :}
   863 ROTL Rn {:  
   864     COUNT_INST(I_ROTL);
   865     load_reg( R_EAX, Rn );
   866     ROL1_r32( R_EAX );
   867     store_reg( R_EAX, Rn );
   868     SETC_t();
   869     sh4_x86.tstate = TSTATE_C;
   870 :}
   871 ROTR Rn {:  
   872     COUNT_INST(I_ROTR);
   873     load_reg( R_EAX, Rn );
   874     ROR1_r32( R_EAX );
   875     store_reg( R_EAX, Rn );
   876     SETC_t();
   877     sh4_x86.tstate = TSTATE_C;
   878 :}
   879 SHAD Rm, Rn {:
   880     COUNT_INST(I_SHAD);
   881     /* Annoyingly enough, not directly convertible */
   882     load_reg( R_EAX, Rn );
   883     load_reg( R_ECX, Rm );
   884     CMP_imm32_r32( 0, R_ECX );
   885     JGE_rel8(doshl);
   887     NEG_r32( R_ECX );      // 2
   888     AND_imm8_r8( 0x1F, R_CL ); // 3
   889     JE_rel8(emptysar);     // 2
   890     SAR_r32_CL( R_EAX );       // 2
   891     JMP_rel8(end);          // 2
   893     JMP_TARGET(emptysar);
   894     SAR_imm8_r32(31, R_EAX );  // 3
   895     JMP_rel8(end2);
   897     JMP_TARGET(doshl);
   898     AND_imm8_r8( 0x1F, R_CL ); // 3
   899     SHL_r32_CL( R_EAX );       // 2
   900     JMP_TARGET(end);
   901     JMP_TARGET(end2);
   902     store_reg( R_EAX, Rn );
   903     sh4_x86.tstate = TSTATE_NONE;
   904 :}
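
SHAD is awkward on x86 because the SH4 shifts by a signed, modulo-32 amount where a negative count of exactly -32, -64, ... must still produce a full sign fill (the emptysar path above). Reference semantics (illustrative):

    static int32_t shad_ref( int32_t rn, int32_t rm )
    {
        if( rm >= 0 )
            return rn << (rm & 0x1F);
        if( (rm & 0x1F) == 0 )        /* right shift by 32: sign fill */
            return rn >> 31;
        return rn >> ((-rm) & 0x1F);  /* arithmetic right shift       */
    }

SHLD below has the same structure with logical shifts, so its 32-bit right shift yields zero instead (the emptyshr path).
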
   905 SHLD Rm, Rn {:  
   906     COUNT_INST(I_SHLD);
   907     load_reg( R_EAX, Rn );
   908     load_reg( R_ECX, Rm );
   909     CMP_imm32_r32( 0, R_ECX );
   910     JGE_rel8(doshl);
   912     NEG_r32( R_ECX );      // 2
   913     AND_imm8_r8( 0x1F, R_CL ); // 3
   914     JE_rel8(emptyshr );
   915     SHR_r32_CL( R_EAX );       // 2
   916     JMP_rel8(end);          // 2
   918     JMP_TARGET(emptyshr);
   919     XOR_r32_r32( R_EAX, R_EAX );
   920     JMP_rel8(end2);
   922     JMP_TARGET(doshl);
   923     AND_imm8_r8( 0x1F, R_CL ); // 3
   924     SHL_r32_CL( R_EAX );       // 2
   925     JMP_TARGET(end);
   926     JMP_TARGET(end2);
   927     store_reg( R_EAX, Rn );
   928     sh4_x86.tstate = TSTATE_NONE;
   929 :}
   930 SHAL Rn {: 
   931     COUNT_INST(I_SHAL);
   932     load_reg( R_EAX, Rn );
   933     SHL1_r32( R_EAX );
   934     SETC_t();
   935     store_reg( R_EAX, Rn );
   936     sh4_x86.tstate = TSTATE_C;
   937 :}
   938 SHAR Rn {:  
   939     COUNT_INST(I_SHAR);
   940     load_reg( R_EAX, Rn );
   941     SAR1_r32( R_EAX );
   942     SETC_t();
   943     store_reg( R_EAX, Rn );
   944     sh4_x86.tstate = TSTATE_C;
   945 :}
   946 SHLL Rn {:  
   947     COUNT_INST(I_SHLL);
   948     load_reg( R_EAX, Rn );
   949     SHL1_r32( R_EAX );
   950     SETC_t();
   951     store_reg( R_EAX, Rn );
   952     sh4_x86.tstate = TSTATE_C;
   953 :}
   954 SHLL2 Rn {:
   955     COUNT_INST(I_SHLL);
   956     load_reg( R_EAX, Rn );
   957     SHL_imm8_r32( 2, R_EAX );
   958     store_reg( R_EAX, Rn );
   959     sh4_x86.tstate = TSTATE_NONE;
   960 :}
   961 SHLL8 Rn {:  
   962     COUNT_INST(I_SHLL);
   963     load_reg( R_EAX, Rn );
   964     SHL_imm8_r32( 8, R_EAX );
   965     store_reg( R_EAX, Rn );
   966     sh4_x86.tstate = TSTATE_NONE;
   967 :}
   968 SHLL16 Rn {:  
   969     COUNT_INST(I_SHLL);
   970     load_reg( R_EAX, Rn );
   971     SHL_imm8_r32( 16, R_EAX );
   972     store_reg( R_EAX, Rn );
   973     sh4_x86.tstate = TSTATE_NONE;
   974 :}
   975 SHLR Rn {:  
   976     COUNT_INST(I_SHLR);
   977     load_reg( R_EAX, Rn );
   978     SHR1_r32( R_EAX );
   979     SETC_t();
   980     store_reg( R_EAX, Rn );
   981     sh4_x86.tstate = TSTATE_C;
   982 :}
   983 SHLR2 Rn {:  
   984     COUNT_INST(I_SHLR);
   985     load_reg( R_EAX, Rn );
   986     SHR_imm8_r32( 2, R_EAX );
   987     store_reg( R_EAX, Rn );
   988     sh4_x86.tstate = TSTATE_NONE;
   989 :}
   990 SHLR8 Rn {:  
   991     COUNT_INST(I_SHLR);
   992     load_reg( R_EAX, Rn );
   993     SHR_imm8_r32( 8, R_EAX );
   994     store_reg( R_EAX, Rn );
   995     sh4_x86.tstate = TSTATE_NONE;
   996 :}
   997 SHLR16 Rn {:  
   998     COUNT_INST(I_SHLR);
   999     load_reg( R_EAX, Rn );
  1000     SHR_imm8_r32( 16, R_EAX );
  1001     store_reg( R_EAX, Rn );
  1002     sh4_x86.tstate = TSTATE_NONE;
  1003 :}
  1004 SUB Rm, Rn {:  
  1005     COUNT_INST(I_SUB);
  1006     load_reg( R_EAX, Rm );
  1007     load_reg( R_ECX, Rn );
  1008     SUB_r32_r32( R_EAX, R_ECX );
  1009     store_reg( R_ECX, Rn );
  1010     sh4_x86.tstate = TSTATE_NONE;
  1011 :}
  1012 SUBC Rm, Rn {:  
  1013     COUNT_INST(I_SUBC);
  1014     load_reg( R_EAX, Rm );
  1015     load_reg( R_ECX, Rn );
  1016     if( sh4_x86.tstate != TSTATE_C ) {
  1017 	LDC_t();
   1018     }
   1019     SBB_r32_r32( R_EAX, R_ECX );
  1020     store_reg( R_ECX, Rn );
  1021     SETC_t();
  1022     sh4_x86.tstate = TSTATE_C;
  1023 :}
  1024 SUBV Rm, Rn {:  
  1025     COUNT_INST(I_SUBV);
  1026     load_reg( R_EAX, Rm );
  1027     load_reg( R_ECX, Rn );
  1028     SUB_r32_r32( R_EAX, R_ECX );
  1029     store_reg( R_ECX, Rn );
  1030     SETO_t();
  1031     sh4_x86.tstate = TSTATE_O;
  1032 :}
  1033 SWAP.B Rm, Rn {:  
  1034     COUNT_INST(I_SWAPB);
  1035     load_reg( R_EAX, Rm );
  1036     XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
  1037     store_reg( R_EAX, Rn );
  1038 :}
  1039 SWAP.W Rm, Rn {:  
   1040     COUNT_INST(I_SWAPW);
  1041     load_reg( R_EAX, Rm );
  1042     MOV_r32_r32( R_EAX, R_ECX );
  1043     SHL_imm8_r32( 16, R_ECX );
  1044     SHR_imm8_r32( 16, R_EAX );
  1045     OR_r32_r32( R_EAX, R_ECX );
  1046     store_reg( R_ECX, Rn );
  1047     sh4_x86.tstate = TSTATE_NONE;
  1048 :}
  1049 TAS.B @Rn {:  
  1050     COUNT_INST(I_TASB);
  1051     load_reg( R_EAX, Rn );
  1052     MMU_TRANSLATE_WRITE( R_EAX );
  1053     PUSH_realigned_r32( R_EAX );
  1054     MEM_READ_BYTE( R_EAX, R_EAX );
  1055     TEST_r8_r8( R_AL, R_AL );
  1056     SETE_t();
  1057     OR_imm8_r8( 0x80, R_AL );
  1058     POP_realigned_r32( R_ECX );
  1059     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1060     sh4_x86.tstate = TSTATE_NONE;
  1061 :}
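
TAS.B is the SH4's test-and-set: read a byte, set T if it was zero, then write it back with bit 7 set. On hardware this is a locked bus cycle; the emulator can do it non-atomically since translated code runs single-threaded. Reference semantics (illustrative):

    static void tas_ref( uint8_t *p, uint32_t *T )
    {
        uint8_t v = *p;
        *T = (v == 0);
        *p = v | 0x80;
    }
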
  1062 TST Rm, Rn {:  
  1063     COUNT_INST(I_TST);
  1064     load_reg( R_EAX, Rm );
  1065     load_reg( R_ECX, Rn );
  1066     TEST_r32_r32( R_EAX, R_ECX );
  1067     SETE_t();
  1068     sh4_x86.tstate = TSTATE_E;
  1069 :}
  1070 TST #imm, R0 {:  
  1071     COUNT_INST(I_TSTI);
  1072     load_reg( R_EAX, 0 );
  1073     TEST_imm32_r32( imm, R_EAX );
  1074     SETE_t();
  1075     sh4_x86.tstate = TSTATE_E;
  1076 :}
  1077 TST.B #imm, @(R0, GBR) {:  
  1078     COUNT_INST(I_TSTB);
  1079     load_reg( R_EAX, 0);
   1080     load_spreg( R_ECX, R_GBR );
  1081     ADD_r32_r32( R_ECX, R_EAX );
  1082     MMU_TRANSLATE_READ( R_EAX );
  1083     MEM_READ_BYTE( R_EAX, R_EAX );
  1084     TEST_imm8_r8( imm, R_AL );
  1085     SETE_t();
  1086     sh4_x86.tstate = TSTATE_E;
  1087 :}
  1088 XOR Rm, Rn {:  
  1089     COUNT_INST(I_XOR);
  1090     load_reg( R_EAX, Rm );
  1091     load_reg( R_ECX, Rn );
  1092     XOR_r32_r32( R_EAX, R_ECX );
  1093     store_reg( R_ECX, Rn );
  1094     sh4_x86.tstate = TSTATE_NONE;
  1095 :}
  1096 XOR #imm, R0 {:  
  1097     COUNT_INST(I_XORI);
  1098     load_reg( R_EAX, 0 );
  1099     XOR_imm32_r32( imm, R_EAX );
  1100     store_reg( R_EAX, 0 );
  1101     sh4_x86.tstate = TSTATE_NONE;
  1102 :}
  1103 XOR.B #imm, @(R0, GBR) {:  
  1104     COUNT_INST(I_XORB);
  1105     load_reg( R_EAX, 0 );
  1106     load_spreg( R_ECX, R_GBR );
  1107     ADD_r32_r32( R_ECX, R_EAX );
  1108     MMU_TRANSLATE_WRITE( R_EAX );
  1109     PUSH_realigned_r32(R_EAX);
  1110     MEM_READ_BYTE(R_EAX, R_EAX);
  1111     POP_realigned_r32(R_ECX);
  1112     XOR_imm32_r32( imm, R_EAX );
  1113     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1114     sh4_x86.tstate = TSTATE_NONE;
  1115 :}
  1116 XTRCT Rm, Rn {:
  1117     COUNT_INST(I_XTRCT);
  1118     load_reg( R_EAX, Rm );
  1119     load_reg( R_ECX, Rn );
  1120     SHL_imm8_r32( 16, R_EAX );
  1121     SHR_imm8_r32( 16, R_ECX );
  1122     OR_r32_r32( R_EAX, R_ECX );
  1123     store_reg( R_ECX, Rn );
  1124     sh4_x86.tstate = TSTATE_NONE;
  1125 :}
  1127 /* Data move instructions */
  1128 MOV Rm, Rn {:  
  1129     COUNT_INST(I_MOV);
  1130     load_reg( R_EAX, Rm );
  1131     store_reg( R_EAX, Rn );
  1132 :}
  1133 MOV #imm, Rn {:  
  1134     COUNT_INST(I_MOVI);
  1135     load_imm32( R_EAX, imm );
  1136     store_reg( R_EAX, Rn );
  1137 :}
  1138 MOV.B Rm, @Rn {:  
  1139     COUNT_INST(I_MOVB);
  1140     load_reg( R_EAX, Rn );
  1141     MMU_TRANSLATE_WRITE( R_EAX );
  1142     load_reg( R_EDX, Rm );
  1143     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1144     sh4_x86.tstate = TSTATE_NONE;
  1145 :}
  1146 MOV.B Rm, @-Rn {:  
  1147     COUNT_INST(I_MOVB);
  1148     load_reg( R_EAX, Rn );
  1149     ADD_imm8s_r32( -1, R_EAX );
  1150     MMU_TRANSLATE_WRITE( R_EAX );
  1151     load_reg( R_EDX, Rm );
  1152     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
  1153     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1154     sh4_x86.tstate = TSTATE_NONE;
  1155 :}
  1156 MOV.B Rm, @(R0, Rn) {:  
  1157     COUNT_INST(I_MOVB);
  1158     load_reg( R_EAX, 0 );
  1159     load_reg( R_ECX, Rn );
  1160     ADD_r32_r32( R_ECX, R_EAX );
  1161     MMU_TRANSLATE_WRITE( R_EAX );
  1162     load_reg( R_EDX, Rm );
  1163     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1164     sh4_x86.tstate = TSTATE_NONE;
  1165 :}
  1166 MOV.B R0, @(disp, GBR) {:  
  1167     COUNT_INST(I_MOVB);
  1168     load_spreg( R_EAX, R_GBR );
  1169     ADD_imm32_r32( disp, R_EAX );
  1170     MMU_TRANSLATE_WRITE( R_EAX );
  1171     load_reg( R_EDX, 0 );
  1172     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1173     sh4_x86.tstate = TSTATE_NONE;
  1174 :}
  1175 MOV.B R0, @(disp, Rn) {:  
  1176     COUNT_INST(I_MOVB);
  1177     load_reg( R_EAX, Rn );
  1178     ADD_imm32_r32( disp, R_EAX );
  1179     MMU_TRANSLATE_WRITE( R_EAX );
  1180     load_reg( R_EDX, 0 );
  1181     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1182     sh4_x86.tstate = TSTATE_NONE;
  1183 :}
  1184 MOV.B @Rm, Rn {:  
  1185     COUNT_INST(I_MOVB);
  1186     load_reg( R_EAX, Rm );
  1187     MMU_TRANSLATE_READ( R_EAX );
  1188     MEM_READ_BYTE( R_EAX, R_EAX );
  1189     store_reg( R_EAX, Rn );
  1190     sh4_x86.tstate = TSTATE_NONE;
  1191 :}
  1192 MOV.B @Rm+, Rn {:  
  1193     COUNT_INST(I_MOVB);
  1194     load_reg( R_EAX, Rm );
  1195     MMU_TRANSLATE_READ( R_EAX );
  1196     ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
  1197     MEM_READ_BYTE( R_EAX, R_EAX );
  1198     store_reg( R_EAX, Rn );
  1199     sh4_x86.tstate = TSTATE_NONE;
  1200 :}
  1201 MOV.B @(R0, Rm), Rn {:  
  1202     COUNT_INST(I_MOVB);
  1203     load_reg( R_EAX, 0 );
  1204     load_reg( R_ECX, Rm );
  1205     ADD_r32_r32( R_ECX, R_EAX );
   1206     MMU_TRANSLATE_READ( R_EAX );
  1207     MEM_READ_BYTE( R_EAX, R_EAX );
  1208     store_reg( R_EAX, Rn );
  1209     sh4_x86.tstate = TSTATE_NONE;
  1210 :}
  1211 MOV.B @(disp, GBR), R0 {:  
  1212     COUNT_INST(I_MOVB);
  1213     load_spreg( R_EAX, R_GBR );
  1214     ADD_imm32_r32( disp, R_EAX );
  1215     MMU_TRANSLATE_READ( R_EAX );
  1216     MEM_READ_BYTE( R_EAX, R_EAX );
  1217     store_reg( R_EAX, 0 );
  1218     sh4_x86.tstate = TSTATE_NONE;
  1219 :}
  1220 MOV.B @(disp, Rm), R0 {:  
  1221     COUNT_INST(I_MOVB);
  1222     load_reg( R_EAX, Rm );
  1223     ADD_imm32_r32( disp, R_EAX );
  1224     MMU_TRANSLATE_READ( R_EAX );
  1225     MEM_READ_BYTE( R_EAX, R_EAX );
  1226     store_reg( R_EAX, 0 );
  1227     sh4_x86.tstate = TSTATE_NONE;
  1228 :}
  1229 MOV.L Rm, @Rn {:
  1230     COUNT_INST(I_MOVL);
  1231     load_reg( R_EAX, Rn );
  1232     check_walign32(R_EAX);
  1233     MMU_TRANSLATE_WRITE( R_EAX );
  1234     load_reg( R_EDX, Rm );
  1235     MEM_WRITE_LONG( R_EAX, R_EDX );
  1236     sh4_x86.tstate = TSTATE_NONE;
  1237 :}
  1238 MOV.L Rm, @-Rn {:  
  1239     COUNT_INST(I_MOVL);
  1240     load_reg( R_EAX, Rn );
  1241     ADD_imm8s_r32( -4, R_EAX );
  1242     check_walign32( R_EAX );
  1243     MMU_TRANSLATE_WRITE( R_EAX );
  1244     load_reg( R_EDX, Rm );
  1245     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  1246     MEM_WRITE_LONG( R_EAX, R_EDX );
  1247     sh4_x86.tstate = TSTATE_NONE;
  1248 :}
  1249 MOV.L Rm, @(R0, Rn) {:  
  1250     COUNT_INST(I_MOVL);
  1251     load_reg( R_EAX, 0 );
  1252     load_reg( R_ECX, Rn );
  1253     ADD_r32_r32( R_ECX, R_EAX );
  1254     check_walign32( R_EAX );
  1255     MMU_TRANSLATE_WRITE( R_EAX );
  1256     load_reg( R_EDX, Rm );
  1257     MEM_WRITE_LONG( R_EAX, R_EDX );
  1258     sh4_x86.tstate = TSTATE_NONE;
  1259 :}
  1260 MOV.L R0, @(disp, GBR) {:  
  1261     COUNT_INST(I_MOVL);
  1262     load_spreg( R_EAX, R_GBR );
  1263     ADD_imm32_r32( disp, R_EAX );
  1264     check_walign32( R_EAX );
  1265     MMU_TRANSLATE_WRITE( R_EAX );
  1266     load_reg( R_EDX, 0 );
  1267     MEM_WRITE_LONG( R_EAX, R_EDX );
  1268     sh4_x86.tstate = TSTATE_NONE;
  1269 :}
  1270 MOV.L Rm, @(disp, Rn) {:  
  1271     COUNT_INST(I_MOVL);
  1272     load_reg( R_EAX, Rn );
  1273     ADD_imm32_r32( disp, R_EAX );
  1274     check_walign32( R_EAX );
  1275     MMU_TRANSLATE_WRITE( R_EAX );
  1276     load_reg( R_EDX, Rm );
  1277     MEM_WRITE_LONG( R_EAX, R_EDX );
  1278     sh4_x86.tstate = TSTATE_NONE;
  1279 :}
  1280 MOV.L @Rm, Rn {:  
  1281     COUNT_INST(I_MOVL);
  1282     load_reg( R_EAX, Rm );
  1283     check_ralign32( R_EAX );
  1284     MMU_TRANSLATE_READ( R_EAX );
  1285     MEM_READ_LONG( R_EAX, R_EAX );
  1286     store_reg( R_EAX, Rn );
  1287     sh4_x86.tstate = TSTATE_NONE;
  1288 :}
  1289 MOV.L @Rm+, Rn {:  
  1290     COUNT_INST(I_MOVL);
  1291     load_reg( R_EAX, Rm );
  1292     check_ralign32( R_EAX );
  1293     MMU_TRANSLATE_READ( R_EAX );
  1294     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1295     MEM_READ_LONG( R_EAX, R_EAX );
  1296     store_reg( R_EAX, Rn );
  1297     sh4_x86.tstate = TSTATE_NONE;
  1298 :}
  1299 MOV.L @(R0, Rm), Rn {:  
  1300     COUNT_INST(I_MOVL);
  1301     load_reg( R_EAX, 0 );
  1302     load_reg( R_ECX, Rm );
  1303     ADD_r32_r32( R_ECX, R_EAX );
  1304     check_ralign32( R_EAX );
  1305     MMU_TRANSLATE_READ( R_EAX );
  1306     MEM_READ_LONG( R_EAX, R_EAX );
  1307     store_reg( R_EAX, Rn );
  1308     sh4_x86.tstate = TSTATE_NONE;
  1309 :}
  1310 MOV.L @(disp, GBR), R0 {:
  1311     COUNT_INST(I_MOVL);
  1312     load_spreg( R_EAX, R_GBR );
  1313     ADD_imm32_r32( disp, R_EAX );
  1314     check_ralign32( R_EAX );
  1315     MMU_TRANSLATE_READ( R_EAX );
  1316     MEM_READ_LONG( R_EAX, R_EAX );
  1317     store_reg( R_EAX, 0 );
  1318     sh4_x86.tstate = TSTATE_NONE;
  1319 :}
  1320 MOV.L @(disp, PC), Rn {:  
  1321     COUNT_INST(I_MOVLPC);
  1322     if( sh4_x86.in_delay_slot ) {
  1323 	SLOTILLEGAL();
  1324     } else {
  1325 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1326 	if( IS_IN_ICACHE(target) ) {
  1327 	    // If the target address is in the same page as the code, it's
  1328 	    // pretty safe to just ref it directly and circumvent the whole
  1329 	    // memory subsystem. (this is a big performance win)
  1331 	    // FIXME: There's a corner-case that's not handled here when
  1332 	    // the current code-page is in the ITLB but not in the UTLB.
  1333 	    // (should generate a TLB miss although need to test SH4 
  1334 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1335 	    // behaviour though.
  1336 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1337 	    MOV_moff32_EAX( ptr );
  1338 	} else {
  1339 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1340 	    // different virtual address than the translation was done with,
  1341 	    // but we can safely assume that the low bits are the same.
  1342 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1343 	    ADD_sh4r_r32( R_PC, R_EAX );
  1344 	    MMU_TRANSLATE_READ( R_EAX );
  1345 	    MEM_READ_LONG( R_EAX, R_EAX );
  1346 	    sh4_x86.tstate = TSTATE_NONE;
   1347 	}
   1348 	store_reg( R_EAX, Rn );
   1349     }
   1350 :}
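
A worked example of the target calculation above: at pc = 0x8C001002 with disp = 12, target = (0x8C001002 & 0xFFFFFFFC) + 12 + 4 = 0x8C001010 - the PC is longword-aligned before the displacement is applied, matching the SH4's PC-relative longword addressing.
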
  1351 MOV.L @(disp, Rm), Rn {:  
  1352     COUNT_INST(I_MOVL);
  1353     load_reg( R_EAX, Rm );
  1354     ADD_imm8s_r32( disp, R_EAX );
  1355     check_ralign32( R_EAX );
  1356     MMU_TRANSLATE_READ( R_EAX );
  1357     MEM_READ_LONG( R_EAX, R_EAX );
  1358     store_reg( R_EAX, Rn );
  1359     sh4_x86.tstate = TSTATE_NONE;
  1360 :}
  1361 MOV.W Rm, @Rn {:  
  1362     COUNT_INST(I_MOVW);
  1363     load_reg( R_EAX, Rn );
  1364     check_walign16( R_EAX );
   1365     MMU_TRANSLATE_WRITE( R_EAX );
  1366     load_reg( R_EDX, Rm );
  1367     MEM_WRITE_WORD( R_EAX, R_EDX );
  1368     sh4_x86.tstate = TSTATE_NONE;
  1369 :}
  1370 MOV.W Rm, @-Rn {:  
  1371     COUNT_INST(I_MOVW);
  1372     load_reg( R_EAX, Rn );
  1373     ADD_imm8s_r32( -2, R_EAX );
  1374     check_walign16( R_EAX );
  1375     MMU_TRANSLATE_WRITE( R_EAX );
  1376     load_reg( R_EDX, Rm );
  1377     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
  1378     MEM_WRITE_WORD( R_EAX, R_EDX );
  1379     sh4_x86.tstate = TSTATE_NONE;
  1380 :}
  1381 MOV.W Rm, @(R0, Rn) {:  
  1382     COUNT_INST(I_MOVW);
  1383     load_reg( R_EAX, 0 );
  1384     load_reg( R_ECX, Rn );
  1385     ADD_r32_r32( R_ECX, R_EAX );
  1386     check_walign16( R_EAX );
  1387     MMU_TRANSLATE_WRITE( R_EAX );
  1388     load_reg( R_EDX, Rm );
  1389     MEM_WRITE_WORD( R_EAX, R_EDX );
  1390     sh4_x86.tstate = TSTATE_NONE;
  1391 :}
  1392 MOV.W R0, @(disp, GBR) {:  
  1393     COUNT_INST(I_MOVW);
  1394     load_spreg( R_EAX, R_GBR );
  1395     ADD_imm32_r32( disp, R_EAX );
  1396     check_walign16( R_EAX );
  1397     MMU_TRANSLATE_WRITE( R_EAX );
  1398     load_reg( R_EDX, 0 );
  1399     MEM_WRITE_WORD( R_EAX, R_EDX );
  1400     sh4_x86.tstate = TSTATE_NONE;
  1401 :}
  1402 MOV.W R0, @(disp, Rn) {:  
  1403     COUNT_INST(I_MOVW);
  1404     load_reg( R_EAX, Rn );
  1405     ADD_imm32_r32( disp, R_EAX );
  1406     check_walign16( R_EAX );
  1407     MMU_TRANSLATE_WRITE( R_EAX );
  1408     load_reg( R_EDX, 0 );
  1409     MEM_WRITE_WORD( R_EAX, R_EDX );
  1410     sh4_x86.tstate = TSTATE_NONE;
  1411 :}
  1412 MOV.W @Rm, Rn {:  
  1413     COUNT_INST(I_MOVW);
  1414     load_reg( R_EAX, Rm );
  1415     check_ralign16( R_EAX );
  1416     MMU_TRANSLATE_READ( R_EAX );
  1417     MEM_READ_WORD( R_EAX, R_EAX );
  1418     store_reg( R_EAX, Rn );
  1419     sh4_x86.tstate = TSTATE_NONE;
  1420 :}
  1421 MOV.W @Rm+, Rn {:  
  1422     COUNT_INST(I_MOVW);
  1423     load_reg( R_EAX, Rm );
  1424     check_ralign16( R_EAX );
  1425     MMU_TRANSLATE_READ( R_EAX );
  1426     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
  1427     MEM_READ_WORD( R_EAX, R_EAX );
  1428     store_reg( R_EAX, Rn );
  1429     sh4_x86.tstate = TSTATE_NONE;
  1430 :}
  1431 MOV.W @(R0, Rm), Rn {:  
  1432     COUNT_INST(I_MOVW);
  1433     load_reg( R_EAX, 0 );
  1434     load_reg( R_ECX, Rm );
  1435     ADD_r32_r32( R_ECX, R_EAX );
  1436     check_ralign16( R_EAX );
  1437     MMU_TRANSLATE_READ( R_EAX );
  1438     MEM_READ_WORD( R_EAX, R_EAX );
  1439     store_reg( R_EAX, Rn );
  1440     sh4_x86.tstate = TSTATE_NONE;
  1441 :}
  1442 MOV.W @(disp, GBR), R0 {:  
  1443     COUNT_INST(I_MOVW);
  1444     load_spreg( R_EAX, R_GBR );
  1445     ADD_imm32_r32( disp, R_EAX );
  1446     check_ralign16( R_EAX );
  1447     MMU_TRANSLATE_READ( R_EAX );
  1448     MEM_READ_WORD( R_EAX, R_EAX );
  1449     store_reg( R_EAX, 0 );
  1450     sh4_x86.tstate = TSTATE_NONE;
  1451 :}
  1452 MOV.W @(disp, PC), Rn {:  
  1453     COUNT_INST(I_MOVW);
  1454     if( sh4_x86.in_delay_slot ) {
  1455 	SLOTILLEGAL();
  1456     } else {
  1457 	// See comments for MOV.L @(disp, PC), Rn
  1458 	uint32_t target = pc + disp + 4;
  1459 	if( IS_IN_ICACHE(target) ) {
  1460 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1461 	    MOV_moff32_EAX( ptr );
  1462 	    MOVSX_r16_r32( R_EAX, R_EAX );
  1463 	} else {
  1464 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1465 	    ADD_sh4r_r32( R_PC, R_EAX );
  1466 	    MMU_TRANSLATE_READ( R_EAX );
  1467 	    MEM_READ_WORD( R_EAX, R_EAX );
  1468 	    sh4_x86.tstate = TSTATE_NONE;
   1469 	}
   1470 	store_reg( R_EAX, Rn );
   1471     }
   1472 :}
  1473 MOV.W @(disp, Rm), R0 {:  
  1474     COUNT_INST(I_MOVW);
  1475     load_reg( R_EAX, Rm );
  1476     ADD_imm32_r32( disp, R_EAX );
  1477     check_ralign16( R_EAX );
  1478     MMU_TRANSLATE_READ( R_EAX );
  1479     MEM_READ_WORD( R_EAX, R_EAX );
  1480     store_reg( R_EAX, 0 );
  1481     sh4_x86.tstate = TSTATE_NONE;
  1482 :}
  1483 MOVA @(disp, PC), R0 {:  
  1484     COUNT_INST(I_MOVA);
  1485     if( sh4_x86.in_delay_slot ) {
  1486 	SLOTILLEGAL();
  1487     } else {
  1488 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1489 	ADD_sh4r_r32( R_PC, R_ECX );
  1490 	store_reg( R_ECX, 0 );
  1491 	sh4_x86.tstate = TSTATE_NONE;
   1492     }
   1493 :}
  1494 MOVCA.L R0, @Rn {:  
  1495     COUNT_INST(I_MOVCA);
  1496     load_reg( R_EAX, Rn );
  1497     check_walign32( R_EAX );
  1498     MMU_TRANSLATE_WRITE( R_EAX );
  1499     load_reg( R_EDX, 0 );
  1500     MEM_WRITE_LONG( R_EAX, R_EDX );
  1501     sh4_x86.tstate = TSTATE_NONE;
  1502 :}
  1504 /* Control transfer instructions */
  1505 BF disp {:
  1506     COUNT_INST(I_BF);
  1507     if( sh4_x86.in_delay_slot ) {
  1508 	SLOTILLEGAL();
  1509     } else {
  1510 	sh4vma_t target = disp + pc + 4;
  1511 	JT_rel8( nottaken );
  1512 	exit_block_rel(target, pc+2 );
  1513 	JMP_TARGET(nottaken);
  1514 	return 2;
   1515     }
   1516 :}
  1517 BF/S disp {:
  1518     COUNT_INST(I_BFS);
  1519     if( sh4_x86.in_delay_slot ) {
  1520 	SLOTILLEGAL();
  1521     } else {
  1522 	sh4_x86.in_delay_slot = DELAY_PC;
  1523 	if( UNTRANSLATABLE(pc+2) ) {
  1524 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1525 	    JT_rel8(nottaken);
  1526 	    ADD_imm32_r32( disp, R_EAX );
  1527 	    JMP_TARGET(nottaken);
  1528 	    ADD_sh4r_r32( R_PC, R_EAX );
  1529 	    store_spreg( R_EAX, R_NEW_PC );
  1530 	    exit_block_emu(pc+2);
  1531 	    sh4_x86.branch_taken = TRUE;
  1532 	    return 2;
  1533 	} else {
  1534 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1535 		CMP_imm8s_sh4r( 1, R_T );
  1536 		sh4_x86.tstate = TSTATE_E;
   1537 	    }
   1538 	    sh4vma_t target = disp + pc + 4;
  1539 	    OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
  1540 	    int save_tstate = sh4_x86.tstate;
  1541 	    sh4_translate_instruction(pc+2);
  1542 	    exit_block_rel( target, pc+4 );
  1544 	    // not taken
  1545 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1546 	    sh4_x86.tstate = save_tstate;
  1547 	    sh4_translate_instruction(pc+2);
  1548 	    return 4;
   1549 	}
   1550     }
   1551 :}
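
The translated-branch path above lays out the code like this (sketch), emitting the delay slot twice because its side effects must happen on both paths while only the continuation differs:

    /*      jcc  not_taken          ; rel32, patched via *patch above  */
    /*      <delay slot>            ; first translation                */
    /*      <exit to disp+pc+4>     ; taken path                       */
    /*  not_taken:                                                     */
    /*      <delay slot>            ; second translation               */
    /*      ...fall through to pc+4...                                 */

The same two-path pattern appears in BT/S below, with the jump condition inverted.
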
  1552 BRA disp {:  
  1553     COUNT_INST(I_BRA);
  1554     if( sh4_x86.in_delay_slot ) {
  1555 	SLOTILLEGAL();
  1556     } else {
  1557 	sh4_x86.in_delay_slot = DELAY_PC;
  1558 	sh4_x86.branch_taken = TRUE;
  1559 	if( UNTRANSLATABLE(pc+2) ) {
  1560 	    load_spreg( R_EAX, R_PC );
  1561 	    ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
  1562 	    store_spreg( R_EAX, R_NEW_PC );
  1563 	    exit_block_emu(pc+2);
  1564 	    return 2;
  1565 	} else {
  1566 	    sh4_translate_instruction( pc + 2 );
  1567 	    exit_block_rel( disp + pc + 4, pc+4 );
  1568 	    return 4;
   1569 	}
   1570     }
   1571 :}
  1572 BRAF Rn {:  
  1573     COUNT_INST(I_BRAF);
  1574     if( sh4_x86.in_delay_slot ) {
  1575 	SLOTILLEGAL();
  1576     } else {
  1577 	load_spreg( R_EAX, R_PC );
  1578 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1579 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1580 	store_spreg( R_EAX, R_NEW_PC );
  1581 	sh4_x86.in_delay_slot = DELAY_PC;
  1582 	sh4_x86.tstate = TSTATE_NONE;
  1583 	sh4_x86.branch_taken = TRUE;
  1584 	if( UNTRANSLATABLE(pc+2) ) {
  1585 	    exit_block_emu(pc+2);
  1586 	    return 2;
  1587 	} else {
  1588 	    sh4_translate_instruction( pc + 2 );
  1589 	    exit_block_newpcset(pc+2);
  1590 	    return 4;
   1591 	}
   1592     }
   1593 :}
  1594 BSR disp {:  
  1595     COUNT_INST(I_BSR);
  1596     if( sh4_x86.in_delay_slot ) {
  1597 	SLOTILLEGAL();
  1598     } else {
  1599 	load_spreg( R_EAX, R_PC );
  1600 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1601 	store_spreg( R_EAX, R_PR );
  1602 	sh4_x86.in_delay_slot = DELAY_PC;
  1603 	sh4_x86.branch_taken = TRUE;
  1604 	sh4_x86.tstate = TSTATE_NONE;
  1605 	if( UNTRANSLATABLE(pc+2) ) {
  1606 	    ADD_imm32_r32( disp, R_EAX );
  1607 	    store_spreg( R_EAX, R_NEW_PC );
  1608 	    exit_block_emu(pc+2);
  1609 	    return 2;
  1610 	} else {
  1611 	    sh4_translate_instruction( pc + 2 );
  1612 	    exit_block_rel( disp + pc + 4, pc+4 );
  1613 	    return 4;
   1614 	}
   1615     }
   1616 :}
  1617 BSRF Rn {:  
  1618     COUNT_INST(I_BSRF);
  1619     if( sh4_x86.in_delay_slot ) {
  1620 	SLOTILLEGAL();
  1621     } else {
  1622 	load_spreg( R_EAX, R_PC );
  1623 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1624 	store_spreg( R_EAX, R_PR );
  1625 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1626 	store_spreg( R_EAX, R_NEW_PC );
  1628 	sh4_x86.in_delay_slot = DELAY_PC;
  1629 	sh4_x86.tstate = TSTATE_NONE;
  1630 	sh4_x86.branch_taken = TRUE;
  1631 	if( UNTRANSLATABLE(pc+2) ) {
  1632 	    exit_block_emu(pc+2);
  1633 	    return 2;
  1634 	} else {
  1635 	    sh4_translate_instruction( pc + 2 );
  1636 	    exit_block_newpcset(pc+2);
  1637 	    return 4;
   1638 	}
   1639     }
   1640 :}
  1641 BT disp {:
  1642     COUNT_INST(I_BT);
  1643     if( sh4_x86.in_delay_slot ) {
  1644 	SLOTILLEGAL();
  1645     } else {
  1646 	sh4vma_t target = disp + pc + 4;
  1647 	JF_rel8( nottaken );
  1648 	exit_block_rel(target, pc+2 );
  1649 	JMP_TARGET(nottaken);
  1650 	return 2;
   1651     }
   1652 :}
BT/S disp {:
    COUNT_INST(I_BTS);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	sh4_x86.in_delay_slot = DELAY_PC;
	if( UNTRANSLATABLE(pc+2) ) {
	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
	    JF_rel8(nottaken);
	    ADD_imm32_r32( disp, R_EAX );
	    JMP_TARGET(nottaken);
	    ADD_sh4r_r32( R_PC, R_EAX );
	    store_spreg( R_EAX, R_NEW_PC );
	    exit_block_emu(pc+2);
	    sh4_x86.branch_taken = TRUE;
	    return 2;
	} else {
	    if( sh4_x86.tstate == TSTATE_NONE ) {
		CMP_imm8s_sh4r( 1, R_T );
		sh4_x86.tstate = TSTATE_E;
	    }
	    OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
	    int save_tstate = sh4_x86.tstate;
	    sh4_translate_instruction(pc+2);
	    exit_block_rel( disp + pc + 4, pc+4 );
	    // not taken
	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
	    sh4_x86.tstate = save_tstate;
	    sh4_translate_instruction(pc+2);
	    return 4;
	}
    }
:}
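/* Register-indirect targets can't be resolved at translation time, so JMP
 * and JSR store the destination to R_NEW_PC and leave through
 * exit_block_newpcset() rather than a direct-branch exit.
 */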
JMP @Rn {:
    COUNT_INST(I_JMP);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_reg( R_ECX, Rn );
	store_spreg( R_ECX, R_NEW_PC );
	sh4_x86.in_delay_slot = DELAY_PC;
	sh4_x86.branch_taken = TRUE;
	if( UNTRANSLATABLE(pc+2) ) {
	    exit_block_emu(pc+2);
	    return 2;
	} else {
	    sh4_translate_instruction(pc+2);
	    exit_block_newpcset(pc+2);
	    return 4;
	}
    }
:}
JSR @Rn {:
    COUNT_INST(I_JSR);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_spreg( R_EAX, R_PC );
	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
	store_spreg( R_EAX, R_PR );
	load_reg( R_ECX, Rn );
	store_spreg( R_ECX, R_NEW_PC );
	sh4_x86.in_delay_slot = DELAY_PC;
	sh4_x86.branch_taken = TRUE;
	sh4_x86.tstate = TSTATE_NONE;
	if( UNTRANSLATABLE(pc+2) ) {
	    exit_block_emu(pc+2);
	    return 2;
	} else {
	    sh4_translate_instruction(pc+2);
	    exit_block_newpcset(pc+2);
	    return 4;
	}
    }
:}
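/* RTE restores PC from SPC and SR from SSR. Since sh4_write_sr() may change
 * the privilege level and register banks, the cached priv/fpuen checks are
 * invalidated before the delay slot is translated.
 */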
RTE {:
    COUNT_INST(I_RTE);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	check_priv();
	load_spreg( R_ECX, R_SPC );
	store_spreg( R_ECX, R_NEW_PC );
	load_spreg( R_EAX, R_SSR );
	call_func1( sh4_write_sr, R_EAX );
	sh4_x86.in_delay_slot = DELAY_PC;
	sh4_x86.priv_checked = FALSE;
	sh4_x86.fpuen_checked = FALSE;
	sh4_x86.tstate = TSTATE_NONE;
	sh4_x86.branch_taken = TRUE;
	if( UNTRANSLATABLE(pc+2) ) {
	    exit_block_emu(pc+2);
	    return 2;
	} else {
	    sh4_translate_instruction(pc+2);
	    exit_block_newpcset(pc+2);
	    return 4;
	}
    }
:}
RTS {:
    COUNT_INST(I_RTS);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_spreg( R_ECX, R_PR );
	store_spreg( R_ECX, R_NEW_PC );
	sh4_x86.in_delay_slot = DELAY_PC;
	sh4_x86.branch_taken = TRUE;
	if( UNTRANSLATABLE(pc+2) ) {
	    exit_block_emu(pc+2);
	    return 2;
	} else {
	    sh4_translate_instruction(pc+2);
	    exit_block_newpcset(pc+2);
	    return 4;
	}
    }
:}
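/* TRAPA commits the address of the next instruction to R_PC before calling
 * sh4_raise_trap(), then exits with the PC already set (exit_block_pcset).
 */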
TRAPA #imm {:
    COUNT_INST(I_TRAPA);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
	ADD_r32_sh4r( R_ECX, R_PC );
	load_imm32( R_EAX, imm );
	call_func1( sh4_raise_trap, R_EAX );
	sh4_x86.tstate = TSTATE_NONE;
	exit_block_pcset(pc);
	sh4_x86.branch_taken = TRUE;
	return 2;
    }
:}
UNDEF {:
    COUNT_INST(I_UNDEF);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	JMP_exc(EXC_ILLEGAL);
	return 2;
    }
:}
CLRMAC {:
    COUNT_INST(I_CLRMAC);
    XOR_r32_r32(R_EAX, R_EAX);
    store_spreg( R_EAX, R_MACL );
    store_spreg( R_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
CLRS {:
    COUNT_INST(I_CLRS);
    CLC();
    SETC_sh4r(R_S);
    sh4_x86.tstate = TSTATE_NONE;
:}
CLRT {:
    COUNT_INST(I_CLRT);
    CLC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SETS {:
    COUNT_INST(I_SETS);
    STC();
    SETC_sh4r(R_S);
    sh4_x86.tstate = TSTATE_NONE;
:}
SETT {:
    COUNT_INST(I_SETT);
    STC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
/* Floating point moves */
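/* sh4_x86.double_size (FPSCR.SZ) is a translation-time constant for the
 * block, so each FMOV variant picks its 32-bit or 64-bit form statically
 * instead of testing FPSCR in the generated code; 64-bit transfers move the
 * register pair as two 32-bit halves (load_dr0/load_dr1).
 */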
FMOV FRm, FRn {:
    COUNT_INST(I_FMOV1);
    check_fpuen();
    if( sh4_x86.double_size ) {
        load_dr0( R_EAX, FRm );
        load_dr1( R_ECX, FRm );
        store_dr0( R_EAX, FRn );
        store_dr1( R_ECX, FRn );
    } else {
        load_fr( R_EAX, FRm ); // SZ=0 branch
        store_fr( R_EAX, FRn );
    }
:}
FMOV FRm, @Rn {:
    COUNT_INST(I_FMOV2);
    check_fpuen();
    load_reg( R_EAX, Rn );
    if( sh4_x86.double_size ) {
        check_walign64( R_EAX );
        MMU_TRANSLATE_WRITE( R_EAX );
        load_dr0( R_ECX, FRm );
        load_dr1( R_EDX, FRm );
        MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
    } else {
        check_walign32( R_EAX );
        MMU_TRANSLATE_WRITE( R_EAX );
        load_fr( R_ECX, FRm );
        MEM_WRITE_LONG( R_EAX, R_ECX );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV @Rm, FRn {:
    COUNT_INST(I_FMOV5);
    check_fpuen();
    load_reg( R_EAX, Rm );
    if( sh4_x86.double_size ) {
        check_ralign64( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
        store_dr0( R_ECX, FRn );
        store_dr1( R_EAX, FRn );
    } else {
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        MEM_READ_LONG( R_EAX, R_EAX );
        store_fr( R_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV FRm, @-Rn {:
    COUNT_INST(I_FMOV3);
    check_fpuen();
    load_reg( R_EAX, Rn );
    if( sh4_x86.double_size ) {
        check_walign64( R_EAX );
        ADD_imm8s_r32(-8,R_EAX);
        MMU_TRANSLATE_WRITE( R_EAX );
        load_dr0( R_ECX, FRm );
        load_dr1( R_EDX, FRm );
        ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
        MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
    } else {
        check_walign32( R_EAX );
        ADD_imm8s_r32( -4, R_EAX );
        MMU_TRANSLATE_WRITE( R_EAX );
        load_fr( R_ECX, FRm );
        ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
        MEM_WRITE_LONG( R_EAX, R_ECX );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV @Rm+, FRn {:
    COUNT_INST(I_FMOV6);
    check_fpuen();
    load_reg( R_EAX, Rm );
    if( sh4_x86.double_size ) {
        check_ralign64( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
        MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
        store_dr0( R_ECX, FRn );
        store_dr1( R_EAX, FRn );
    } else {
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
        MEM_READ_LONG( R_EAX, R_EAX );
        store_fr( R_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV FRm, @(R0, Rn) {:
    COUNT_INST(I_FMOV4);
    check_fpuen();
    load_reg( R_EAX, Rn );
    ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
    if( sh4_x86.double_size ) {
        check_walign64( R_EAX );
        MMU_TRANSLATE_WRITE( R_EAX );
        load_dr0( R_ECX, FRm );
        load_dr1( R_EDX, FRm );
        MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
    } else {
        check_walign32( R_EAX );
        MMU_TRANSLATE_WRITE( R_EAX );
        load_fr( R_ECX, FRm );
        MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV @(R0, Rm), FRn {:
    COUNT_INST(I_FMOV7);
    check_fpuen();
    load_reg( R_EAX, Rm );
    ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
    if( sh4_x86.double_size ) {
        check_ralign64( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
        store_dr0( R_ECX, FRn );
        store_dr1( R_EAX, FRn );
    } else {
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        MEM_READ_LONG( R_EAX, R_EAX );
        store_fr( R_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDI0 FRn {:  /* IFF PR=0 */
    COUNT_INST(I_FLDI0);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        XOR_r32_r32( R_EAX, R_EAX );
        store_fr( R_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDI1 FRn {:  /* IFF PR=0 */
    COUNT_INST(I_FLDI1);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        load_imm32(R_EAX, 0x3F800000);
        store_fr( R_EAX, FRn );
    }
:}
FLOAT FPUL, FRn {:
    COUNT_INST(I_FLOAT);
    check_fpuen();
    FILD_sh4r(R_FPUL);
    if( sh4_x86.double_prec ) {
        pop_dr( FRn );
    } else {
        pop_fr( FRn );
    }
:}
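/* FTRC saturates on overflow rather than wrapping: the operand is compared
 * against max_int and min_int first (FCOMIP leaves the result in the x86
 * flags), and the saturated path stores the bound still addressed by R_ECX
 * straight to FPUL. The normal path swaps the x87 control word to trunc_fcw
 * around the FISTP so the conversion rounds toward zero, as SH4 requires.
 */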
FTRC FRm, FPUL {:
    COUNT_INST(I_FTRC);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr( FRm );
    } else {
        push_fr( FRm );
    }
    load_ptr( R_ECX, &max_int );
    FILD_r32ind( R_ECX );
    FCOMIP_st(1);
    JNA_rel8( sat );
    load_ptr( R_ECX, &min_int );  // 5
    FILD_r32ind( R_ECX );           // 2
    FCOMIP_st(1);                   // 2
    JAE_rel8( sat2 );            // 2
    load_ptr( R_EAX, &save_fcw );
    FNSTCW_r32ind( R_EAX );
    load_ptr( R_EDX, &trunc_fcw );
    FLDCW_r32ind( R_EDX );
    FISTP_sh4r(R_FPUL);             // 3
    FLDCW_r32ind( R_EAX );
    JMP_rel8(end);             // 2

    JMP_TARGET(sat);
    JMP_TARGET(sat2);
    MOV_r32ind_r32( R_ECX, R_ECX ); // 2
    store_spreg( R_ECX, R_FPUL );
    FPOP_st();
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDS FRm, FPUL {:
    COUNT_INST(I_FLDS);
    check_fpuen();
    load_fr( R_EAX, FRm );
    store_spreg( R_EAX, R_FPUL );
:}
FSTS FPUL, FRn {:
    COUNT_INST(I_FSTS);
    check_fpuen();
    load_spreg( R_EAX, R_FPUL );
    store_fr( R_EAX, FRn );
:}
FCNVDS FRm, FPUL {:
    COUNT_INST(I_FCNVDS);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr( FRm );
        pop_fpul();
    }
:}
FCNVSD FPUL, FRn {:
    COUNT_INST(I_FCNVSD);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_fpul();
        pop_dr( FRn );
    }
:}

/* Floating point instructions */
FABS FRn {:
    COUNT_INST(I_FABS);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        FABS_st0();
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        FABS_st0();
        pop_fr(FRn);
    }
:}
FADD FRm, FRn {:
    COUNT_INST(I_FADD);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRm);
        push_dr(FRn);
        FADDP_st(1);
        pop_dr(FRn);
    } else {
        push_fr(FRm);
        push_fr(FRn);
        FADDP_st(1);
        pop_fr(FRn);
    }
:}
FDIV FRm, FRn {:
    COUNT_INST(I_FDIV);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        push_dr(FRm);
        FDIVP_st(1);
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        push_fr(FRm);
        FDIVP_st(1);
        pop_fr(FRn);
    }
:}
FMAC FR0, FRm, FRn {:
    COUNT_INST(I_FMAC);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr( 0 );
        push_dr( FRm );
        FMULP_st(1);
        push_dr( FRn );
        FADDP_st(1);
        pop_dr( FRn );
    } else {
        push_fr( 0 );
        push_fr( FRm );
        FMULP_st(1);
        push_fr( FRn );
        FADDP_st(1);
        pop_fr( FRn );
    }
:}
FMUL FRm, FRn {:
    COUNT_INST(I_FMUL);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRm);
        push_dr(FRn);
        FMULP_st(1);
        pop_dr(FRn);
    } else {
        push_fr(FRm);
        push_fr(FRn);
        FMULP_st(1);
        pop_fr(FRn);
    }
:}
FNEG FRn {:
    COUNT_INST(I_FNEG);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        FCHS_st0();
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        FCHS_st0();
        pop_fr(FRn);
    }
:}
FSRRA FRn {:
    COUNT_INST(I_FSRRA);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        FLD1_st0();
        push_fr(FRn);
        FSQRT_st0();
        FDIVP_st(1);
        pop_fr(FRn);
    }
:}
FSQRT FRn {:
    COUNT_INST(I_FSQRT);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        FSQRT_st0();
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        FSQRT_st0();
        pop_fr(FRn);
    }
:}
FSUB FRm, FRn {:
    COUNT_INST(I_FSUB);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        push_dr(FRm);
        FSUBP_st(1);
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        push_fr(FRm);
        FSUBP_st(1);
        pop_fr(FRn);
    }
:}
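/* FP comparisons run through FCOMIP so the result lands in the x86 flags;
 * SETE/SETA then materialise the T bit, and tstate records which condition
 * is live so a following conditional branch can reuse the flags directly.
 */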
FCMP/EQ FRm, FRn {:
    COUNT_INST(I_FCMPEQ);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRm);
        push_dr(FRn);
    } else {
        push_fr(FRm);
        push_fr(FRn);
    }
    FCOMIP_st(1);
    SETE_t();
    FPOP_st();
    sh4_x86.tstate = TSTATE_E;
:}
FCMP/GT FRm, FRn {:
    COUNT_INST(I_FCMPGT);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRm);
        push_dr(FRn);
    } else {
        push_fr(FRm);
        push_fr(FRn);
    }
    FCOMIP_st(1);
    SETA_t();
    FPOP_st();
    sh4_x86.tstate = TSTATE_A;
:}
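/* The vector/approximation ops (FSCA, FIPR, FTRV) are only defined for PR=0.
 * FSCA and FTRV call out to C helpers on the register file, while FIPR is
 * expanded inline as a four-element dot product on the x87 stack.
 */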
FSCA FPUL, FRn {:
    COUNT_INST(I_FSCA);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_ECX );
        load_spreg( R_EDX, R_FPUL );
        call_func2( sh4_fsca, R_EDX, R_ECX );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FIPR FVm, FVn {:
    COUNT_INST(I_FIPR);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        push_fr( FVm<<2 );
        push_fr( FVn<<2 );
        FMULP_st(1);
        push_fr( (FVm<<2)+1);
        push_fr( (FVn<<2)+1);
        FMULP_st(1);
        FADDP_st(1);
        push_fr( (FVm<<2)+2);
        push_fr( (FVn<<2)+2);
        FMULP_st(1);
        FADDP_st(1);
        push_fr( (FVm<<2)+3);
        push_fr( (FVn<<2)+3);
        FMULP_st(1);
        FADDP_st(1);
        pop_fr( (FVn<<2)+3);
    }
:}
FTRV XMTRX, FVn {:
    COUNT_INST(I_FTRV);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EDX );
        call_func1( sh4_ftrv, R_EDX );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
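/* FRCHG/FSCHG toggle FPSCR bits in place. FRCHG also swaps the FP register
 * banks via sh4_switch_fr_banks(); FSCHG flips the translation-time
 * double_size flag so the remainder of the block is generated for the new
 * transfer-size mode.
 */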
FRCHG {:
    COUNT_INST(I_FRCHG);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    XOR_imm32_r32( FPSCR_FR, R_ECX );
    store_spreg( R_ECX, R_FPSCR );
    call_func0( sh4_switch_fr_banks );
    sh4_x86.tstate = TSTATE_NONE;
:}
FSCHG {:
    COUNT_INST(I_FSCHG);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    XOR_imm32_r32( FPSCR_SZ, R_ECX );
    store_spreg( R_ECX, R_FPSCR );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_size = !sh4_x86.double_size;
:}
/* Processor control instructions */
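/* Most of the control-register transfers below are privileged: check_priv()
 * emits the supervisor-mode guard (cached per block; note that priv_checked
 * is reset wherever SR may have been rewritten).
 */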
LDC Rm, SR {:
    COUNT_INST(I_LDCSR);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	check_priv();
	load_reg( R_EAX, Rm );
	call_func1( sh4_write_sr, R_EAX );
	sh4_x86.priv_checked = FALSE;
	sh4_x86.fpuen_checked = FALSE;
	sh4_x86.tstate = TSTATE_NONE;
    }
:}
LDC Rm, GBR {:
    COUNT_INST(I_LDC);
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_GBR );
:}
LDC Rm, VBR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SSR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SGR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SPC {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, DBR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, Rn_BANK {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
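/* In the post-increment loads below, MMU_TRANSLATE_READ is emitted before
 * the register increment, so a TLB miss or protection fault is taken with
 * Rm still unmodified and the instruction can be restarted cleanly.
 */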
LDC.L @Rm+, GBR {:
    COUNT_INST(I_LDCM);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_GBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SR {:
    COUNT_INST(I_LDCSRM);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	check_priv();
	load_reg( R_EAX, Rm );
	check_ralign32( R_EAX );
	MMU_TRANSLATE_READ( R_EAX );
	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
	MEM_READ_LONG( R_EAX, R_EAX );
	call_func1( sh4_write_sr, R_EAX );
	sh4_x86.priv_checked = FALSE;
	sh4_x86.fpuen_checked = FALSE;
	sh4_x86.tstate = TSTATE_NONE;
    }
:}
LDC.L @Rm+, VBR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SSR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SGR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SPC {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, DBR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, Rn_BANK {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
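/* A write to FPSCR can change the precision/size modes this translation was
 * specialized on, so both FPSCR loads return nonzero (like the branch
 * actions above) to terminate the block at this instruction.
 */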
LDS Rm, FPSCR {:
    COUNT_INST(I_LDSFPSCR);
    check_fpuen();
    load_reg( R_EAX, Rm );
    call_func1( sh4_write_fpscr, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
    return 2;
:}
LDS.L @Rm+, FPSCR {:
    COUNT_INST(I_LDSFPSCRM);
    check_fpuen();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    call_func1( sh4_write_fpscr, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
    return 2;
:}
LDS Rm, FPUL {:
    COUNT_INST(I_LDS);
    check_fpuen();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_FPUL );
:}
LDS.L @Rm+, FPUL {:
    COUNT_INST(I_LDSM);
    check_fpuen();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACH {:
    COUNT_INST(I_LDS);
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACH );
:}
LDS.L @Rm+, MACH {:
    COUNT_INST(I_LDSM);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACL {:
    COUNT_INST(I_LDS);
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACL );
:}
LDS.L @Rm+, MACL {:
    COUNT_INST(I_LDSM);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, PR {:
    COUNT_INST(I_LDS);
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_PR );
:}
LDS.L @Rm+, PR {:
    COUNT_INST(I_LDSM);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_PR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDTLB {:
    COUNT_INST(I_LDTLB);
    call_func0( MMU_ldtlb );
    sh4_x86.tstate = TSTATE_NONE;
:}
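/* The operand-cache control instructions are no-ops here, as the operand
 * cache itself isn't modelled.
 */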
OCBI @Rn {:
    COUNT_INST(I_OCBI);
:}
OCBP @Rn {:
    COUNT_INST(I_OCBP);
:}
OCBWB @Rn {:
    COUNT_INST(I_OCBWB);
:}
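/* PREF only has a visible effect on the store-queue region (0xE0000000
 * through 0xE3FFFFFF, given the 0xFC000000 mask), where it triggers the
 * queue write-back via sh4_flush_store_queue(); a zero return from the
 * flush raises an exception (JE_exc).
 */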
PREF @Rn {:
    COUNT_INST(I_PREF);
    load_reg( R_EAX, Rn );
    MOV_r32_r32( R_EAX, R_ECX );
    AND_imm32_r32( 0xFC000000, R_EAX );
    CMP_imm32_r32( 0xE0000000, R_EAX );
    JNE_rel8(end);
    call_func1( sh4_flush_store_queue, R_ECX );
    TEST_r32_r32( R_EAX, R_EAX );
    JE_exc(-1);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
SLEEP {:
    COUNT_INST(I_SLEEP);
    check_priv();
    call_func0( sh4_sleep );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.in_delay_slot = DELAY_NONE;
    return 2;
:}
STC SR, Rn {:
    COUNT_INST(I_STCSR);
    check_priv();
    call_func0(sh4_read_sr);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC GBR, Rn {:
    COUNT_INST(I_STC);
    load_spreg( R_EAX, R_GBR );
    store_reg( R_EAX, Rn );
:}
STC VBR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_VBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SSR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_SSR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SPC, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_SPC );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SGR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_SGR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC DBR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_DBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC Rm_BANK, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
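/* sh4_read_sr() computes the current SR value, so the translated store
 * address in EAX has to survive the call; PUSH/POP_realigned preserve it
 * while keeping the stack aligned across the call.
 */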
STC.L SR, @-Rn {:
    COUNT_INST(I_STCSRM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32( R_EAX );
    call_func0( sh4_read_sr );
    POP_realigned_r32( R_ECX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L VBR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_VBR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SSR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_SSR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SPC, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_SPC );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SGR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_SGR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L DBR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_DBR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L Rm_BANK, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L GBR, @-Rn {:
    COUNT_INST(I_STCM);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_GBR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPSCR, Rn {:
    COUNT_INST(I_STSFPSCR);
    check_fpuen();
    load_spreg( R_EAX, R_FPSCR );
    store_reg( R_EAX, Rn );
:}
STS.L FPSCR, @-Rn {:
    COUNT_INST(I_STSFPSCRM);
    check_fpuen();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_FPSCR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPUL, Rn {:
    COUNT_INST(I_STS);
    check_fpuen();
    load_spreg( R_EAX, R_FPUL );
    store_reg( R_EAX, Rn );
:}
STS.L FPUL, @-Rn {:
    COUNT_INST(I_STSM);
    check_fpuen();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_FPUL );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACH, Rn {:
    COUNT_INST(I_STS);
    load_spreg( R_EAX, R_MACH );
    store_reg( R_EAX, Rn );
:}
STS.L MACH, @-Rn {:
    COUNT_INST(I_STSM);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_MACH );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACL, Rn {:
    COUNT_INST(I_STS);
    load_spreg( R_EAX, R_MACL );
    store_reg( R_EAX, Rn );
:}
STS.L MACL, @-Rn {:
    COUNT_INST(I_STSM);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_MACL );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS PR, Rn {:
    COUNT_INST(I_STS);
    load_spreg( R_EAX, R_PR );
    store_reg( R_EAX, Rn );
:}
STS.L PR, @-Rn {:
    COUNT_INST(I_STSM);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_PR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
NOP {:
    COUNT_INST(I_NOP);
    /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */
:}
%%
    sh4_x86.in_delay_slot = DELAY_NONE;
    return 0;