lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 904:5b92e51ac06b
prev 903:1337c7a7dd6b
next 905:4c17ebd9ef5e
author nkeynes
date Wed Oct 29 23:36:31 2008 +0000
permissions -rw-r--r--
last change Enable the FIPR SSE3 code for now, and add a comment on the sh4r.fr alignment
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/xltcache.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4trans.h"
    31 #include "sh4/sh4stat.h"
    32 #include "sh4/sh4mmio.h"
    33 #include "sh4/x86op.h"
    34 #include "clock.h"
    36 #define DEFAULT_BACKPATCH_SIZE 4096
    38 struct backpatch_record {
    39     uint32_t fixup_offset;
    40     uint32_t fixup_icount;
    41     int32_t exc_code;
    42 };
    44 #define DELAY_NONE 0
    45 #define DELAY_PC 1
    46 #define DELAY_PC_PR 2
    48 /** 
    49  * Struct to manage internal translation state. This state is not saved -
    50  * it is only valid between calls to sh4_translate_begin_block() and
    51  * sh4_translate_end_block()
    52  */
    53 struct sh4_x86_state {
    54     int in_delay_slot;
    55     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    56     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    57     gboolean branch_taken; /* true if we branched unconditionally */
    58     gboolean double_prec; /* true if FPU is in double-precision mode */
    59     gboolean double_size; /* true if FPU is in double-size mode */
    60     gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    61     uint32_t block_start_pc;
    62     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    63     int tstate;
    65     /* mode flags */
    66     gboolean tlb_on; /* True if tlb translation is active */
    68     /* Allocated memory for the (block-wide) back-patch list */
    69     struct backpatch_record *backpatch_list;
    70     uint32_t backpatch_posn;
    71     uint32_t backpatch_size;
    72 };
    74 #define TSTATE_NONE -1
    75 #define TSTATE_O    0
    76 #define TSTATE_C    2
    77 #define TSTATE_E    4
    78 #define TSTATE_NE   5
    79 #define TSTATE_G    0xF
    80 #define TSTATE_GE   0xD
    81 #define TSTATE_A    7
    82 #define TSTATE_AE   3
    84 #ifdef ENABLE_SH4STATS
    85 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
    86 #else
    87 #define COUNT_INST(id)
    88 #endif
    90 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    91 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    92 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    93     OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
    95 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    96 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    97 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    98     OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
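/* Note on the scheme above: sh4_x86.tstate caches which x86 condition code
 * currently mirrors the SH4 T flag. The TSTATE_* values are exactly the low
 * nibbles of the corresponding x86 Jcc opcodes (e.g. TSTATE_E = 4 -> 0x74 = JE),
 * so OP(0x70+tstate) emits the matching short conditional jump, and XORing the
 * value with 1 (as JF_rel8 does) inverts the condition. TSTATE_NONE forces a
 * fresh CMP against sh4r.t first. */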
   100 static struct sh4_x86_state sh4_x86;
   102 static uint32_t max_int = 0x7FFFFFFF;
   103 static uint32_t min_int = 0x80000000;
   104 static uint32_t save_fcw; /* saved value of the fpu control word */
   105 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
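/* 0x0F7F keeps the default x87 exception masks but sets the rounding-control
 * field (FCW bits 10-11) to 11b = round-toward-zero, i.e. the truncation
 * behaviour that FTRC requires. */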
   107 gboolean is_sse3_supported()
   108 {
   109     uint32_t features;
   111     // Note: Include the push/pop ebx sequence in case of PIC builds. This 
   112     // isn't exactly on a critical path anyway
   113     __asm__ __volatile__(
   114         "pushl %%ebx\n\t"
   115         "mov $0x01, %%eax\n\t"
   116         "cpuid\n\t"
   117         "popl %%ebx" : "=c" (features) : : "eax", "edx");
   118     return (features & 1) ? TRUE : FALSE;
   119 }
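/* For comparison only -- a minimal sketch of the same test using GCC's
 * <cpuid.h> helper (not used here, since the raw asm above lets us preserve
 * %ebx explicitly for PIC builds):
 *
 *     #include <cpuid.h>
 *     gboolean is_sse3_supported()
 *     {
 *         unsigned int eax, ebx, ecx, edx;
 *         if( !__get_cpuid( 1, &eax, &ebx, &ecx, &edx ) )
 *             return FALSE;                 // CPUID leaf 1 unavailable
 *         return (ecx & 1) ? TRUE : FALSE;  // ECX bit 0 = SSE3
 *     }
 */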
   121 void sh4_translate_init(void)
   122 {
   123     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   124     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   125     sh4_x86.sse3_enabled = is_sse3_supported();
   126 }
   129 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   130 {
   131     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   132 	sh4_x86.backpatch_size <<= 1;
   133 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   134 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   135 	assert( sh4_x86.backpatch_list != NULL );
   136     }
   137     if( sh4_x86.in_delay_slot ) {
   138 	fixup_pc -= 2;
   139     }
   140     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   141 	((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
   142     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   143     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   144     sh4_x86.backpatch_posn++;
   145 }
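/* Worked example (hypothetical addresses): for a block starting at
 * 0x8C001000, a fixup attached to the instruction at 0x8C001006 records
 * fixup_icount = (0x8C001006 - 0x8C001000) >> 1 = 3, i.e. the fourth SH4
 * instruction of the block. An exc_code of -1 (see JE_exc(-1) in the MMU
 * macros below) appears to mark fixups where mmu_vma_to_phys_* has already
 * raised the exception itself. */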
   147 /**
   148  * Emit an instruction to load an SH4 reg into a real register
   149  */
   150 static inline void load_reg( int x86reg, int sh4reg ) 
   151 {
   152     /* mov [bp+n], reg */
   153     OP(0x8B);
   154     OP(0x45 + (x86reg<<3));
   155     OP(REG_OFFSET(r[sh4reg]));
   156 }
   158 static inline void load_reg16s( int x86reg, int sh4reg )
   159 {
   160     OP(0x0F);
   161     OP(0xBF);
   162     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   163 }
   165 static inline void load_reg16u( int x86reg, int sh4reg )
   166 {
   167     OP(0x0F);
   168     OP(0xB7);
   169     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   171 }
   173 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   174 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   175 /**
   176  * Emit an instruction to load an immediate value into a register
   177  */
   178 static inline void load_imm32( int x86reg, uint32_t value ) {
   179     /* mov #value, reg */
   180     OP(0xB8 + x86reg);
   181     OP32(value);
   182 }
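/* Example encoding (assuming the standard x86 register numbering, R_ECX == 1):
 * load_imm32( R_ECX, 0x12345678 ) emits B9 78 56 34 12 -- the
 * "mov r32, imm32" opcode row 0xB8+reg followed by the little-endian
 * immediate. */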
   184 /**
   185  * Load an immediate 64-bit quantity (note: x86-64 only)
   186  */
   187 static inline void load_imm64( int x86reg, uint64_t value ) {
   188     /* mov #value, reg */
   189     REXW();
   190     OP(0xB8 + x86reg);
   191     OP64(value);
   192 }
   194 /**
   195  * Emit an instruction to store an SH4 reg (RN)
   196  */
   197 static inline void store_reg( int x86reg, int sh4reg ) {
   198     /* mov reg, [bp+n] */
   199     OP(0x89);
   200     OP(0x45 + (x86reg<<3));
   201     OP(REG_OFFSET(r[sh4reg]));
   202 }
   204 /**
   205  * Load an FR register (single-precision floating point) into an integer x86
   206  * register (eg for register-to-register moves)
   207  */
   208 #define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
   209 #define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
   211 /**
   212  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   213  */
   214 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   215 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   217 /**
   218  * Store an FR register (single-precision floating point) from an integer x86
   219  * register (eg for register-to-register moves)
   220  */
   221 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
   222 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
   224 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   225 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   228 #define push_fpul()  FLDF_sh4r(R_FPUL)
   229 #define pop_fpul()   FSTPF_sh4r(R_FPUL)
   230 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   231 #define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   232 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   233 #define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   234 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   235 #define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   236 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   237 #define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
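/* The (frm)^1 / frm|0x01 / frm&0x0E index games above reflect the sh4r.fr
 * layout: within each bank the two singles of every aligned register pair
 * are kept word-swapped on the little-endian host, so a DR pair can be
 * pushed/popped as one IEEE double (FLDD/FSTPD on fr[bank][frm&0x0E]) while
 * scalar FR accesses flip the low bit of the index. */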
   241 /* Exception checks - Note that all exception checks will clobber EAX */
   243 #define check_priv( ) \
   244     if( !sh4_x86.priv_checked ) { \
   245 	sh4_x86.priv_checked = TRUE;\
   246 	load_spreg( R_EAX, R_SR );\
   247 	AND_imm32_r32( SR_MD, R_EAX );\
   248 	if( sh4_x86.in_delay_slot ) {\
   249 	    JE_exc( EXC_SLOT_ILLEGAL );\
   250 	} else {\
   251 	    JE_exc( EXC_ILLEGAL );\
   252 	}\
   253 	sh4_x86.tstate = TSTATE_NONE; \
   254     }\
   256 #define check_fpuen( ) \
   257     if( !sh4_x86.fpuen_checked ) {\
   258 	sh4_x86.fpuen_checked = TRUE;\
   259 	load_spreg( R_EAX, R_SR );\
   260 	AND_imm32_r32( SR_FD, R_EAX );\
   261 	if( sh4_x86.in_delay_slot ) {\
   262 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   263 	} else {\
   264 	    JNE_exc(EXC_FPU_DISABLED);\
   265 	}\
   266 	sh4_x86.tstate = TSTATE_NONE; \
   267     }
   269 #define check_ralign16( x86reg ) \
   270     TEST_imm32_r32( 0x00000001, x86reg ); \
   271     JNE_exc(EXC_DATA_ADDR_READ)
   273 #define check_walign16( x86reg ) \
   274     TEST_imm32_r32( 0x00000001, x86reg ); \
   275     JNE_exc(EXC_DATA_ADDR_WRITE);
   277 #define check_ralign32( x86reg ) \
   278     TEST_imm32_r32( 0x00000003, x86reg ); \
   279     JNE_exc(EXC_DATA_ADDR_READ)
   281 #define check_walign32( x86reg ) \
   282     TEST_imm32_r32( 0x00000003, x86reg ); \
   283     JNE_exc(EXC_DATA_ADDR_WRITE);
   285 #define check_ralign64( x86reg ) \
   286     TEST_imm32_r32( 0x00000007, x86reg ); \
   287     JNE_exc(EXC_DATA_ADDR_READ)
   289 #define check_walign64( x86reg ) \
   290     TEST_imm32_r32( 0x00000007, x86reg ); \
   291     JNE_exc(EXC_DATA_ADDR_WRITE);
   293 #define UNDEF(ir)
   294 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   295 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   296 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   297 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   298 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   299 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   300 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   302 /**
   303  * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned 
   304  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   305  */
   306 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   308 #define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg) }
   309 /**
   310  * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned 
   311  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   312  */
   313 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   315 #define MEM_READ_SIZE (CALL_FUNC1_SIZE)
   316 #define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
   317 #define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )
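/* Illustrative expansion of MMU_TRANSLATE_READ(R_EAX) when sh4_x86.tlb_on:
 *
 *     call_func1( mmu_vma_to_phys_read, R_EAX ); // EAX := phys addr,
 *     CMP_imm32_r32( MMU_VMA_ERROR, R_EAX );     //   or MMU_VMA_ERROR
 *     JE_exc(-1);                                // backpatched exception exit
 *     MEM_RESULT(R_EAX);                         // no-op when target is EAX
 *
 * The CMP/JE tail accounts for the fixed +12 bytes in MMU_TRANSLATE_SIZE. */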
   319 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
   321 /****** Import appropriate calling conventions ******/
   322 #if SIZEOF_VOID_P == 8
   323 #include "sh4/ia64abi.h"
   324 #else /* 32-bit system */
   325 #ifdef APPLE_BUILD
   326 #include "sh4/ia32mac.h"
   327 #else
   328 #include "sh4/ia32abi.h"
   329 #endif
   330 #endif
   332 void sh4_translate_begin_block( sh4addr_t pc ) 
   333 {
   334 	enter_block();
   335     sh4_x86.in_delay_slot = FALSE;
   336     sh4_x86.priv_checked = FALSE;
   337     sh4_x86.fpuen_checked = FALSE;
   338     sh4_x86.branch_taken = FALSE;
   339     sh4_x86.backpatch_posn = 0;
   340     sh4_x86.block_start_pc = pc;
   341     sh4_x86.tlb_on = IS_MMU_ENABLED();
   342     sh4_x86.tstate = TSTATE_NONE;
   343     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   344     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
   345 }
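/* Typical lifecycle, for orientation (a sketch only -- the real driver loop
 * lives in sh4trans.c):
 *
 *     sh4_translate_begin_block( pc );
 *     uint32_t done = 0;
 *     while( !done ) {
 *         done = sh4_translate_instruction( pc );
 *         pc += 2;                       // SH4 instructions are 16 bits
 *     }
 *     sh4_translate_end_block( pc );     // epilogue + backpatched exits
 */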
   348 uint32_t sh4_translate_end_block_size()
   349 {
   350     if( sh4_x86.backpatch_posn <= 3 ) {
   351         return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
   352     } else {
   353         return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
   354     }
   355 }
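/* e.g. two pending backpatches reserve EPILOGUE_SIZE + 24 bytes; five
 * reserve EPILOGUE_SIZE + 48 + 2*15 = EPILOGUE_SIZE + 78 -- an upper bound
 * on the epilogue plus its exception-exit stubs. */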
   358 /**
   359  * Embed a breakpoint into the generated code
   360  */
   361 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   362 {
   363     load_imm32( R_EAX, pc );
   364     call_func1( sh4_translate_breakpoint_hit, R_EAX );
   365     sh4_x86.tstate = TSTATE_NONE;
   366 }
   369 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
   371 /**
   372  * Embed a call to sh4_execute_instruction for situations that we
   373  * can't translate (just page-crossing delay slots at the moment).
   374  * Caller is responsible for setting new_pc before calling this function.
   375  *
   376  * Performs:
   377  *   Set PC = endpc
   378  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   379  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   380  *   Call sh4_execute_instruction
   381  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   382  */
   383 void exit_block_emu( sh4vma_t endpc )
   384 {
   385     load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
   386     ADD_r32_sh4r( R_ECX, R_PC );
   388     load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
   389     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   390     load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
   391     store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
   393     call_func0( sh4_execute_instruction );    
   394     load_spreg( R_EAX, R_PC );
   395     if( sh4_x86.tlb_on ) {
   396 	call_func1(xlat_get_code_by_vma,R_EAX);
   397     } else {
   398 	call_func1(xlat_get_code,R_EAX);
   399     }
   400     AND_imm8s_rptr( 0xFC, R_EAX );
   401     POP_r32(R_EBP);
   402     RET();
   403 } 
   405 /**
   406  * Translate a single instruction. Delayed branches are handled specially
   407  * by translating both branch and delayed instruction as a single unit (as
   408  * the delay-slot instruction must execute before the branch takes effect).
   409  * The instruction MUST be in the icache (assert check)
   410  *
   411  * @return true if the instruction marks the end of a basic block
   412  * (eg a branch or an illegal slot instruction)
   413  */
   414 uint32_t sh4_translate_instruction( sh4vma_t pc )
   415 {
   416     uint32_t ir;
   417     /* Read instruction from icache */
   418     assert( IS_IN_ICACHE(pc) );
   419     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   421 	/* PC is not in the current icache - this usually means we're running
   422 	 * with MMU on, and we've gone past the end of the page. And since 
   423 	 * sh4_translate_block is pretty careful about this, it means we're
   424 	 * almost certainly in a delay slot.
   425 	 *
   426 	 * Since we can't assume the page is present (and we can't fault it in
   427 	 * at this point), inline a call to sh4_execute_instruction (with a few
   428 	 * small repairs to cope with the different environment).
   429 	 */
   431     if( !sh4_x86.in_delay_slot ) {
   432 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   433     }
   434 %%
   435 /* ALU operations */
   436 ADD Rm, Rn {:
   437     COUNT_INST(I_ADD);
   438     load_reg( R_EAX, Rm );
   439     load_reg( R_ECX, Rn );
   440     ADD_r32_r32( R_EAX, R_ECX );
   441     store_reg( R_ECX, Rn );
   442     sh4_x86.tstate = TSTATE_NONE;
   443 :}
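/* For example, "ADD r4, r5" becomes straight-line host code of the form
 * (Intel syntax; %ebp holds &sh4r):
 *
 *     mov eax, [ebp + REG_OFFSET(r[4])]
 *     mov ecx, [ebp + REG_OFFSET(r[5])]
 *     add ecx, eax
 *     mov [ebp + REG_OFFSET(r[5])], ecx
 *
 * -- no register allocation, exactly as the file header promises. */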
   444 ADD #imm, Rn {:  
   445     COUNT_INST(I_ADDI);
   446     load_reg( R_EAX, Rn );
   447     ADD_imm8s_r32( imm, R_EAX );
   448     store_reg( R_EAX, Rn );
   449     sh4_x86.tstate = TSTATE_NONE;
   450 :}
   451 ADDC Rm, Rn {:
   452     COUNT_INST(I_ADDC);
   453     if( sh4_x86.tstate != TSTATE_C ) {
   454 	LDC_t();
   455     }
   456     load_reg( R_EAX, Rm );
   457     load_reg( R_ECX, Rn );
   458     ADC_r32_r32( R_EAX, R_ECX );
   459     store_reg( R_ECX, Rn );
   460     SETC_t();
   461     sh4_x86.tstate = TSTATE_C;
   462 :}
   463 ADDV Rm, Rn {:
   464     COUNT_INST(I_ADDV);
   465     load_reg( R_EAX, Rm );
   466     load_reg( R_ECX, Rn );
   467     ADD_r32_r32( R_EAX, R_ECX );
   468     store_reg( R_ECX, Rn );
   469     SETO_t();
   470     sh4_x86.tstate = TSTATE_O;
   471 :}
   472 AND Rm, Rn {:
   473     COUNT_INST(I_AND);
   474     load_reg( R_EAX, Rm );
   475     load_reg( R_ECX, Rn );
   476     AND_r32_r32( R_EAX, R_ECX );
   477     store_reg( R_ECX, Rn );
   478     sh4_x86.tstate = TSTATE_NONE;
   479 :}
   480 AND #imm, R0 {:  
   481     COUNT_INST(I_ANDI);
   482     load_reg( R_EAX, 0 );
   483     AND_imm32_r32(imm, R_EAX); 
   484     store_reg( R_EAX, 0 );
   485     sh4_x86.tstate = TSTATE_NONE;
   486 :}
   487 AND.B #imm, @(R0, GBR) {: 
   488     COUNT_INST(I_ANDB);
   489     load_reg( R_EAX, 0 );
   490     load_spreg( R_ECX, R_GBR );
   491     ADD_r32_r32( R_ECX, R_EAX );
   492     MMU_TRANSLATE_WRITE( R_EAX );
   493     PUSH_realigned_r32(R_EAX);
   494     MEM_READ_BYTE( R_EAX, R_EAX );
   495     POP_realigned_r32(R_ECX);
   496     AND_imm32_r32(imm, R_EAX );
   497     MEM_WRITE_BYTE( R_ECX, R_EAX );
   498     sh4_x86.tstate = TSTATE_NONE;
   499 :}
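/* The byte read-modify-write pattern above (shared by OR.B, XOR.B and TAS.B
 * below): translate the VMA once, PUSH the translated address across the
 * read, modify the value in EAX, then POP the address back for the write --
 * avoiding a second, potentially faulting, translation. */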
   500 CMP/EQ Rm, Rn {:  
   501     COUNT_INST(I_CMPEQ);
   502     load_reg( R_EAX, Rm );
   503     load_reg( R_ECX, Rn );
   504     CMP_r32_r32( R_EAX, R_ECX );
   505     SETE_t();
   506     sh4_x86.tstate = TSTATE_E;
   507 :}
   508 CMP/EQ #imm, R0 {:  
   509     COUNT_INST(I_CMPEQI);
   510     load_reg( R_EAX, 0 );
   511     CMP_imm8s_r32(imm, R_EAX);
   512     SETE_t();
   513     sh4_x86.tstate = TSTATE_E;
   514 :}
   515 CMP/GE Rm, Rn {:  
   516     COUNT_INST(I_CMPGE);
   517     load_reg( R_EAX, Rm );
   518     load_reg( R_ECX, Rn );
   519     CMP_r32_r32( R_EAX, R_ECX );
   520     SETGE_t();
   521     sh4_x86.tstate = TSTATE_GE;
   522 :}
   523 CMP/GT Rm, Rn {: 
   524     COUNT_INST(I_CMPGT);
   525     load_reg( R_EAX, Rm );
   526     load_reg( R_ECX, Rn );
   527     CMP_r32_r32( R_EAX, R_ECX );
   528     SETG_t();
   529     sh4_x86.tstate = TSTATE_G;
   530 :}
   531 CMP/HI Rm, Rn {:  
   532     COUNT_INST(I_CMPHI);
   533     load_reg( R_EAX, Rm );
   534     load_reg( R_ECX, Rn );
   535     CMP_r32_r32( R_EAX, R_ECX );
   536     SETA_t();
   537     sh4_x86.tstate = TSTATE_A;
   538 :}
   539 CMP/HS Rm, Rn {: 
   540     COUNT_INST(I_CMPHS);
   541     load_reg( R_EAX, Rm );
   542     load_reg( R_ECX, Rn );
   543     CMP_r32_r32( R_EAX, R_ECX );
   544     SETAE_t();
   545     sh4_x86.tstate = TSTATE_AE;
   546  :}
   547 CMP/PL Rn {: 
   548     COUNT_INST(I_CMPPL);
   549     load_reg( R_EAX, Rn );
   550     CMP_imm8s_r32( 0, R_EAX );
   551     SETG_t();
   552     sh4_x86.tstate = TSTATE_G;
   553 :}
   554 CMP/PZ Rn {:  
   555     COUNT_INST(I_CMPPZ);
   556     load_reg( R_EAX, Rn );
   557     CMP_imm8s_r32( 0, R_EAX );
   558     SETGE_t();
   559     sh4_x86.tstate = TSTATE_GE;
   560 :}
   561 CMP/STR Rm, Rn {:  
   562     COUNT_INST(I_CMPSTR);
   563     load_reg( R_EAX, Rm );
   564     load_reg( R_ECX, Rn );
   565     XOR_r32_r32( R_ECX, R_EAX );
   566     TEST_r8_r8( R_AL, R_AL );
   567     JE_rel8(target1);
   568     TEST_r8_r8( R_AH, R_AH );
   569     JE_rel8(target2);
   570     SHR_imm8_r32( 16, R_EAX );
   571     TEST_r8_r8( R_AL, R_AL );
   572     JE_rel8(target3);
   573     TEST_r8_r8( R_AH, R_AH );
   574     JMP_TARGET(target1);
   575     JMP_TARGET(target2);
   576     JMP_TARGET(target3);
   577     SETE_t();
   578     sh4_x86.tstate = TSTATE_E;
   579 :}
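/* CMP/STR sets T if any of the four corresponding bytes of Rm and Rn match:
 * after the XOR each matching byte becomes zero, and the chain of 8-bit
 * TESTs jumps straight to SETE with ZF=1 as soon as a zero byte is seen. */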
   580 DIV0S Rm, Rn {:
   581     COUNT_INST(I_DIV0S);
   582     load_reg( R_EAX, Rm );
   583     load_reg( R_ECX, Rn );
   584     SHR_imm8_r32( 31, R_EAX );
   585     SHR_imm8_r32( 31, R_ECX );
   586     store_spreg( R_EAX, R_M );
   587     store_spreg( R_ECX, R_Q );
   588     CMP_r32_r32( R_EAX, R_ECX );
   589     SETNE_t();
   590     sh4_x86.tstate = TSTATE_NE;
   591 :}
   592 DIV0U {:  
   593     COUNT_INST(I_DIV0U);
   594     XOR_r32_r32( R_EAX, R_EAX );
   595     store_spreg( R_EAX, R_Q );
   596     store_spreg( R_EAX, R_M );
   597     store_spreg( R_EAX, R_T );
   598     sh4_x86.tstate = TSTATE_C; // works for DIV1
   599 :}
   600 DIV1 Rm, Rn {:
   601     COUNT_INST(I_DIV1);
   602     load_spreg( R_ECX, R_M );
   603     load_reg( R_EAX, Rn );
   604     if( sh4_x86.tstate != TSTATE_C ) {
   605 	LDC_t();
   606     }
   607     RCL1_r32( R_EAX );
   608     SETC_r8( R_DL ); // Q'
   609     CMP_sh4r_r32( R_Q, R_ECX );
   610     JE_rel8(mqequal);
   611     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   612     JMP_rel8(end);
   613     JMP_TARGET(mqequal);
   614     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   615     JMP_TARGET(end);
   616     store_reg( R_EAX, Rn ); // Done with Rn now
   617     SETC_r8(R_AL); // tmp1
   618     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   619     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   620     store_spreg( R_ECX, R_Q );
   621     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   622     MOVZX_r8_r32( R_AL, R_EAX );
   623     store_spreg( R_EAX, R_T );
   624     sh4_x86.tstate = TSTATE_NONE;
   625 :}
   626 DMULS.L Rm, Rn {:  
   627     COUNT_INST(I_DMULS);
   628     load_reg( R_EAX, Rm );
   629     load_reg( R_ECX, Rn );
   630     IMUL_r32(R_ECX);
   631     store_spreg( R_EDX, R_MACH );
   632     store_spreg( R_EAX, R_MACL );
   633     sh4_x86.tstate = TSTATE_NONE;
   634 :}
   635 DMULU.L Rm, Rn {:  
   636     COUNT_INST(I_DMULU);
   637     load_reg( R_EAX, Rm );
   638     load_reg( R_ECX, Rn );
   639     MUL_r32(R_ECX);
   640     store_spreg( R_EDX, R_MACH );
   641     store_spreg( R_EAX, R_MACL );    
   642     sh4_x86.tstate = TSTATE_NONE;
   643 :}
   644 DT Rn {:  
   645     COUNT_INST(I_DT);
   646     load_reg( R_EAX, Rn );
   647     ADD_imm8s_r32( -1, R_EAX );
   648     store_reg( R_EAX, Rn );
   649     SETE_t();
   650     sh4_x86.tstate = TSTATE_E;
   651 :}
   652 EXTS.B Rm, Rn {:  
   653     COUNT_INST(I_EXTSB);
   654     load_reg( R_EAX, Rm );
   655     MOVSX_r8_r32( R_EAX, R_EAX );
   656     store_reg( R_EAX, Rn );
   657 :}
   658 EXTS.W Rm, Rn {:  
   659     COUNT_INST(I_EXTSW);
   660     load_reg( R_EAX, Rm );
   661     MOVSX_r16_r32( R_EAX, R_EAX );
   662     store_reg( R_EAX, Rn );
   663 :}
   664 EXTU.B Rm, Rn {:  
   665     COUNT_INST(I_EXTUB);
   666     load_reg( R_EAX, Rm );
   667     MOVZX_r8_r32( R_EAX, R_EAX );
   668     store_reg( R_EAX, Rn );
   669 :}
   670 EXTU.W Rm, Rn {:  
   671     COUNT_INST(I_EXTUW);
   672     load_reg( R_EAX, Rm );
   673     MOVZX_r16_r32( R_EAX, R_EAX );
   674     store_reg( R_EAX, Rn );
   675 :}
   676 MAC.L @Rm+, @Rn+ {:
   677     COUNT_INST(I_MACL);
   678     if( Rm == Rn ) {
   679 	load_reg( R_EAX, Rm );
   680 	check_ralign32( R_EAX );
   681 	MMU_TRANSLATE_READ( R_EAX );
   682 	PUSH_realigned_r32( R_EAX );
   683 	load_reg( R_EAX, Rn );
   684 	ADD_imm8s_r32( 4, R_EAX );
   685 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   686 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
   687 	// Note translate twice in case of page boundaries. Maybe worth
   688 	// adding a page-boundary check to skip the second translation
   689     } else {
   690 	load_reg( R_EAX, Rm );
   691 	check_ralign32( R_EAX );
   692 	MMU_TRANSLATE_READ( R_EAX );
   693 	load_reg( R_ECX, Rn );
   694 	check_ralign32( R_ECX );
   695 	PUSH_realigned_r32( R_EAX );
   696 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   697 	MOV_r32_r32( R_ECX, R_EAX );
   698 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   699 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   700     }
   701     MEM_READ_LONG( R_EAX, R_EAX );
   702     POP_r32( R_ECX );
   703     PUSH_r32( R_EAX );
   704     MEM_READ_LONG( R_ECX, R_EAX );
   705     POP_realigned_r32( R_ECX );
   707     IMUL_r32( R_ECX );
   708     ADD_r32_sh4r( R_EAX, R_MACL );
   709     ADC_r32_sh4r( R_EDX, R_MACH );
   711     load_spreg( R_ECX, R_S );
   712     TEST_r32_r32(R_ECX, R_ECX);
   713     JE_rel8( nosat );
   714     call_func0( signsat48 );
   715     JMP_TARGET( nosat );
   716     sh4_x86.tstate = TSTATE_NONE;
   717 :}
   718 MAC.W @Rm+, @Rn+ {:  
   719     COUNT_INST(I_MACW);
   720     if( Rm == Rn ) {
   721 	load_reg( R_EAX, Rm );
   722 	check_ralign16( R_EAX );
   723 	MMU_TRANSLATE_READ( R_EAX );
   724 	PUSH_realigned_r32( R_EAX );
   725 	load_reg( R_EAX, Rn );
   726 	ADD_imm8s_r32( 2, R_EAX );
   727 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   728 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   729 	// Note translate twice in case of page boundaries. Maybe worth
   730 	// adding a page-boundary check to skip the second translation
   731     } else {
   732 	load_reg( R_EAX, Rm );
   733 	check_ralign16( R_EAX );
   734 	MMU_TRANSLATE_READ( R_EAX );
   735 	load_reg( R_ECX, Rn );
   736 	check_ralign16( R_ECX );
   737 	PUSH_realigned_r32( R_EAX );
   738 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   739 	MOV_r32_r32( R_ECX, R_EAX );
   740 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   741 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   742     }
   743     MEM_READ_WORD( R_EAX, R_EAX );
   744     POP_r32( R_ECX );
   745     PUSH_r32( R_EAX );
   746     MEM_READ_WORD( R_ECX, R_EAX );
   747     POP_realigned_r32( R_ECX );
   748     IMUL_r32( R_ECX );
   750     load_spreg( R_ECX, R_S );
   751     TEST_r32_r32( R_ECX, R_ECX );
   752     JE_rel8( nosat );
   754     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   755     JNO_rel8( end );            // 2
   756     load_imm32( R_EDX, 1 );         // 5
   757     store_spreg( R_EDX, R_MACH );   // 6
   758     JS_rel8( positive );        // 2
   759     load_imm32( R_EAX, 0x80000000 );// 5
   760     store_spreg( R_EAX, R_MACL );   // 6
   761     JMP_rel8(end2);           // 2
   763     JMP_TARGET(positive);
   764     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   765     store_spreg( R_EAX, R_MACL );   // 6
   766     JMP_rel8(end3);            // 2
   768     JMP_TARGET(nosat);
   769     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   770     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   771     JMP_TARGET(end);
   772     JMP_TARGET(end2);
   773     JMP_TARGET(end3);
   774     sh4_x86.tstate = TSTATE_NONE;
   775 :}
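/* This implements MAC.W's S-flag behaviour: with S=1 the product is
 * accumulated into MACL alone, clamped to 0x80000000/0x7FFFFFFF on overflow
 * with MACH set to 1 as the overflow marker; with S=0 (the nosat path) the
 * full 64-bit sum goes into MACH:MACL. */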
   776 MOVT Rn {:  
   777     COUNT_INST(I_MOVT);
   778     load_spreg( R_EAX, R_T );
   779     store_reg( R_EAX, Rn );
   780 :}
   781 MUL.L Rm, Rn {:  
   782     COUNT_INST(I_MULL);
   783     load_reg( R_EAX, Rm );
   784     load_reg( R_ECX, Rn );
   785     MUL_r32( R_ECX );
   786     store_spreg( R_EAX, R_MACL );
   787     sh4_x86.tstate = TSTATE_NONE;
   788 :}
   789 MULS.W Rm, Rn {:
   790     COUNT_INST(I_MULSW);
   791     load_reg16s( R_EAX, Rm );
   792     load_reg16s( R_ECX, Rn );
   793     MUL_r32( R_ECX );
   794     store_spreg( R_EAX, R_MACL );
   795     sh4_x86.tstate = TSTATE_NONE;
   796 :}
   797 MULU.W Rm, Rn {:  
   798     COUNT_INST(I_MULUW);
   799     load_reg16u( R_EAX, Rm );
   800     load_reg16u( R_ECX, Rn );
   801     MUL_r32( R_ECX );
   802     store_spreg( R_EAX, R_MACL );
   803     sh4_x86.tstate = TSTATE_NONE;
   804 :}
   805 NEG Rm, Rn {:
   806     COUNT_INST(I_NEG);
   807     load_reg( R_EAX, Rm );
   808     NEG_r32( R_EAX );
   809     store_reg( R_EAX, Rn );
   810     sh4_x86.tstate = TSTATE_NONE;
   811 :}
   812 NEGC Rm, Rn {:  
   813     COUNT_INST(I_NEGC);
   814     load_reg( R_EAX, Rm );
   815     XOR_r32_r32( R_ECX, R_ECX );
   816     LDC_t();
   817     SBB_r32_r32( R_EAX, R_ECX );
   818     store_reg( R_ECX, Rn );
   819     SETC_t();
   820     sh4_x86.tstate = TSTATE_C;
   821 :}
   822 NOT Rm, Rn {:  
   823     COUNT_INST(I_NOT);
   824     load_reg( R_EAX, Rm );
   825     NOT_r32( R_EAX );
   826     store_reg( R_EAX, Rn );
   827     sh4_x86.tstate = TSTATE_NONE;
   828 :}
   829 OR Rm, Rn {:  
   830     COUNT_INST(I_OR);
   831     load_reg( R_EAX, Rm );
   832     load_reg( R_ECX, Rn );
   833     OR_r32_r32( R_EAX, R_ECX );
   834     store_reg( R_ECX, Rn );
   835     sh4_x86.tstate = TSTATE_NONE;
   836 :}
   837 OR #imm, R0 {:
   838     COUNT_INST(I_ORI);
   839     load_reg( R_EAX, 0 );
   840     OR_imm32_r32(imm, R_EAX);
   841     store_reg( R_EAX, 0 );
   842     sh4_x86.tstate = TSTATE_NONE;
   843 :}
   844 OR.B #imm, @(R0, GBR) {:  
   845     COUNT_INST(I_ORB);
   846     load_reg( R_EAX, 0 );
   847     load_spreg( R_ECX, R_GBR );
   848     ADD_r32_r32( R_ECX, R_EAX );
   849     MMU_TRANSLATE_WRITE( R_EAX );
   850     PUSH_realigned_r32(R_EAX);
   851     MEM_READ_BYTE( R_EAX, R_EAX );
   852     POP_realigned_r32(R_ECX);
   853     OR_imm32_r32(imm, R_EAX );
   854     MEM_WRITE_BYTE( R_ECX, R_EAX );
   855     sh4_x86.tstate = TSTATE_NONE;
   856 :}
   857 ROTCL Rn {:
   858     COUNT_INST(I_ROTCL);
   859     load_reg( R_EAX, Rn );
   860     if( sh4_x86.tstate != TSTATE_C ) {
   861 	LDC_t();
   862     }
   863     RCL1_r32( R_EAX );
   864     store_reg( R_EAX, Rn );
   865     SETC_t();
   866     sh4_x86.tstate = TSTATE_C;
   867 :}
   868 ROTCR Rn {:  
   869     COUNT_INST(I_ROTCR);
   870     load_reg( R_EAX, Rn );
   871     if( sh4_x86.tstate != TSTATE_C ) {
   872 	LDC_t();
   873     }
   874     RCR1_r32( R_EAX );
   875     store_reg( R_EAX, Rn );
   876     SETC_t();
   877     sh4_x86.tstate = TSTATE_C;
   878 :}
   879 ROTL Rn {:  
   880     COUNT_INST(I_ROTL);
   881     load_reg( R_EAX, Rn );
   882     ROL1_r32( R_EAX );
   883     store_reg( R_EAX, Rn );
   884     SETC_t();
   885     sh4_x86.tstate = TSTATE_C;
   886 :}
   887 ROTR Rn {:  
   888     COUNT_INST(I_ROTR);
   889     load_reg( R_EAX, Rn );
   890     ROR1_r32( R_EAX );
   891     store_reg( R_EAX, Rn );
   892     SETC_t();
   893     sh4_x86.tstate = TSTATE_C;
   894 :}
   895 SHAD Rm, Rn {:
   896     COUNT_INST(I_SHAD);
   897     /* Annoyingly enough, not directly convertible */
   898     load_reg( R_EAX, Rn );
   899     load_reg( R_ECX, Rm );
   900     CMP_imm32_r32( 0, R_ECX );
   901     JGE_rel8(doshl);
   903     NEG_r32( R_ECX );      // 2
   904     AND_imm8_r8( 0x1F, R_CL ); // 3
   905     JE_rel8(emptysar);     // 2
   906     SAR_r32_CL( R_EAX );       // 2
   907     JMP_rel8(end);          // 2
   909     JMP_TARGET(emptysar);
   910     SAR_imm8_r32(31, R_EAX );  // 3
   911     JMP_rel8(end2);
   913     JMP_TARGET(doshl);
   914     AND_imm8_r8( 0x1F, R_CL ); // 3
   915     SHL_r32_CL( R_EAX );       // 2
   916     JMP_TARGET(end);
   917     JMP_TARGET(end2);
   918     store_reg( R_EAX, Rn );
   919     sh4_x86.tstate = TSTATE_NONE;
   920 :}
   921 SHLD Rm, Rn {:  
   922     COUNT_INST(I_SHLD);
   923     load_reg( R_EAX, Rn );
   924     load_reg( R_ECX, Rm );
   925     CMP_imm32_r32( 0, R_ECX );
   926     JGE_rel8(doshl);
   928     NEG_r32( R_ECX );      // 2
   929     AND_imm8_r8( 0x1F, R_CL ); // 3
   930     JE_rel8(emptyshr );
   931     SHR_r32_CL( R_EAX );       // 2
   932     JMP_rel8(end);          // 2
   934     JMP_TARGET(emptyshr);
   935     XOR_r32_r32( R_EAX, R_EAX );
   936     JMP_rel8(end2);
   938     JMP_TARGET(doshl);
   939     AND_imm8_r8( 0x1F, R_CL ); // 3
   940     SHL_r32_CL( R_EAX );       // 2
   941     JMP_TARGET(end);
   942     JMP_TARGET(end2);
   943     store_reg( R_EAX, Rn );
   944     sh4_x86.tstate = TSTATE_NONE;
   945 :}
   946 SHAL Rn {: 
   947     COUNT_INST(I_SHAL);
   948     load_reg( R_EAX, Rn );
   949     SHL1_r32( R_EAX );
   950     SETC_t();
   951     store_reg( R_EAX, Rn );
   952     sh4_x86.tstate = TSTATE_C;
   953 :}
   954 SHAR Rn {:  
   955     COUNT_INST(I_SHAR);
   956     load_reg( R_EAX, Rn );
   957     SAR1_r32( R_EAX );
   958     SETC_t();
   959     store_reg( R_EAX, Rn );
   960     sh4_x86.tstate = TSTATE_C;
   961 :}
   962 SHLL Rn {:  
   963     COUNT_INST(I_SHLL);
   964     load_reg( R_EAX, Rn );
   965     SHL1_r32( R_EAX );
   966     SETC_t();
   967     store_reg( R_EAX, Rn );
   968     sh4_x86.tstate = TSTATE_C;
   969 :}
   970 SHLL2 Rn {:
   971     COUNT_INST(I_SHLL);
   972     load_reg( R_EAX, Rn );
   973     SHL_imm8_r32( 2, R_EAX );
   974     store_reg( R_EAX, Rn );
   975     sh4_x86.tstate = TSTATE_NONE;
   976 :}
   977 SHLL8 Rn {:  
   978     COUNT_INST(I_SHLL);
   979     load_reg( R_EAX, Rn );
   980     SHL_imm8_r32( 8, R_EAX );
   981     store_reg( R_EAX, Rn );
   982     sh4_x86.tstate = TSTATE_NONE;
   983 :}
   984 SHLL16 Rn {:  
   985     COUNT_INST(I_SHLL);
   986     load_reg( R_EAX, Rn );
   987     SHL_imm8_r32( 16, R_EAX );
   988     store_reg( R_EAX, Rn );
   989     sh4_x86.tstate = TSTATE_NONE;
   990 :}
   991 SHLR Rn {:  
   992     COUNT_INST(I_SHLR);
   993     load_reg( R_EAX, Rn );
   994     SHR1_r32( R_EAX );
   995     SETC_t();
   996     store_reg( R_EAX, Rn );
   997     sh4_x86.tstate = TSTATE_C;
   998 :}
   999 SHLR2 Rn {:  
  1000     COUNT_INST(I_SHLR);
  1001     load_reg( R_EAX, Rn );
  1002     SHR_imm8_r32( 2, R_EAX );
  1003     store_reg( R_EAX, Rn );
  1004     sh4_x86.tstate = TSTATE_NONE;
  1005 :}
  1006 SHLR8 Rn {:  
  1007     COUNT_INST(I_SHLR);
  1008     load_reg( R_EAX, Rn );
  1009     SHR_imm8_r32( 8, R_EAX );
  1010     store_reg( R_EAX, Rn );
  1011     sh4_x86.tstate = TSTATE_NONE;
  1012 :}
  1013 SHLR16 Rn {:  
  1014     COUNT_INST(I_SHLR);
  1015     load_reg( R_EAX, Rn );
  1016     SHR_imm8_r32( 16, R_EAX );
  1017     store_reg( R_EAX, Rn );
  1018     sh4_x86.tstate = TSTATE_NONE;
  1019 :}
  1020 SUB Rm, Rn {:  
  1021     COUNT_INST(I_SUB);
  1022     load_reg( R_EAX, Rm );
  1023     load_reg( R_ECX, Rn );
  1024     SUB_r32_r32( R_EAX, R_ECX );
  1025     store_reg( R_ECX, Rn );
  1026     sh4_x86.tstate = TSTATE_NONE;
  1027 :}
  1028 SUBC Rm, Rn {:  
  1029     COUNT_INST(I_SUBC);
  1030     load_reg( R_EAX, Rm );
  1031     load_reg( R_ECX, Rn );
  1032     if( sh4_x86.tstate != TSTATE_C ) {
  1033 	LDC_t();
  1034     }
  1035     SBB_r32_r32( R_EAX, R_ECX );
  1036     store_reg( R_ECX, Rn );
  1037     SETC_t();
  1038     sh4_x86.tstate = TSTATE_C;
  1039 :}
  1040 SUBV Rm, Rn {:  
  1041     COUNT_INST(I_SUBV);
  1042     load_reg( R_EAX, Rm );
  1043     load_reg( R_ECX, Rn );
  1044     SUB_r32_r32( R_EAX, R_ECX );
  1045     store_reg( R_ECX, Rn );
  1046     SETO_t();
  1047     sh4_x86.tstate = TSTATE_O;
  1048 :}
  1049 SWAP.B Rm, Rn {:  
  1050     COUNT_INST(I_SWAPB);
  1051     load_reg( R_EAX, Rm );
  1052     XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
  1053     store_reg( R_EAX, Rn );
  1054 :}
  1055 SWAP.W Rm, Rn {:  
  1056     COUNT_INST(I_SWAPB);
  1057     load_reg( R_EAX, Rm );
  1058     MOV_r32_r32( R_EAX, R_ECX );
  1059     SHL_imm8_r32( 16, R_ECX );
  1060     SHR_imm8_r32( 16, R_EAX );
  1061     OR_r32_r32( R_EAX, R_ECX );
  1062     store_reg( R_ECX, Rn );
  1063     sh4_x86.tstate = TSTATE_NONE;
  1064 :}
  1065 TAS.B @Rn {:  
  1066     COUNT_INST(I_TASB);
  1067     load_reg( R_EAX, Rn );
  1068     MMU_TRANSLATE_WRITE( R_EAX );
  1069     PUSH_realigned_r32( R_EAX );
  1070     MEM_READ_BYTE( R_EAX, R_EAX );
  1071     TEST_r8_r8( R_AL, R_AL );
  1072     SETE_t();
  1073     OR_imm8_r8( 0x80, R_AL );
  1074     POP_realigned_r32( R_ECX );
  1075     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1076     sh4_x86.tstate = TSTATE_NONE;
  1077 :}
  1078 TST Rm, Rn {:  
  1079     COUNT_INST(I_TST);
  1080     load_reg( R_EAX, Rm );
  1081     load_reg( R_ECX, Rn );
  1082     TEST_r32_r32( R_EAX, R_ECX );
  1083     SETE_t();
  1084     sh4_x86.tstate = TSTATE_E;
  1085 :}
  1086 TST #imm, R0 {:  
  1087     COUNT_INST(I_TSTI);
  1088     load_reg( R_EAX, 0 );
  1089     TEST_imm32_r32( imm, R_EAX );
  1090     SETE_t();
  1091     sh4_x86.tstate = TSTATE_E;
  1092 :}
  1093 TST.B #imm, @(R0, GBR) {:  
  1094     COUNT_INST(I_TSTB);
  1095     load_reg( R_EAX, 0);
  1096     load_spreg( R_ECX, R_GBR );
  1097     ADD_r32_r32( R_ECX, R_EAX );
  1098     MMU_TRANSLATE_READ( R_EAX );
  1099     MEM_READ_BYTE( R_EAX, R_EAX );
  1100     TEST_imm8_r8( imm, R_AL );
  1101     SETE_t();
  1102     sh4_x86.tstate = TSTATE_E;
  1103 :}
  1104 XOR Rm, Rn {:  
  1105     COUNT_INST(I_XOR);
  1106     load_reg( R_EAX, Rm );
  1107     load_reg( R_ECX, Rn );
  1108     XOR_r32_r32( R_EAX, R_ECX );
  1109     store_reg( R_ECX, Rn );
  1110     sh4_x86.tstate = TSTATE_NONE;
  1111 :}
  1112 XOR #imm, R0 {:  
  1113     COUNT_INST(I_XORI);
  1114     load_reg( R_EAX, 0 );
  1115     XOR_imm32_r32( imm, R_EAX );
  1116     store_reg( R_EAX, 0 );
  1117     sh4_x86.tstate = TSTATE_NONE;
  1118 :}
  1119 XOR.B #imm, @(R0, GBR) {:  
  1120     COUNT_INST(I_XORB);
  1121     load_reg( R_EAX, 0 );
  1122     load_spreg( R_ECX, R_GBR );
  1123     ADD_r32_r32( R_ECX, R_EAX );
  1124     MMU_TRANSLATE_WRITE( R_EAX );
  1125     PUSH_realigned_r32(R_EAX);
  1126     MEM_READ_BYTE(R_EAX, R_EAX);
  1127     POP_realigned_r32(R_ECX);
  1128     XOR_imm32_r32( imm, R_EAX );
  1129     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1130     sh4_x86.tstate = TSTATE_NONE;
  1131 :}
  1132 XTRCT Rm, Rn {:
  1133     COUNT_INST(I_XTRCT);
  1134     load_reg( R_EAX, Rm );
  1135     load_reg( R_ECX, Rn );
  1136     SHL_imm8_r32( 16, R_EAX );
  1137     SHR_imm8_r32( 16, R_ECX );
  1138     OR_r32_r32( R_EAX, R_ECX );
  1139     store_reg( R_ECX, Rn );
  1140     sh4_x86.tstate = TSTATE_NONE;
  1141 :}
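/* XTRCT forms the middle 32 bits of the 64-bit concatenation Rm:Rn --
 * the low half of Rm shifted into the top half, the high half of Rn in
 * the bottom half. */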
  1143 /* Data move instructions */
  1144 MOV Rm, Rn {:  
  1145     COUNT_INST(I_MOV);
  1146     load_reg( R_EAX, Rm );
  1147     store_reg( R_EAX, Rn );
  1148 :}
  1149 MOV #imm, Rn {:  
  1150     COUNT_INST(I_MOVI);
  1151     load_imm32( R_EAX, imm );
  1152     store_reg( R_EAX, Rn );
  1153 :}
  1154 MOV.B Rm, @Rn {:  
  1155     COUNT_INST(I_MOVB);
  1156     load_reg( R_EAX, Rn );
  1157     MMU_TRANSLATE_WRITE( R_EAX );
  1158     load_reg( R_EDX, Rm );
  1159     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1160     sh4_x86.tstate = TSTATE_NONE;
  1161 :}
  1162 MOV.B Rm, @-Rn {:  
  1163     COUNT_INST(I_MOVB);
  1164     load_reg( R_EAX, Rn );
  1165     ADD_imm8s_r32( -1, R_EAX );
  1166     MMU_TRANSLATE_WRITE( R_EAX );
  1167     load_reg( R_EDX, Rm );
  1168     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
  1169     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1170     sh4_x86.tstate = TSTATE_NONE;
  1171 :}
  1172 MOV.B Rm, @(R0, Rn) {:  
  1173     COUNT_INST(I_MOVB);
  1174     load_reg( R_EAX, 0 );
  1175     load_reg( R_ECX, Rn );
  1176     ADD_r32_r32( R_ECX, R_EAX );
  1177     MMU_TRANSLATE_WRITE( R_EAX );
  1178     load_reg( R_EDX, Rm );
  1179     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1180     sh4_x86.tstate = TSTATE_NONE;
  1181 :}
  1182 MOV.B R0, @(disp, GBR) {:  
  1183     COUNT_INST(I_MOVB);
  1184     load_spreg( R_EAX, R_GBR );
  1185     ADD_imm32_r32( disp, R_EAX );
  1186     MMU_TRANSLATE_WRITE( R_EAX );
  1187     load_reg( R_EDX, 0 );
  1188     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1189     sh4_x86.tstate = TSTATE_NONE;
  1190 :}
  1191 MOV.B R0, @(disp, Rn) {:  
  1192     COUNT_INST(I_MOVB);
  1193     load_reg( R_EAX, Rn );
  1194     ADD_imm32_r32( disp, R_EAX );
  1195     MMU_TRANSLATE_WRITE( R_EAX );
  1196     load_reg( R_EDX, 0 );
  1197     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1198     sh4_x86.tstate = TSTATE_NONE;
  1199 :}
  1200 MOV.B @Rm, Rn {:  
  1201     COUNT_INST(I_MOVB);
  1202     load_reg( R_EAX, Rm );
  1203     MMU_TRANSLATE_READ( R_EAX );
  1204     MEM_READ_BYTE( R_EAX, R_EAX );
  1205     store_reg( R_EAX, Rn );
  1206     sh4_x86.tstate = TSTATE_NONE;
  1207 :}
  1208 MOV.B @Rm+, Rn {:  
  1209     COUNT_INST(I_MOVB);
  1210     load_reg( R_EAX, Rm );
  1211     MMU_TRANSLATE_READ( R_EAX );
  1212     ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
  1213     MEM_READ_BYTE( R_EAX, R_EAX );
  1214     store_reg( R_EAX, Rn );
  1215     sh4_x86.tstate = TSTATE_NONE;
  1216 :}
  1217 MOV.B @(R0, Rm), Rn {:  
  1218     COUNT_INST(I_MOVB);
  1219     load_reg( R_EAX, 0 );
  1220     load_reg( R_ECX, Rm );
  1221     ADD_r32_r32( R_ECX, R_EAX );
  1222 	MMU_TRANSLATE_READ( R_EAX );
  1223     MEM_READ_BYTE( R_EAX, R_EAX );
  1224     store_reg( R_EAX, Rn );
  1225     sh4_x86.tstate = TSTATE_NONE;
  1226 :}
  1227 MOV.B @(disp, GBR), R0 {:  
  1228     COUNT_INST(I_MOVB);
  1229     load_spreg( R_EAX, R_GBR );
  1230     ADD_imm32_r32( disp, R_EAX );
  1231     MMU_TRANSLATE_READ( R_EAX );
  1232     MEM_READ_BYTE( R_EAX, R_EAX );
  1233     store_reg( R_EAX, 0 );
  1234     sh4_x86.tstate = TSTATE_NONE;
  1235 :}
  1236 MOV.B @(disp, Rm), R0 {:  
  1237     COUNT_INST(I_MOVB);
  1238     load_reg( R_EAX, Rm );
  1239     ADD_imm32_r32( disp, R_EAX );
  1240     MMU_TRANSLATE_READ( R_EAX );
  1241     MEM_READ_BYTE( R_EAX, R_EAX );
  1242     store_reg( R_EAX, 0 );
  1243     sh4_x86.tstate = TSTATE_NONE;
  1244 :}
  1245 MOV.L Rm, @Rn {:
  1246     COUNT_INST(I_MOVL);
  1247     load_reg( R_EAX, Rn );
  1248     check_walign32(R_EAX);
  1249     MMU_TRANSLATE_WRITE( R_EAX );
  1250     load_reg( R_EDX, Rm );
  1251     MEM_WRITE_LONG( R_EAX, R_EDX );
  1252     sh4_x86.tstate = TSTATE_NONE;
  1253 :}
  1254 MOV.L Rm, @-Rn {:  
  1255     COUNT_INST(I_MOVL);
  1256     load_reg( R_EAX, Rn );
  1257     ADD_imm8s_r32( -4, R_EAX );
  1258     check_walign32( R_EAX );
  1259     MMU_TRANSLATE_WRITE( R_EAX );
  1260     load_reg( R_EDX, Rm );
  1261     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  1262     MEM_WRITE_LONG( R_EAX, R_EDX );
  1263     sh4_x86.tstate = TSTATE_NONE;
  1264 :}
  1265 MOV.L Rm, @(R0, Rn) {:  
  1266     COUNT_INST(I_MOVL);
  1267     load_reg( R_EAX, 0 );
  1268     load_reg( R_ECX, Rn );
  1269     ADD_r32_r32( R_ECX, R_EAX );
  1270     check_walign32( R_EAX );
  1271     MMU_TRANSLATE_WRITE( R_EAX );
  1272     load_reg( R_EDX, Rm );
  1273     MEM_WRITE_LONG( R_EAX, R_EDX );
  1274     sh4_x86.tstate = TSTATE_NONE;
  1275 :}
  1276 MOV.L R0, @(disp, GBR) {:  
  1277     COUNT_INST(I_MOVL);
  1278     load_spreg( R_EAX, R_GBR );
  1279     ADD_imm32_r32( disp, R_EAX );
  1280     check_walign32( R_EAX );
  1281     MMU_TRANSLATE_WRITE( R_EAX );
  1282     load_reg( R_EDX, 0 );
  1283     MEM_WRITE_LONG( R_EAX, R_EDX );
  1284     sh4_x86.tstate = TSTATE_NONE;
  1285 :}
  1286 MOV.L Rm, @(disp, Rn) {:  
  1287     COUNT_INST(I_MOVL);
  1288     load_reg( R_EAX, Rn );
  1289     ADD_imm32_r32( disp, R_EAX );
  1290     check_walign32( R_EAX );
  1291     MMU_TRANSLATE_WRITE( R_EAX );
  1292     load_reg( R_EDX, Rm );
  1293     MEM_WRITE_LONG( R_EAX, R_EDX );
  1294     sh4_x86.tstate = TSTATE_NONE;
  1295 :}
  1296 MOV.L @Rm, Rn {:  
  1297     COUNT_INST(I_MOVL);
  1298     load_reg( R_EAX, Rm );
  1299     check_ralign32( R_EAX );
  1300     MMU_TRANSLATE_READ( R_EAX );
  1301     MEM_READ_LONG( R_EAX, R_EAX );
  1302     store_reg( R_EAX, Rn );
  1303     sh4_x86.tstate = TSTATE_NONE;
  1304 :}
  1305 MOV.L @Rm+, Rn {:  
  1306     COUNT_INST(I_MOVL);
  1307     load_reg( R_EAX, Rm );
  1308     check_ralign32( R_EAX );
  1309     MMU_TRANSLATE_READ( R_EAX );
  1310     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1311     MEM_READ_LONG( R_EAX, R_EAX );
  1312     store_reg( R_EAX, Rn );
  1313     sh4_x86.tstate = TSTATE_NONE;
  1314 :}
  1315 MOV.L @(R0, Rm), Rn {:  
  1316     COUNT_INST(I_MOVL);
  1317     load_reg( R_EAX, 0 );
  1318     load_reg( R_ECX, Rm );
  1319     ADD_r32_r32( R_ECX, R_EAX );
  1320     check_ralign32( R_EAX );
  1321     MMU_TRANSLATE_READ( R_EAX );
  1322     MEM_READ_LONG( R_EAX, R_EAX );
  1323     store_reg( R_EAX, Rn );
  1324     sh4_x86.tstate = TSTATE_NONE;
  1325 :}
  1326 MOV.L @(disp, GBR), R0 {:
  1327     COUNT_INST(I_MOVL);
  1328     load_spreg( R_EAX, R_GBR );
  1329     ADD_imm32_r32( disp, R_EAX );
  1330     check_ralign32( R_EAX );
  1331     MMU_TRANSLATE_READ( R_EAX );
  1332     MEM_READ_LONG( R_EAX, R_EAX );
  1333     store_reg( R_EAX, 0 );
  1334     sh4_x86.tstate = TSTATE_NONE;
  1335 :}
  1336 MOV.L @(disp, PC), Rn {:  
  1337     COUNT_INST(I_MOVLPC);
  1338     if( sh4_x86.in_delay_slot ) {
  1339 	SLOTILLEGAL();
  1340     } else {
  1341 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1342 	if( IS_IN_ICACHE(target) ) {
  1343 	    // If the target address is in the same page as the code, it's
  1344 	    // pretty safe to just ref it directly and circumvent the whole
  1345 	    // memory subsystem. (this is a big performance win)
  1347 	    // FIXME: There's a corner-case that's not handled here when
  1348 	    // the current code-page is in the ITLB but not in the UTLB.
  1349 	    // (should generate a TLB miss although need to test SH4 
  1350 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1351 	    // behaviour though.
  1352 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1353 	    MOV_moff32_EAX( ptr );
  1354 	} else {
  1355 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1356 	    // different virtual address than the translation was done with,
  1357 	    // but we can safely assume that the low bits are the same.
  1358 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1359 	    ADD_sh4r_r32( R_PC, R_EAX );
  1360 	    MMU_TRANSLATE_READ( R_EAX );
  1361 	    MEM_READ_LONG( R_EAX, R_EAX );
  1362 	    sh4_x86.tstate = TSTATE_NONE;
  1363 	}
  1364 	store_reg( R_EAX, Rn );
  1365     }
  1366 :}
  1367 MOV.L @(disp, Rm), Rn {:  
  1368     COUNT_INST(I_MOVL);
  1369     load_reg( R_EAX, Rm );
  1370     ADD_imm8s_r32( disp, R_EAX );
  1371     check_ralign32( R_EAX );
  1372     MMU_TRANSLATE_READ( R_EAX );
  1373     MEM_READ_LONG( R_EAX, R_EAX );
  1374     store_reg( R_EAX, Rn );
  1375     sh4_x86.tstate = TSTATE_NONE;
  1376 :}
  1377 MOV.W Rm, @Rn {:  
  1378     COUNT_INST(I_MOVW);
  1379     load_reg( R_EAX, Rn );
  1380     check_walign16( R_EAX );
  1381     MMU_TRANSLATE_WRITE( R_EAX );
  1382     load_reg( R_EDX, Rm );
  1383     MEM_WRITE_WORD( R_EAX, R_EDX );
  1384     sh4_x86.tstate = TSTATE_NONE;
  1385 :}
  1386 MOV.W Rm, @-Rn {:  
  1387     COUNT_INST(I_MOVW);
  1388     load_reg( R_EAX, Rn );
  1389     ADD_imm8s_r32( -2, R_EAX );
  1390     check_walign16( R_EAX );
  1391     MMU_TRANSLATE_WRITE( R_EAX );
  1392     load_reg( R_EDX, Rm );
  1393     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
  1394     MEM_WRITE_WORD( R_EAX, R_EDX );
  1395     sh4_x86.tstate = TSTATE_NONE;
  1396 :}
  1397 MOV.W Rm, @(R0, Rn) {:  
  1398     COUNT_INST(I_MOVW);
  1399     load_reg( R_EAX, 0 );
  1400     load_reg( R_ECX, Rn );
  1401     ADD_r32_r32( R_ECX, R_EAX );
  1402     check_walign16( R_EAX );
  1403     MMU_TRANSLATE_WRITE( R_EAX );
  1404     load_reg( R_EDX, Rm );
  1405     MEM_WRITE_WORD( R_EAX, R_EDX );
  1406     sh4_x86.tstate = TSTATE_NONE;
  1407 :}
  1408 MOV.W R0, @(disp, GBR) {:  
  1409     COUNT_INST(I_MOVW);
  1410     load_spreg( R_EAX, R_GBR );
  1411     ADD_imm32_r32( disp, R_EAX );
  1412     check_walign16( R_EAX );
  1413     MMU_TRANSLATE_WRITE( R_EAX );
  1414     load_reg( R_EDX, 0 );
  1415     MEM_WRITE_WORD( R_EAX, R_EDX );
  1416     sh4_x86.tstate = TSTATE_NONE;
  1417 :}
  1418 MOV.W R0, @(disp, Rn) {:  
  1419     COUNT_INST(I_MOVW);
  1420     load_reg( R_EAX, Rn );
  1421     ADD_imm32_r32( disp, R_EAX );
  1422     check_walign16( R_EAX );
  1423     MMU_TRANSLATE_WRITE( R_EAX );
  1424     load_reg( R_EDX, 0 );
  1425     MEM_WRITE_WORD( R_EAX, R_EDX );
  1426     sh4_x86.tstate = TSTATE_NONE;
  1427 :}
  1428 MOV.W @Rm, Rn {:  
  1429     COUNT_INST(I_MOVW);
  1430     load_reg( R_EAX, Rm );
  1431     check_ralign16( R_EAX );
  1432     MMU_TRANSLATE_READ( R_EAX );
  1433     MEM_READ_WORD( R_EAX, R_EAX );
  1434     store_reg( R_EAX, Rn );
  1435     sh4_x86.tstate = TSTATE_NONE;
  1436 :}
  1437 MOV.W @Rm+, Rn {:  
  1438     COUNT_INST(I_MOVW);
  1439     load_reg( R_EAX, Rm );
  1440     check_ralign16( R_EAX );
  1441     MMU_TRANSLATE_READ( R_EAX );
  1442     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
  1443     MEM_READ_WORD( R_EAX, R_EAX );
  1444     store_reg( R_EAX, Rn );
  1445     sh4_x86.tstate = TSTATE_NONE;
  1446 :}
  1447 MOV.W @(R0, Rm), Rn {:  
  1448     COUNT_INST(I_MOVW);
  1449     load_reg( R_EAX, 0 );
  1450     load_reg( R_ECX, Rm );
  1451     ADD_r32_r32( R_ECX, R_EAX );
  1452     check_ralign16( R_EAX );
  1453     MMU_TRANSLATE_READ( R_EAX );
  1454     MEM_READ_WORD( R_EAX, R_EAX );
  1455     store_reg( R_EAX, Rn );
  1456     sh4_x86.tstate = TSTATE_NONE;
  1457 :}
  1458 MOV.W @(disp, GBR), R0 {:  
  1459     COUNT_INST(I_MOVW);
  1460     load_spreg( R_EAX, R_GBR );
  1461     ADD_imm32_r32( disp, R_EAX );
  1462     check_ralign16( R_EAX );
  1463     MMU_TRANSLATE_READ( R_EAX );
  1464     MEM_READ_WORD( R_EAX, R_EAX );
  1465     store_reg( R_EAX, 0 );
  1466     sh4_x86.tstate = TSTATE_NONE;
  1467 :}
  1468 MOV.W @(disp, PC), Rn {:  
  1469     COUNT_INST(I_MOVW);
  1470     if( sh4_x86.in_delay_slot ) {
  1471 	SLOTILLEGAL();
  1472     } else {
  1473 	// See comments for MOV.L @(disp, PC), Rn
  1474 	uint32_t target = pc + disp + 4;
  1475 	if( IS_IN_ICACHE(target) ) {
  1476 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1477 	    MOV_moff32_EAX( ptr );
  1478 	    MOVSX_r16_r32( R_EAX, R_EAX );
  1479 	} else {
  1480 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1481 	    ADD_sh4r_r32( R_PC, R_EAX );
  1482 	    MMU_TRANSLATE_READ( R_EAX );
  1483 	    MEM_READ_WORD( R_EAX, R_EAX );
  1484 	    sh4_x86.tstate = TSTATE_NONE;
  1485 	}
  1486 	store_reg( R_EAX, Rn );
  1487     }
  1488 :}
  1489 MOV.W @(disp, Rm), R0 {:  
  1490     COUNT_INST(I_MOVW);
  1491     load_reg( R_EAX, Rm );
  1492     ADD_imm32_r32( disp, R_EAX );
  1493     check_ralign16( R_EAX );
  1494     MMU_TRANSLATE_READ( R_EAX );
  1495     MEM_READ_WORD( R_EAX, R_EAX );
  1496     store_reg( R_EAX, 0 );
  1497     sh4_x86.tstate = TSTATE_NONE;
  1498 :}
  1499 MOVA @(disp, PC), R0 {:  
  1500     COUNT_INST(I_MOVA);
  1501     if( sh4_x86.in_delay_slot ) {
  1502 	SLOTILLEGAL();
  1503     } else {
  1504 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1505 	ADD_sh4r_r32( R_PC, R_ECX );
  1506 	store_reg( R_ECX, 0 );
  1507 	sh4_x86.tstate = TSTATE_NONE;
  1508     }
  1509 :}
  1510 MOVCA.L R0, @Rn {:  
  1511     COUNT_INST(I_MOVCA);
  1512     load_reg( R_EAX, Rn );
  1513     check_walign32( R_EAX );
  1514     MMU_TRANSLATE_WRITE( R_EAX );
  1515     load_reg( R_EDX, 0 );
  1516     MEM_WRITE_LONG( R_EAX, R_EDX );
  1517     sh4_x86.tstate = TSTATE_NONE;
  1518 :}
  1520 /* Control transfer instructions */
  1521 BF disp {:
  1522     COUNT_INST(I_BF);
  1523     if( sh4_x86.in_delay_slot ) {
  1524 	SLOTILLEGAL();
  1525     } else {
  1526 	sh4vma_t target = disp + pc + 4;
  1527 	JT_rel8( nottaken );
  1528 	exit_block_rel(target, pc+2 );
  1529 	JMP_TARGET(nottaken);
  1530 	return 2;
  1531     }
  1532 :}
  1533 BF/S disp {:
  1534     COUNT_INST(I_BFS);
  1535     if( sh4_x86.in_delay_slot ) {
  1536 	SLOTILLEGAL();
  1537     } else {
  1538 	sh4_x86.in_delay_slot = DELAY_PC;
  1539 	if( UNTRANSLATABLE(pc+2) ) {
  1540 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1541 	    JT_rel8(nottaken);
  1542 	    ADD_imm32_r32( disp, R_EAX );
  1543 	    JMP_TARGET(nottaken);
  1544 	    ADD_sh4r_r32( R_PC, R_EAX );
  1545 	    store_spreg( R_EAX, R_NEW_PC );
  1546 	    exit_block_emu(pc+2);
  1547 	    sh4_x86.branch_taken = TRUE;
  1548 	    return 2;
  1549 	} else {
  1550 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1551 		CMP_imm8s_sh4r( 1, R_T );
  1552 		sh4_x86.tstate = TSTATE_E;
  1553 	    }
  1554 	    sh4vma_t target = disp + pc + 4;
  1555 	    OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
  1556 	    int save_tstate = sh4_x86.tstate;
  1557 	    sh4_translate_instruction(pc+2);
  1558 	    exit_block_rel( target, pc+4 );
  1560 	    // not taken
  1561 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1562 	    sh4_x86.tstate = save_tstate;
  1563 	    sh4_translate_instruction(pc+2);
  1564 	    return 4;
  1565 	}
  1566     }
  1567 :}
  1568 BRA disp {:  
  1569     COUNT_INST(I_BRA);
  1570     if( sh4_x86.in_delay_slot ) {
  1571 	SLOTILLEGAL();
  1572     } else {
  1573 	sh4_x86.in_delay_slot = DELAY_PC;
  1574 	sh4_x86.branch_taken = TRUE;
  1575 	if( UNTRANSLATABLE(pc+2) ) {
  1576 	    load_spreg( R_EAX, R_PC );
  1577 	    ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
  1578 	    store_spreg( R_EAX, R_NEW_PC );
  1579 	    exit_block_emu(pc+2);
  1580 	    return 2;
  1581 	} else {
  1582 	    sh4_translate_instruction( pc + 2 );
  1583 	    exit_block_rel( disp + pc + 4, pc+4 );
  1584 	    return 4;
  1585 	}
  1586     }
  1587 :}
  1588 BRAF Rn {:  
  1589     COUNT_INST(I_BRAF);
  1590     if( sh4_x86.in_delay_slot ) {
  1591 	SLOTILLEGAL();
  1592     } else {
  1593 	load_spreg( R_EAX, R_PC );
  1594 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1595 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1596 	store_spreg( R_EAX, R_NEW_PC );
  1597 	sh4_x86.in_delay_slot = DELAY_PC;
  1598 	sh4_x86.tstate = TSTATE_NONE;
  1599 	sh4_x86.branch_taken = TRUE;
  1600 	if( UNTRANSLATABLE(pc+2) ) {
  1601 	    exit_block_emu(pc+2);
  1602 	    return 2;
  1603 	} else {
  1604 	    sh4_translate_instruction( pc + 2 );
  1605 	    exit_block_newpcset(pc+2);
  1606 	    return 4;
  1607 	}
  1608     }
  1609 :}
  1610 BSR disp {:  
  1611     COUNT_INST(I_BSR);
  1612     if( sh4_x86.in_delay_slot ) {
  1613 	SLOTILLEGAL();
  1614     } else {
  1615 	load_spreg( R_EAX, R_PC );
  1616 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1617 	store_spreg( R_EAX, R_PR );
  1618 	sh4_x86.in_delay_slot = DELAY_PC;
  1619 	sh4_x86.branch_taken = TRUE;
  1620 	sh4_x86.tstate = TSTATE_NONE;
  1621 	if( UNTRANSLATABLE(pc+2) ) {
  1622 	    ADD_imm32_r32( disp, R_EAX );
  1623 	    store_spreg( R_EAX, R_NEW_PC );
  1624 	    exit_block_emu(pc+2);
  1625 	    return 2;
  1626 	} else {
  1627 	    sh4_translate_instruction( pc + 2 );
  1628 	    exit_block_rel( disp + pc + 4, pc+4 );
  1629 	    return 4;
  1630 	}
  1631     }
  1632 :}
  1633 BSRF Rn {:  
  1634     COUNT_INST(I_BSRF);
  1635     if( sh4_x86.in_delay_slot ) {
  1636 	SLOTILLEGAL();
  1637     } else {
  1638 	load_spreg( R_EAX, R_PC );
  1639 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1640 	store_spreg( R_EAX, R_PR );
  1641 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1642 	store_spreg( R_EAX, R_NEW_PC );
  1644 	sh4_x86.in_delay_slot = DELAY_PC;
  1645 	sh4_x86.tstate = TSTATE_NONE;
  1646 	sh4_x86.branch_taken = TRUE;
  1647 	if( UNTRANSLATABLE(pc+2) ) {
  1648 	    exit_block_emu(pc+2);
  1649 	    return 2;
  1650 	} else {
  1651 	    sh4_translate_instruction( pc + 2 );
  1652 	    exit_block_newpcset(pc+2);
  1653 	    return 4;
  1654 	}
  1655     }
  1656 :}
  1657 BT disp {:
  1658     COUNT_INST(I_BT);
  1659     if( sh4_x86.in_delay_slot ) {
  1660 	SLOTILLEGAL();
  1661     } else {
  1662 	sh4vma_t target = disp + pc + 4;
  1663 	JF_rel8( nottaken );
  1664 	exit_block_rel(target, pc+2 );
  1665 	JMP_TARGET(nottaken);
  1666 	return 2;
  1667     }
  1668 :}
  1669 BT/S disp {:
  1670     COUNT_INST(I_BTS);
  1671     if( sh4_x86.in_delay_slot ) {
  1672 	SLOTILLEGAL();
  1673     } else {
  1674 	sh4_x86.in_delay_slot = DELAY_PC;
  1675 	if( UNTRANSLATABLE(pc+2) ) {
  1676 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1677 	    JF_rel8(nottaken);
  1678 	    ADD_imm32_r32( disp, R_EAX );
  1679 	    JMP_TARGET(nottaken);
  1680 	    ADD_sh4r_r32( R_PC, R_EAX );
  1681 	    store_spreg( R_EAX, R_NEW_PC );
  1682 	    exit_block_emu(pc+2);
  1683 	    sh4_x86.branch_taken = TRUE;
  1684 	    return 2;
  1685 	} else {
  1686 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1687 		CMP_imm8s_sh4r( 1, R_T );
  1688 		sh4_x86.tstate = TSTATE_E;
  1689 	    }
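       	    /* Emit JF with a zero rel32 placeholder; the displacement is
       	     * backpatched once the fall-through (not-taken) code is emitted. */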
  1690 	    OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
  1691 	    int save_tstate = sh4_x86.tstate;
  1692 	    sh4_translate_instruction(pc+2);
  1693 	    exit_block_rel( disp + pc + 4, pc+4 );
  1694 	    // not taken
  1695 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1696 	    sh4_x86.tstate = save_tstate;
  1697 	    sh4_translate_instruction(pc+2);
  1698 	    return 4;
  1699 	}
  1700     }
  1701 :}
  1702 JMP @Rn {:  
  1703     COUNT_INST(I_JMP);
  1704     if( sh4_x86.in_delay_slot ) {
  1705 	SLOTILLEGAL();
  1706     } else {
  1707 	load_reg( R_ECX, Rn );
  1708 	store_spreg( R_ECX, R_NEW_PC );
  1709 	sh4_x86.in_delay_slot = DELAY_PC;
  1710 	sh4_x86.branch_taken = TRUE;
  1711 	if( UNTRANSLATABLE(pc+2) ) {
  1712 	    exit_block_emu(pc+2);
  1713 	    return 2;
  1714 	} else {
  1715 	    sh4_translate_instruction(pc+2);
  1716 	    exit_block_newpcset(pc+2);
  1717 	    return 4;
  1718 	}
  1719     }
  1720 :}
  1721 JSR @Rn {:  
  1722     COUNT_INST(I_JSR);
  1723     if( sh4_x86.in_delay_slot ) {
  1724 	SLOTILLEGAL();
  1725     } else {
  1726 	load_spreg( R_EAX, R_PC );
  1727 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1728 	store_spreg( R_EAX, R_PR );
  1729 	load_reg( R_ECX, Rn );
  1730 	store_spreg( R_ECX, R_NEW_PC );
  1731 	sh4_x86.in_delay_slot = DELAY_PC;
  1732 	sh4_x86.branch_taken = TRUE;
  1733 	sh4_x86.tstate = TSTATE_NONE;
  1734 	if( UNTRANSLATABLE(pc+2) ) {
  1735 	    exit_block_emu(pc+2);
  1736 	    return 2;
  1737 	} else {
  1738 	    sh4_translate_instruction(pc+2);
  1739 	    exit_block_newpcset(pc+2);
  1740 	    return 4;
  1741 	}
  1742     }
  1743 :}
  1744 RTE {:  
  1745     COUNT_INST(I_RTE);
  1746     if( sh4_x86.in_delay_slot ) {
  1747 	SLOTILLEGAL();
  1748     } else {
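       	/* Return from exception: restore PC from SPC and SR from SSR. Writing
       	 * SR may switch register banks and privilege level, so the cached
       	 * priv/fpuen checks are invalidated below. */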
  1749 	check_priv();
  1750 	load_spreg( R_ECX, R_SPC );
  1751 	store_spreg( R_ECX, R_NEW_PC );
  1752 	load_spreg( R_EAX, R_SSR );
  1753 	call_func1( sh4_write_sr, R_EAX );
  1754 	sh4_x86.in_delay_slot = DELAY_PC;
  1755 	sh4_x86.priv_checked = FALSE;
  1756 	sh4_x86.fpuen_checked = FALSE;
  1757 	sh4_x86.tstate = TSTATE_NONE;
  1758 	sh4_x86.branch_taken = TRUE;
  1759 	if( UNTRANSLATABLE(pc+2) ) {
  1760 	    exit_block_emu(pc+2);
  1761 	    return 2;
  1762 	} else {
  1763 	    sh4_translate_instruction(pc+2);
  1764 	    exit_block_newpcset(pc+2);
  1765 	    return 4;
  1766 	}
  1767     }
  1768 :}
  1769 RTS {:  
  1770     COUNT_INST(I_RTS);
  1771     if( sh4_x86.in_delay_slot ) {
  1772 	SLOTILLEGAL();
  1773     } else {
  1774 	load_spreg( R_ECX, R_PR );
  1775 	store_spreg( R_ECX, R_NEW_PC );
  1776 	sh4_x86.in_delay_slot = DELAY_PC;
  1777 	sh4_x86.branch_taken = TRUE;
  1778 	if( UNTRANSLATABLE(pc+2) ) {
  1779 	    exit_block_emu(pc+2);
  1780 	    return 2;
  1781 	} else {
  1782 	    sh4_translate_instruction(pc+2);
  1783 	    exit_block_newpcset(pc+2);
  1784 	    return 4;
  1785 	}
  1786     }
  1787 :}
  1788 TRAPA #imm {:  
  1789     COUNT_INST(I_TRAPA);
  1790     if( sh4_x86.in_delay_slot ) {
  1791 	SLOTILLEGAL();
  1792     } else {
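       	/* Advance sh4r.pc past the TRAPA instruction, then raise the trap
       	 * through the core helper. */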
  1793 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1794 	ADD_r32_sh4r( R_ECX, R_PC );
  1795 	load_imm32( R_EAX, imm );
  1796 	call_func1( sh4_raise_trap, R_EAX );
  1797 	sh4_x86.tstate = TSTATE_NONE;
  1798 	exit_block_pcset(pc);
  1799 	sh4_x86.branch_taken = TRUE;
  1800 	return 2;
  1801     }
  1802 :}
  1803 UNDEF {:  
  1804     COUNT_INST(I_UNDEF);
  1805     if( sh4_x86.in_delay_slot ) {
  1806 	SLOTILLEGAL();
  1807     } else {
  1808 	JMP_exc(EXC_ILLEGAL);
  1809 	return 2;
  1810     }
  1811 :}
  1813 CLRMAC {:  
  1814     COUNT_INST(I_CLRMAC);
  1815     XOR_r32_r32(R_EAX, R_EAX);
  1816     store_spreg( R_EAX, R_MACL );
  1817     store_spreg( R_EAX, R_MACH );
  1818     sh4_x86.tstate = TSTATE_NONE;
  1819 :}
  1820 CLRS {:
  1821     COUNT_INST(I_CLRS);
  1822     CLC();
  1823     SETC_sh4r(R_S);
  1824     sh4_x86.tstate = TSTATE_NONE;
  1825 :}
  1826 CLRT {:  
  1827     COUNT_INST(I_CLRT);
  1828     CLC();
  1829     SETC_t();
  1830     sh4_x86.tstate = TSTATE_C;
  1831 :}
  1832 SETS {:  
  1833     COUNT_INST(I_SETS);
  1834     STC();
  1835     SETC_sh4r(R_S);
  1836     sh4_x86.tstate = TSTATE_NONE;
  1837 :}
  1838 SETT {:  
  1839     COUNT_INST(I_SETT);
  1840     STC();
  1841     SETC_t();
  1842     sh4_x86.tstate = TSTATE_C;
  1843 :}
  1845 /* Floating point moves */
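       /* Note: when FPSCR.SZ=1 (sh4_x86.double_size) FMOV transfers 64-bit
        * register pairs; otherwise it moves single 32-bit values. */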
  1846 FMOV FRm, FRn {:  
  1847     COUNT_INST(I_FMOV1);
  1848     check_fpuen();
  1849     if( sh4_x86.double_size ) {
  1850         load_dr0( R_EAX, FRm );
  1851         load_dr1( R_ECX, FRm );
  1852         store_dr0( R_EAX, FRn );
  1853         store_dr1( R_ECX, FRn );
  1854     } else {
  1855         load_fr( R_EAX, FRm ); // SZ=0 branch
  1856         store_fr( R_EAX, FRn );
  1857     }
  1858 :}
  1859 FMOV FRm, @Rn {: 
  1860     COUNT_INST(I_FMOV2);
  1861     check_fpuen();
  1862     load_reg( R_EAX, Rn );
  1863     if( sh4_x86.double_size ) {
  1864         check_walign64( R_EAX );
  1865         MMU_TRANSLATE_WRITE( R_EAX );
  1866         load_dr0( R_ECX, FRm );
  1867         load_dr1( R_EDX, FRm );
  1868         MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1869     } else {
  1870         check_walign32( R_EAX );
  1871         MMU_TRANSLATE_WRITE( R_EAX );
  1872         load_fr( R_ECX, FRm );
  1873         MEM_WRITE_LONG( R_EAX, R_ECX );
  1874     }
  1875     sh4_x86.tstate = TSTATE_NONE;
  1876 :}
  1877 FMOV @Rm, FRn {:  
  1878     COUNT_INST(I_FMOV5);
  1879     check_fpuen();
  1880     load_reg( R_EAX, Rm );
  1881     if( sh4_x86.double_size ) {
  1882         check_ralign64( R_EAX );
  1883         MMU_TRANSLATE_READ( R_EAX );
  1884         MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1885         store_dr0( R_ECX, FRn );
  1886         store_dr1( R_EAX, FRn );    
  1887     } else {
  1888         check_ralign32( R_EAX );
  1889         MMU_TRANSLATE_READ( R_EAX );
  1890         MEM_READ_LONG( R_EAX, R_EAX );
  1891         store_fr( R_EAX, FRn );
  1892     }
  1893     sh4_x86.tstate = TSTATE_NONE;
  1894 :}
  1895 FMOV FRm, @-Rn {:  
  1896     COUNT_INST(I_FMOV3);
  1897     check_fpuen();
  1898     load_reg( R_EAX, Rn );
  1899     if( sh4_x86.double_size ) {
  1900         check_walign64( R_EAX );
  1901         ADD_imm8s_r32(-8,R_EAX);
  1902         MMU_TRANSLATE_WRITE( R_EAX );
  1903         load_dr0( R_ECX, FRm );
  1904         load_dr1( R_EDX, FRm );
  1905         ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1906         MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1907     } else {
  1908         check_walign32( R_EAX );
  1909         ADD_imm8s_r32( -4, R_EAX );
  1910         MMU_TRANSLATE_WRITE( R_EAX );
  1911         load_fr( R_ECX, FRm );
  1912         ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1913         MEM_WRITE_LONG( R_EAX, R_ECX );
  1914     }
  1915     sh4_x86.tstate = TSTATE_NONE;
  1916 :}
  1917 FMOV @Rm+, FRn {:
  1918     COUNT_INST(I_FMOV6);
  1919     check_fpuen();
  1920     load_reg( R_EAX, Rm );
  1921     if( sh4_x86.double_size ) {
  1922         check_ralign64( R_EAX );
  1923         MMU_TRANSLATE_READ( R_EAX );
  1924         ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1925         MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1926         store_dr0( R_ECX, FRn );
  1927         store_dr1( R_EAX, FRn );
  1928     } else {
  1929         check_ralign32( R_EAX );
  1930         MMU_TRANSLATE_READ( R_EAX );
  1931         ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1932         MEM_READ_LONG( R_EAX, R_EAX );
  1933         store_fr( R_EAX, FRn );
  1934     }
  1935     sh4_x86.tstate = TSTATE_NONE;
  1936 :}
  1937 FMOV FRm, @(R0, Rn) {:  
  1938     COUNT_INST(I_FMOV4);
  1939     check_fpuen();
  1940     load_reg( R_EAX, Rn );
  1941     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1942     if( sh4_x86.double_size ) {
  1943         check_walign64( R_EAX );
  1944         MMU_TRANSLATE_WRITE( R_EAX );
  1945         load_dr0( R_ECX, FRm );
  1946         load_dr1( R_EDX, FRm );
  1947         MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1948     } else {
  1949         check_walign32( R_EAX );
  1950         MMU_TRANSLATE_WRITE( R_EAX );
  1951         load_fr( R_ECX, FRm );
  1952         MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1953     }
  1954     sh4_x86.tstate = TSTATE_NONE;
  1955 :}
  1956 FMOV @(R0, Rm), FRn {:  
  1957     COUNT_INST(I_FMOV7);
  1958     check_fpuen();
  1959     load_reg( R_EAX, Rm );
  1960     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1961     if( sh4_x86.double_size ) {
  1962         check_ralign64( R_EAX );
  1963         MMU_TRANSLATE_READ( R_EAX );
  1964         MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1965         store_dr0( R_ECX, FRn );
  1966         store_dr1( R_EAX, FRn );
  1967     } else {
  1968         check_ralign32( R_EAX );
  1969         MMU_TRANSLATE_READ( R_EAX );
  1970         MEM_READ_LONG( R_EAX, R_EAX );
  1971         store_fr( R_EAX, FRn );
  1972     }
  1973     sh4_x86.tstate = TSTATE_NONE;
  1974 :}
  1975 FLDI0 FRn {:  /* IFF PR=0 */
  1976     COUNT_INST(I_FLDI0);
  1977     check_fpuen();
  1978     if( sh4_x86.double_prec == 0 ) {
  1979         XOR_r32_r32( R_EAX, R_EAX );
  1980         store_fr( R_EAX, FRn );
  1981     }
  1982     sh4_x86.tstate = TSTATE_NONE;
  1983 :}
  1984 FLDI1 FRn {:  /* IFF PR=0 */
  1985     COUNT_INST(I_FLDI1);
  1986     check_fpuen();
  1987     if( sh4_x86.double_prec == 0 ) {
  1988         load_imm32(R_EAX, 0x3F800000);
  1989         store_fr( R_EAX, FRn );
  1990     }
  1991 :}
  1993 FLOAT FPUL, FRn {:  
  1994     COUNT_INST(I_FLOAT);
  1995     check_fpuen();
  1996     FILD_sh4r(R_FPUL);
  1997     if( sh4_x86.double_prec ) {
  1998         pop_dr( FRn );
  1999     } else {
  2000         pop_fr( FRn );
  2001     }
  2002 :}
  2003 FTRC FRm, FPUL {:  
  2004     COUNT_INST(I_FTRC);
  2005     check_fpuen();
  2006     if( sh4_x86.double_prec ) {
  2007         push_dr( FRm );
  2008     } else {
  2009         push_fr( FRm );
  2010     }
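           /* Saturate out-of-range values: if the operand is >= max_int or
            * <= min_int, store the clamped integer directly; otherwise convert
            * with the FPU control word temporarily set to round-to-zero. */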
  2011     load_ptr( R_ECX, &max_int );
  2012     FILD_r32ind( R_ECX );
  2013     FCOMIP_st(1);
  2014     JNA_rel8( sat );
  2015     load_ptr( R_ECX, &min_int );  // 5
  2016     FILD_r32ind( R_ECX );           // 2
  2017     FCOMIP_st(1);                   // 2
  2018     JAE_rel8( sat2 );            // 2
  2019     load_ptr( R_EAX, &save_fcw );
  2020     FNSTCW_r32ind( R_EAX );
  2021     load_ptr( R_EDX, &trunc_fcw );
  2022     FLDCW_r32ind( R_EDX );
  2023     FISTP_sh4r(R_FPUL);             // 3
  2024     FLDCW_r32ind( R_EAX );
  2025     JMP_rel8(end);             // 2
  2027     JMP_TARGET(sat);
  2028     JMP_TARGET(sat2);
  2029     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  2030     store_spreg( R_ECX, R_FPUL );
  2031     FPOP_st();
  2032     JMP_TARGET(end);
  2033     sh4_x86.tstate = TSTATE_NONE;
  2034 :}
  2035 FLDS FRm, FPUL {:  
  2036     COUNT_INST(I_FLDS);
  2037     check_fpuen();
  2038     load_fr( R_EAX, FRm );
  2039     store_spreg( R_EAX, R_FPUL );
  2040 :}
  2041 FSTS FPUL, FRn {:  
  2042     COUNT_INST(I_FSTS);
  2043     check_fpuen();
  2044     load_spreg( R_EAX, R_FPUL );
  2045     store_fr( R_EAX, FRn );
  2046 :}
  2047 FCNVDS FRm, FPUL {:  
  2048     COUNT_INST(I_FCNVDS);
  2049     check_fpuen();
  2050     if( sh4_x86.double_prec ) {
  2051         push_dr( FRm );
  2052         pop_fpul();
  2053     }
  2054 :}
  2055 FCNVSD FPUL, FRn {:  
  2056     COUNT_INST(I_FCNVSD);
  2057     check_fpuen();
  2058     if( sh4_x86.double_prec ) {
  2059         push_fpul();
  2060         pop_dr( FRn );
  2061     }
  2062 :}
  2064 /* Floating point instructions */
  2065 FABS FRn {:  
  2066     COUNT_INST(I_FABS);
  2067     check_fpuen();
  2068     if( sh4_x86.double_prec ) {
  2069         push_dr(FRn);
  2070         FABS_st0();
  2071         pop_dr(FRn);
  2072     } else {
  2073         push_fr(FRn);
  2074         FABS_st0();
  2075         pop_fr(FRn);
  2076     }
  2077 :}
  2078 FADD FRm, FRn {:  
  2079     COUNT_INST(I_FADD);
  2080     check_fpuen();
  2081     if( sh4_x86.double_prec ) {
  2082         push_dr(FRm);
  2083         push_dr(FRn);
  2084         FADDP_st(1);
  2085         pop_dr(FRn);
  2086     } else {
  2087         push_fr(FRm);
  2088         push_fr(FRn);
  2089         FADDP_st(1);
  2090         pop_fr(FRn);
  2091     }
  2092 :}
  2093 FDIV FRm, FRn {:  
  2094     COUNT_INST(I_FDIV);
  2095     check_fpuen();
  2096     if( sh4_x86.double_prec ) {
  2097         push_dr(FRn);
  2098         push_dr(FRm);
  2099         FDIVP_st(1);
  2100         pop_dr(FRn);
  2101     } else {
  2102         push_fr(FRn);
  2103         push_fr(FRm);
  2104         FDIVP_st(1);
  2105         pop_fr(FRn);
  2106     }
  2107 :}
  2108 FMAC FR0, FRm, FRn {:  
  2109     COUNT_INST(I_FMAC);
  2110     check_fpuen();
  2111     if( sh4_x86.double_prec ) {
  2112         push_dr( 0 );
  2113         push_dr( FRm );
  2114         FMULP_st(1);
  2115         push_dr( FRn );
  2116         FADDP_st(1);
  2117         pop_dr( FRn );
  2118     } else {
  2119         push_fr( 0 );
  2120         push_fr( FRm );
  2121         FMULP_st(1);
  2122         push_fr( FRn );
  2123         FADDP_st(1);
  2124         pop_fr( FRn );
  2125     }
  2126 :}
  2128 FMUL FRm, FRn {:  
  2129     COUNT_INST(I_FMUL);
  2130     check_fpuen();
  2131     if( sh4_x86.double_prec ) {
  2132         push_dr(FRm);
  2133         push_dr(FRn);
  2134         FMULP_st(1);
  2135         pop_dr(FRn);
  2136     } else {
  2137         push_fr(FRm);
  2138         push_fr(FRn);
  2139         FMULP_st(1);
  2140         pop_fr(FRn);
  2141     }
  2142 :}
  2143 FNEG FRn {:  
  2144     COUNT_INST(I_FNEG);
  2145     check_fpuen();
  2146     if( sh4_x86.double_prec ) {
  2147         push_dr(FRn);
  2148         FCHS_st0();
  2149         pop_dr(FRn);
  2150     } else {
  2151         push_fr(FRn);
  2152         FCHS_st0();
  2153         pop_fr(FRn);
  2154     }
  2155 :}
  2156 FSRRA FRn {:  
  2157     COUNT_INST(I_FSRRA);
  2158     check_fpuen();
  2159     if( sh4_x86.double_prec == 0 ) {
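               /* Compute 1.0 / sqrt(FRn) on the x87 stack. */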
  2160         FLD1_st0();
  2161         push_fr(FRn);
  2162         FSQRT_st0();
  2163         FDIVP_st(1);
  2164         pop_fr(FRn);
  2165     }
  2166 :}
  2167 FSQRT FRn {:  
  2168     COUNT_INST(I_FSQRT);
  2169     check_fpuen();
  2170     if( sh4_x86.double_prec ) {
  2171         push_dr(FRn);
  2172         FSQRT_st0();
  2173         pop_dr(FRn);
  2174     } else {
  2175         push_fr(FRn);
  2176         FSQRT_st0();
  2177         pop_fr(FRn);
  2178     }
  2179 :}
  2180 FSUB FRm, FRn {:  
  2181     COUNT_INST(I_FSUB);
  2182     check_fpuen();
  2183     if( sh4_x86.double_prec ) {
  2184         push_dr(FRn);
  2185         push_dr(FRm);
  2186         FSUBP_st(1);
  2187         pop_dr(FRn);
  2188     } else {
  2189         push_fr(FRn);
  2190         push_fr(FRm);
  2191         FSUBP_st(1);
  2192         pop_fr(FRn);
  2193     }
  2194 :}
  2196 FCMP/EQ FRm, FRn {:  
  2197     COUNT_INST(I_FCMPEQ);
  2198     check_fpuen();
  2199     if( sh4_x86.double_prec ) {
  2200         push_dr(FRm);
  2201         push_dr(FRn);
  2202     } else {
  2203         push_fr(FRm);
  2204         push_fr(FRn);
  2205     }
  2206     FCOMIP_st(1);
  2207     SETE_t();
  2208     FPOP_st();
  2209     sh4_x86.tstate = TSTATE_E;
  2210 :}
  2211 FCMP/GT FRm, FRn {:  
  2212     COUNT_INST(I_FCMPGT);
  2213     check_fpuen();
  2214     if( sh4_x86.double_prec ) {
  2215         push_dr(FRm);
  2216         push_dr(FRn);
  2217     } else {
  2218         push_fr(FRm);
  2219         push_fr(FRn);
  2220     }
  2221     FCOMIP_st(1);
  2222     SETA_t();
  2223     FPOP_st();
  2224     sh4_x86.tstate = TSTATE_A;
  2225 :}
  2227 FSCA FPUL, FRn {:  
  2228     COUNT_INST(I_FSCA);
  2229     check_fpuen();
  2230     if( sh4_x86.double_prec == 0 ) {
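               /* Compute sine/cosine of the FPUL angle via the sh4_fsca helper,
                * which writes the result pair directly into FRn/FRn+1. */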
  2231         LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_ECX );
  2232         load_spreg( R_EDX, R_FPUL );
  2233         call_func2( sh4_fsca, R_EDX, R_ECX );
  2234     }
  2235     sh4_x86.tstate = TSTATE_NONE;
  2236 :}
  2237 FIPR FVm, FVn {:  
  2238     COUNT_INST(I_FIPR);
  2239     check_fpuen();
  2240     if( sh4_x86.double_prec == 0 ) {
  2241         if( sh4_x86.sse3_enabled ) {
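                   /* Dot product FVm.FVn: multiply element-wise, then two HADDPS
                    * passes sum the four products into every lane. The +2 store
                    * offset (rather than +3) matches the word-swapped layout of
                    * sh4r.fr noted in FTRV below. */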
  2242             MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2243             MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2244             HADDPS_xmm_xmm( 4, 4 ); 
  2245             HADDPS_xmm_xmm( 4, 4 );
  2246             MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2247         } else {
  2248             push_fr( FVm<<2 );
  2249             push_fr( FVn<<2 );
  2250             FMULP_st(1);
  2251             push_fr( (FVm<<2)+1);
  2252             push_fr( (FVn<<2)+1);
  2253             FMULP_st(1);
  2254             FADDP_st(1);
  2255             push_fr( (FVm<<2)+2);
  2256             push_fr( (FVn<<2)+2);
  2257             FMULP_st(1);
  2258             FADDP_st(1);
  2259             push_fr( (FVm<<2)+3);
  2260             push_fr( (FVn<<2)+3);
  2261             FMULP_st(1);
  2262             FADDP_st(1);
  2263             pop_fr( (FVn<<2)+3);
  2264         }
  2265     }
  2266 :}
  2267 FTRV XMTRX, FVn {:  
  2268     COUNT_INST(I_FTRV);
  2269     check_fpuen();
  2270     if( sh4_x86.double_prec == 0 ) {
  2271         if( sh4_x86.sse3_enabled ) {
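                   /* Matrix-vector product: load the four XMTRX columns (in the
                    * swapped-pair order shown), broadcast each element of FVn,
                    * multiply, and sum the partial products. */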
  2272             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2273             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2274             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2275             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2277             MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2278             MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2279             MOVAPS_xmm_xmm( 4, 6 );
  2280             MOVAPS_xmm_xmm( 5, 7 );
  2281             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2282             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2283             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2284             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2285             MULPS_xmm_xmm( 0, 4 );
  2286             MULPS_xmm_xmm( 1, 5 );
  2287             MULPS_xmm_xmm( 2, 6 );
  2288             MULPS_xmm_xmm( 3, 7 );
  2289             ADDPS_xmm_xmm( 5, 4 );
  2290             ADDPS_xmm_xmm( 7, 6 );
  2291             ADDPS_xmm_xmm( 6, 4 );
  2292             MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2293         } else {
  2294             LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
  2295             call_func1( sh4_ftrv, R_EAX );
  2296         }
  2297     }
  2298     sh4_x86.tstate = TSTATE_NONE;
  2299 :}
  2301 FRCHG {:  
  2302     COUNT_INST(I_FRCHG);
  2303     check_fpuen();
  2304     load_spreg( R_ECX, R_FPSCR );
  2305     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2306     store_spreg( R_ECX, R_FPSCR );
  2307     call_func0( sh4_switch_fr_banks );
  2308     sh4_x86.tstate = TSTATE_NONE;
  2309 :}
  2310 FSCHG {:  
  2311     COUNT_INST(I_FSCHG);
  2312     check_fpuen();
  2313     load_spreg( R_ECX, R_FPSCR );
  2314     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2315     store_spreg( R_ECX, R_FPSCR );
  2316     sh4_x86.tstate = TSTATE_NONE;
  2317     sh4_x86.double_size = !sh4_x86.double_size;
  2318 :}
  2320 /* Processor control instructions */
  2321 LDC Rm, SR {:
  2322     COUNT_INST(I_LDCSR);
  2323     if( sh4_x86.in_delay_slot ) {
  2324 	SLOTILLEGAL();
  2325     } else {
  2326 	check_priv();
  2327 	load_reg( R_EAX, Rm );
  2328 	call_func1( sh4_write_sr, R_EAX );
  2329 	sh4_x86.priv_checked = FALSE;
  2330 	sh4_x86.fpuen_checked = FALSE;
  2331 	sh4_x86.tstate = TSTATE_NONE;
  2332     }
  2333 :}
  2334 LDC Rm, GBR {: 
  2335     COUNT_INST(I_LDC);
  2336     load_reg( R_EAX, Rm );
  2337     store_spreg( R_EAX, R_GBR );
  2338 :}
  2339 LDC Rm, VBR {:  
  2340     COUNT_INST(I_LDC);
  2341     check_priv();
  2342     load_reg( R_EAX, Rm );
  2343     store_spreg( R_EAX, R_VBR );
  2344     sh4_x86.tstate = TSTATE_NONE;
  2345 :}
  2346 LDC Rm, SSR {:  
  2347     COUNT_INST(I_LDC);
  2348     check_priv();
  2349     load_reg( R_EAX, Rm );
  2350     store_spreg( R_EAX, R_SSR );
  2351     sh4_x86.tstate = TSTATE_NONE;
  2352 :}
  2353 LDC Rm, SGR {:  
  2354     COUNT_INST(I_LDC);
  2355     check_priv();
  2356     load_reg( R_EAX, Rm );
  2357     store_spreg( R_EAX, R_SGR );
  2358     sh4_x86.tstate = TSTATE_NONE;
  2359 :}
  2360 LDC Rm, SPC {:  
  2361     COUNT_INST(I_LDC);
  2362     check_priv();
  2363     load_reg( R_EAX, Rm );
  2364     store_spreg( R_EAX, R_SPC );
  2365     sh4_x86.tstate = TSTATE_NONE;
  2366 :}
  2367 LDC Rm, DBR {:  
  2368     COUNT_INST(I_LDC);
  2369     check_priv();
  2370     load_reg( R_EAX, Rm );
  2371     store_spreg( R_EAX, R_DBR );
  2372     sh4_x86.tstate = TSTATE_NONE;
  2373 :}
  2374 LDC Rm, Rn_BANK {:  
  2375     COUNT_INST(I_LDC);
  2376     check_priv();
  2377     load_reg( R_EAX, Rm );
  2378     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2379     sh4_x86.tstate = TSTATE_NONE;
  2380 :}
  2381 LDC.L @Rm+, GBR {:  
  2382     COUNT_INST(I_LDCM);
  2383     load_reg( R_EAX, Rm );
  2384     check_ralign32( R_EAX );
  2385     MMU_TRANSLATE_READ( R_EAX );
  2386     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2387     MEM_READ_LONG( R_EAX, R_EAX );
  2388     store_spreg( R_EAX, R_GBR );
  2389     sh4_x86.tstate = TSTATE_NONE;
  2390 :}
  2391 LDC.L @Rm+, SR {:
  2392     COUNT_INST(I_LDCSRM);
  2393     if( sh4_x86.in_delay_slot ) {
  2394 	SLOTILLEGAL();
  2395     } else {
  2396 	check_priv();
  2397 	load_reg( R_EAX, Rm );
  2398 	check_ralign32( R_EAX );
  2399 	MMU_TRANSLATE_READ( R_EAX );
  2400 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2401 	MEM_READ_LONG( R_EAX, R_EAX );
  2402 	call_func1( sh4_write_sr, R_EAX );
  2403 	sh4_x86.priv_checked = FALSE;
  2404 	sh4_x86.fpuen_checked = FALSE;
  2405 	sh4_x86.tstate = TSTATE_NONE;
  2406     }
  2407 :}
  2408 LDC.L @Rm+, VBR {:  
  2409     COUNT_INST(I_LDCM);
  2410     check_priv();
  2411     load_reg( R_EAX, Rm );
  2412     check_ralign32( R_EAX );
  2413     MMU_TRANSLATE_READ( R_EAX );
  2414     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2415     MEM_READ_LONG( R_EAX, R_EAX );
  2416     store_spreg( R_EAX, R_VBR );
  2417     sh4_x86.tstate = TSTATE_NONE;
  2418 :}
  2419 LDC.L @Rm+, SSR {:
  2420     COUNT_INST(I_LDCM);
  2421     check_priv();
  2422     load_reg( R_EAX, Rm );
  2423     check_ralign32( R_EAX );
  2424     MMU_TRANSLATE_READ( R_EAX );
  2425     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2426     MEM_READ_LONG( R_EAX, R_EAX );
  2427     store_spreg( R_EAX, R_SSR );
  2428     sh4_x86.tstate = TSTATE_NONE;
  2429 :}
  2430 LDC.L @Rm+, SGR {:  
  2431     COUNT_INST(I_LDCM);
  2432     check_priv();
  2433     load_reg( R_EAX, Rm );
  2434     check_ralign32( R_EAX );
  2435     MMU_TRANSLATE_READ( R_EAX );
  2436     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2437     MEM_READ_LONG( R_EAX, R_EAX );
  2438     store_spreg( R_EAX, R_SGR );
  2439     sh4_x86.tstate = TSTATE_NONE;
  2440 :}
  2441 LDC.L @Rm+, SPC {:  
  2442     COUNT_INST(I_LDCM);
  2443     check_priv();
  2444     load_reg( R_EAX, Rm );
  2445     check_ralign32( R_EAX );
  2446     MMU_TRANSLATE_READ( R_EAX );
  2447     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2448     MEM_READ_LONG( R_EAX, R_EAX );
  2449     store_spreg( R_EAX, R_SPC );
  2450     sh4_x86.tstate = TSTATE_NONE;
  2451 :}
  2452 LDC.L @Rm+, DBR {:  
  2453     COUNT_INST(I_LDCM);
  2454     check_priv();
  2455     load_reg( R_EAX, Rm );
  2456     check_ralign32( R_EAX );
  2457     MMU_TRANSLATE_READ( R_EAX );
  2458     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2459     MEM_READ_LONG( R_EAX, R_EAX );
  2460     store_spreg( R_EAX, R_DBR );
  2461     sh4_x86.tstate = TSTATE_NONE;
  2462 :}
  2463 LDC.L @Rm+, Rn_BANK {:  
  2464     COUNT_INST(I_LDCM);
  2465     check_priv();
  2466     load_reg( R_EAX, Rm );
  2467     check_ralign32( R_EAX );
  2468     MMU_TRANSLATE_READ( R_EAX );
  2469     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2470     MEM_READ_LONG( R_EAX, R_EAX );
  2471     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2472     sh4_x86.tstate = TSTATE_NONE;
  2473 :}
  2474 LDS Rm, FPSCR {:
  2475     COUNT_INST(I_LDSFPSCR);
  2476     check_fpuen();
  2477     load_reg( R_EAX, Rm );
  2478     call_func1( sh4_write_fpscr, R_EAX );
  2479     sh4_x86.tstate = TSTATE_NONE;
  2480     return 2;
  2481 :}
  2482 LDS.L @Rm+, FPSCR {:  
  2483     COUNT_INST(I_LDSFPSCRM);
  2484     check_fpuen();
  2485     load_reg( R_EAX, Rm );
  2486     check_ralign32( R_EAX );
  2487     MMU_TRANSLATE_READ( R_EAX );
  2488     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2489     MEM_READ_LONG( R_EAX, R_EAX );
  2490     call_func1( sh4_write_fpscr, R_EAX );
  2491     sh4_x86.tstate = TSTATE_NONE;
  2492     return 2;
  2493 :}
  2494 LDS Rm, FPUL {:  
  2495     COUNT_INST(I_LDS);
  2496     check_fpuen();
  2497     load_reg( R_EAX, Rm );
  2498     store_spreg( R_EAX, R_FPUL );
  2499 :}
  2500 LDS.L @Rm+, FPUL {:  
  2501     COUNT_INST(I_LDSM);
  2502     check_fpuen();
  2503     load_reg( R_EAX, Rm );
  2504     check_ralign32( R_EAX );
  2505     MMU_TRANSLATE_READ( R_EAX );
  2506     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2507     MEM_READ_LONG( R_EAX, R_EAX );
  2508     store_spreg( R_EAX, R_FPUL );
  2509     sh4_x86.tstate = TSTATE_NONE;
  2510 :}
  2511 LDS Rm, MACH {: 
  2512     COUNT_INST(I_LDS);
  2513     load_reg( R_EAX, Rm );
  2514     store_spreg( R_EAX, R_MACH );
  2515 :}
  2516 LDS.L @Rm+, MACH {:  
  2517     COUNT_INST(I_LDSM);
  2518     load_reg( R_EAX, Rm );
  2519     check_ralign32( R_EAX );
  2520     MMU_TRANSLATE_READ( R_EAX );
  2521     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2522     MEM_READ_LONG( R_EAX, R_EAX );
  2523     store_spreg( R_EAX, R_MACH );
  2524     sh4_x86.tstate = TSTATE_NONE;
  2525 :}
  2526 LDS Rm, MACL {:  
  2527     COUNT_INST(I_LDS);
  2528     load_reg( R_EAX, Rm );
  2529     store_spreg( R_EAX, R_MACL );
  2530 :}
  2531 LDS.L @Rm+, MACL {:  
  2532     COUNT_INST(I_LDSM);
  2533     load_reg( R_EAX, Rm );
  2534     check_ralign32( R_EAX );
  2535     MMU_TRANSLATE_READ( R_EAX );
  2536     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2537     MEM_READ_LONG( R_EAX, R_EAX );
  2538     store_spreg( R_EAX, R_MACL );
  2539     sh4_x86.tstate = TSTATE_NONE;
  2540 :}
  2541 LDS Rm, PR {:  
  2542     COUNT_INST(I_LDS);
  2543     load_reg( R_EAX, Rm );
  2544     store_spreg( R_EAX, R_PR );
  2545 :}
  2546 LDS.L @Rm+, PR {:  
  2547     COUNT_INST(I_LDSM);
  2548     load_reg( R_EAX, Rm );
  2549     check_ralign32( R_EAX );
  2550     MMU_TRANSLATE_READ( R_EAX );
  2551     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2552     MEM_READ_LONG( R_EAX, R_EAX );
  2553     store_spreg( R_EAX, R_PR );
  2554     sh4_x86.tstate = TSTATE_NONE;
  2555 :}
  2556 LDTLB {:  
  2557     COUNT_INST(I_LDTLB);
  2558     call_func0( MMU_ldtlb );
  2559     sh4_x86.tstate = TSTATE_NONE;
  2560 :}
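       /* The operand cache is not modelled, so the cache control instructions
        * below are translated as no-ops. */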
  2561 OCBI @Rn {:
  2562     COUNT_INST(I_OCBI);
  2563 :}
  2564 OCBP @Rn {:
  2565     COUNT_INST(I_OCBP);
  2566 :}
  2567 OCBWB @Rn {:
  2568     COUNT_INST(I_OCBWB);
  2569 :}
  2570 PREF @Rn {:
  2571     COUNT_INST(I_PREF);
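           /* Only store-queue addresses (0xE0000000-0xE3FFFFFF) have any effect:
            * they trigger a store queue flush. Other prefetches are no-ops. */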
  2572     load_reg( R_EAX, Rn );
  2573     MOV_r32_r32( R_EAX, R_ECX );
  2574     AND_imm32_r32( 0xFC000000, R_EAX );
  2575     CMP_imm32_r32( 0xE0000000, R_EAX );
  2576     JNE_rel8(end);
  2577     call_func1( sh4_flush_store_queue, R_ECX );
  2578     TEST_r32_r32( R_EAX, R_EAX );
  2579     JE_exc(-1);
  2580     JMP_TARGET(end);
  2581     sh4_x86.tstate = TSTATE_NONE;
  2582 :}
  2583 SLEEP {: 
  2584     COUNT_INST(I_SLEEP);
  2585     check_priv();
  2586     call_func0( sh4_sleep );
  2587     sh4_x86.tstate = TSTATE_NONE;
  2588     sh4_x86.in_delay_slot = DELAY_NONE;
  2589     return 2;
  2590 :}
  2591 STC SR, Rn {:
  2592     COUNT_INST(I_STCSR);
  2593     check_priv();
  2594     call_func0(sh4_read_sr);
  2595     store_reg( R_EAX, Rn );
  2596     sh4_x86.tstate = TSTATE_NONE;
  2597 :}
  2598 STC GBR, Rn {:  
  2599     COUNT_INST(I_STC);
  2600     load_spreg( R_EAX, R_GBR );
  2601     store_reg( R_EAX, Rn );
  2602 :}
  2603 STC VBR, Rn {:  
  2604     COUNT_INST(I_STC);
  2605     check_priv();
  2606     load_spreg( R_EAX, R_VBR );
  2607     store_reg( R_EAX, Rn );
  2608     sh4_x86.tstate = TSTATE_NONE;
  2609 :}
  2610 STC SSR, Rn {:  
  2611     COUNT_INST(I_STC);
  2612     check_priv();
  2613     load_spreg( R_EAX, R_SSR );
  2614     store_reg( R_EAX, Rn );
  2615     sh4_x86.tstate = TSTATE_NONE;
  2616 :}
  2617 STC SPC, Rn {:  
  2618     COUNT_INST(I_STC);
  2619     check_priv();
  2620     load_spreg( R_EAX, R_SPC );
  2621     store_reg( R_EAX, Rn );
  2622     sh4_x86.tstate = TSTATE_NONE;
  2623 :}
  2624 STC SGR, Rn {:  
  2625     COUNT_INST(I_STC);
  2626     check_priv();
  2627     load_spreg( R_EAX, R_SGR );
  2628     store_reg( R_EAX, Rn );
  2629     sh4_x86.tstate = TSTATE_NONE;
  2630 :}
  2631 STC DBR, Rn {:  
  2632     COUNT_INST(I_STC);
  2633     check_priv();
  2634     load_spreg( R_EAX, R_DBR );
  2635     store_reg( R_EAX, Rn );
  2636     sh4_x86.tstate = TSTATE_NONE;
  2637 :}
  2638 STC Rm_BANK, Rn {:
  2639     COUNT_INST(I_STC);
  2640     check_priv();
  2641     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2642     store_reg( R_EAX, Rn );
  2643     sh4_x86.tstate = TSTATE_NONE;
  2644 :}
  2645 STC.L SR, @-Rn {:
  2646     COUNT_INST(I_STCSRM);
  2647     check_priv();
  2648     load_reg( R_EAX, Rn );
  2649     check_walign32( R_EAX );
  2650     ADD_imm8s_r32( -4, R_EAX );
  2651     MMU_TRANSLATE_WRITE( R_EAX );
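           /* Preserve the translated store address across the sh4_read_sr call,
            * which returns the current SR value in R_EAX. */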
  2652     PUSH_realigned_r32( R_EAX );
  2653     call_func0( sh4_read_sr );
  2654     POP_realigned_r32( R_ECX );
  2655     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2656     MEM_WRITE_LONG( R_ECX, R_EAX );
  2657     sh4_x86.tstate = TSTATE_NONE;
  2658 :}
  2659 STC.L VBR, @-Rn {:  
  2660     COUNT_INST(I_STCM);
  2661     check_priv();
  2662     load_reg( R_EAX, Rn );
  2663     check_walign32( R_EAX );
  2664     ADD_imm8s_r32( -4, R_EAX );
  2665     MMU_TRANSLATE_WRITE( R_EAX );
  2666     load_spreg( R_EDX, R_VBR );
  2667     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2668     MEM_WRITE_LONG( R_EAX, R_EDX );
  2669     sh4_x86.tstate = TSTATE_NONE;
  2670 :}
  2671 STC.L SSR, @-Rn {:  
  2672     COUNT_INST(I_STCM);
  2673     check_priv();
  2674     load_reg( R_EAX, Rn );
  2675     check_walign32( R_EAX );
  2676     ADD_imm8s_r32( -4, R_EAX );
  2677     MMU_TRANSLATE_WRITE( R_EAX );
  2678     load_spreg( R_EDX, R_SSR );
  2679     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2680     MEM_WRITE_LONG( R_EAX, R_EDX );
  2681     sh4_x86.tstate = TSTATE_NONE;
  2682 :}
  2683 STC.L SPC, @-Rn {:
  2684     COUNT_INST(I_STCM);
  2685     check_priv();
  2686     load_reg( R_EAX, Rn );
  2687     check_walign32( R_EAX );
  2688     ADD_imm8s_r32( -4, R_EAX );
  2689     MMU_TRANSLATE_WRITE( R_EAX );
  2690     load_spreg( R_EDX, R_SPC );
  2691     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2692     MEM_WRITE_LONG( R_EAX, R_EDX );
  2693     sh4_x86.tstate = TSTATE_NONE;
  2694 :}
  2695 STC.L SGR, @-Rn {:  
  2696     COUNT_INST(I_STCM);
  2697     check_priv();
  2698     load_reg( R_EAX, Rn );
  2699     check_walign32( R_EAX );
  2700     ADD_imm8s_r32( -4, R_EAX );
  2701     MMU_TRANSLATE_WRITE( R_EAX );
  2702     load_spreg( R_EDX, R_SGR );
  2703     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2704     MEM_WRITE_LONG( R_EAX, R_EDX );
  2705     sh4_x86.tstate = TSTATE_NONE;
  2706 :}
  2707 STC.L DBR, @-Rn {:  
  2708     COUNT_INST(I_STCM);
  2709     check_priv();
  2710     load_reg( R_EAX, Rn );
  2711     check_walign32( R_EAX );
  2712     ADD_imm8s_r32( -4, R_EAX );
  2713     MMU_TRANSLATE_WRITE( R_EAX );
  2714     load_spreg( R_EDX, R_DBR );
  2715     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2716     MEM_WRITE_LONG( R_EAX, R_EDX );
  2717     sh4_x86.tstate = TSTATE_NONE;
  2718 :}
  2719 STC.L Rm_BANK, @-Rn {:  
  2720     COUNT_INST(I_STCM);
  2721     check_priv();
  2722     load_reg( R_EAX, Rn );
  2723     check_walign32( R_EAX );
  2724     ADD_imm8s_r32( -4, R_EAX );
  2725     MMU_TRANSLATE_WRITE( R_EAX );
  2726     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2727     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2728     MEM_WRITE_LONG( R_EAX, R_EDX );
  2729     sh4_x86.tstate = TSTATE_NONE;
  2730 :}
  2731 STC.L GBR, @-Rn {:  
  2732     COUNT_INST(I_STCM);
  2733     load_reg( R_EAX, Rn );
  2734     check_walign32( R_EAX );
  2735     ADD_imm8s_r32( -4, R_EAX );
  2736     MMU_TRANSLATE_WRITE( R_EAX );
  2737     load_spreg( R_EDX, R_GBR );
  2738     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2739     MEM_WRITE_LONG( R_EAX, R_EDX );
  2740     sh4_x86.tstate = TSTATE_NONE;
  2741 :}
  2742 STS FPSCR, Rn {:  
  2743     COUNT_INST(I_STSFPSCR);
  2744     check_fpuen();
  2745     load_spreg( R_EAX, R_FPSCR );
  2746     store_reg( R_EAX, Rn );
  2747 :}
  2748 STS.L FPSCR, @-Rn {:  
  2749     COUNT_INST(I_STSFPSCRM);
  2750     check_fpuen();
  2751     load_reg( R_EAX, Rn );
  2752     check_walign32( R_EAX );
  2753     ADD_imm8s_r32( -4, R_EAX );
  2754     MMU_TRANSLATE_WRITE( R_EAX );
  2755     load_spreg( R_EDX, R_FPSCR );
  2756     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2757     MEM_WRITE_LONG( R_EAX, R_EDX );
  2758     sh4_x86.tstate = TSTATE_NONE;
  2759 :}
  2760 STS FPUL, Rn {:  
  2761     COUNT_INST(I_STS);
  2762     check_fpuen();
  2763     load_spreg( R_EAX, R_FPUL );
  2764     store_reg( R_EAX, Rn );
  2765 :}
  2766 STS.L FPUL, @-Rn {:  
  2767     COUNT_INST(I_STSM);
  2768     check_fpuen();
  2769     load_reg( R_EAX, Rn );
  2770     check_walign32( R_EAX );
  2771     ADD_imm8s_r32( -4, R_EAX );
  2772     MMU_TRANSLATE_WRITE( R_EAX );
  2773     load_spreg( R_EDX, R_FPUL );
  2774     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2775     MEM_WRITE_LONG( R_EAX, R_EDX );
  2776     sh4_x86.tstate = TSTATE_NONE;
  2777 :}
  2778 STS MACH, Rn {:  
  2779     COUNT_INST(I_STS);
  2780     load_spreg( R_EAX, R_MACH );
  2781     store_reg( R_EAX, Rn );
  2782 :}
  2783 STS.L MACH, @-Rn {:  
  2784     COUNT_INST(I_STSM);
  2785     load_reg( R_EAX, Rn );
  2786     check_walign32( R_EAX );
  2787     ADD_imm8s_r32( -4, R_EAX );
  2788     MMU_TRANSLATE_WRITE( R_EAX );
  2789     load_spreg( R_EDX, R_MACH );
  2790     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2791     MEM_WRITE_LONG( R_EAX, R_EDX );
  2792     sh4_x86.tstate = TSTATE_NONE;
  2793 :}
  2794 STS MACL, Rn {:  
  2795     COUNT_INST(I_STS);
  2796     load_spreg( R_EAX, R_MACL );
  2797     store_reg( R_EAX, Rn );
  2798 :}
  2799 STS.L MACL, @-Rn {:  
  2800     COUNT_INST(I_STSM);
  2801     load_reg( R_EAX, Rn );
  2802     check_walign32( R_EAX );
  2803     ADD_imm8s_r32( -4, R_EAX );
  2804     MMU_TRANSLATE_WRITE( R_EAX );
  2805     load_spreg( R_EDX, R_MACL );
  2806     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2807     MEM_WRITE_LONG( R_EAX, R_EDX );
  2808     sh4_x86.tstate = TSTATE_NONE;
  2809 :}
  2810 STS PR, Rn {:  
  2811     COUNT_INST(I_STS);
  2812     load_spreg( R_EAX, R_PR );
  2813     store_reg( R_EAX, Rn );
  2814 :}
  2815 STS.L PR, @-Rn {:  
  2816     COUNT_INST(I_STSM);
  2817     load_reg( R_EAX, Rn );
  2818     check_walign32( R_EAX );
  2819     ADD_imm8s_r32( -4, R_EAX );
  2820     MMU_TRANSLATE_WRITE( R_EAX );
  2821     load_spreg( R_EDX, R_PR );
  2822     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2823     MEM_WRITE_LONG( R_EAX, R_EDX );
  2824     sh4_x86.tstate = TSTATE_NONE;
  2825 :}
  2827 NOP {: 
  2828     COUNT_INST(I_NOP);
  2829     /* Do nothing. Well, we could emit a 0x90, but what would really be the point? */ 
  2830 :}
  2831 %%
  2832     sh4_x86.in_delay_slot = DELAY_NONE;
  2833     return 0;