lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 911:2f6ba75b84d1
prev 908:a00debcf2600
next 926:68f3e0fe02f1
author nkeynes
date Fri Oct 31 03:24:49 2008 +0000
permissions -rw-r--r--
last change Remove FASTCALL from mem_copy_*, not really helping atm (and sometimes hurting)
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "lxdream.h"
    29 #include "sh4/xltcache.h"
    30 #include "sh4/sh4core.h"
    31 #include "sh4/sh4trans.h"
    32 #include "sh4/sh4stat.h"
    33 #include "sh4/sh4mmio.h"
    34 #include "sh4/x86op.h"
    35 #include "clock.h"
    37 #define DEFAULT_BACKPATCH_SIZE 4096
    39 struct backpatch_record {
    40     uint32_t fixup_offset;
    41     uint32_t fixup_icount;
    42     int32_t exc_code;
    43 };
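/* A note on the fields above (as used by the code below): fixup_offset is the
 * byte offset of a 32-bit placeholder within the generated block,
 * fixup_icount counts SH4 instructions from the start of the block (used to
 * reconstruct the faulting PC), and exc_code is the exception to raise --
 * with -1 appearing to mean "re-raise the error already recorded by the MMU",
 * as used by the MMU_TRANSLATE_* macros further down. */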
    45 #define DELAY_NONE 0
    46 #define DELAY_PC 1
    47 #define DELAY_PC_PR 2
    49 /** 
    50  * Struct to manage internal translation state. This state is not saved -
    51  * it is only valid between calls to sh4_translate_begin_block() and
    52  * sh4_translate_end_block()
    53  */
    54 struct sh4_x86_state {
    55     int in_delay_slot;
    56     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    57     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    58     gboolean branch_taken; /* true if we branched unconditionally */
    59     gboolean double_prec; /* true if FPU is in double-precision mode */
    60     gboolean double_size; /* true if FPU is in double-size mode */
    61     gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    62     uint32_t block_start_pc;
    63     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    64     int tstate;
    66     /* mode flags */
    67     gboolean tlb_on; /* True if tlb translation is active */
    69     /* Allocated memory for the (block-wide) back-patch list */
    70     struct backpatch_record *backpatch_list;
    71     uint32_t backpatch_posn;
    72     uint32_t backpatch_size;
    73 };
    75 #define TSTATE_NONE -1
    76 #define TSTATE_O    0
    77 #define TSTATE_C    2
    78 #define TSTATE_E    4
    79 #define TSTATE_NE   5
    80 #define TSTATE_G    0xF
    81 #define TSTATE_GE   0xD
    82 #define TSTATE_A    7
    83 #define TSTATE_AE   3
    85 #ifdef ENABLE_SH4STATS
    86 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
    87 #else
    88 #define COUNT_INST(id)
    89 #endif
    91 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    92 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    93 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    94     OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
    96 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    97 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    98 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    99     OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
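/* The TSTATE_* values above are exactly the x86 condition codes (O=0, C=2,
 * E=4, NE=5, A=7, AE=3, G=0xF, GE=0xD), so OP(0x70+tstate) emits the matching
 * Jcc rel8 opcode and OP(0x70+(tstate^1)) its negation. For example, with
 * sh4_x86.tstate == TSTATE_E, JT_rel8 emits 0x74 (JE) and JF_rel8 emits 0x75
 * (JNE), reusing the EFLAGS left by the instruction that last set tstate
 * instead of re-testing sh4r.t. */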
   101 static struct sh4_x86_state sh4_x86;
   103 static uint32_t max_int = 0x7FFFFFFF;
   104 static uint32_t min_int = 0x80000000;
   105 static uint32_t save_fcw; /* save value for fpu control word */
   106 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
   108 gboolean is_sse3_supported()
   109 {
   110     uint32_t features;
   112     __asm__ __volatile__(
   113         "mov $0x01, %%eax\n\t"
   114         "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
   115     return (features & 1) ? TRUE : FALSE;
   116 }
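/* CPUID leaf 1 reports SSE3 support in bit 0 of ECX, which is why a single
 * bit test against the "=c" output is sufficient here. */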
   118 void sh4_translate_init(void)
   119 {
   120     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   121     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   122     sh4_x86.sse3_enabled = is_sse3_supported();
   123 }
   126 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   127 {
   128     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   129 	sh4_x86.backpatch_size <<= 1;
   130 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   131 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   132 	assert( sh4_x86.backpatch_list != NULL );
   133     }
   134     if( sh4_x86.in_delay_slot ) {
   135 	fixup_pc -= 2;
   136     }
   137     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   138 	((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
   139     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   140     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   141     sh4_x86.backpatch_posn++;
   142 }
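/* A sketch of the expected call site -- JE_exc() itself lives in the ABI
 * headers, so the exact shape here is an assumption:
 *
 *     OP(0x0F); OP(0x84);                          // JE rel32 ...
 *     sh4_x86_add_backpatch( xlat_output, pc, exc_code );
 *     OP32(0);                                     // ... with a placeholder
 *
 * At end-of-block the placeholder would then be patched to reach a stub that
 * rewinds sh4r.pc by fixup_icount instructions and raises exc_code. */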
   144 /**
   145  * Emit an instruction to load an SH4 reg into a real register
   146  */
   147 static inline void load_reg( int x86reg, int sh4reg ) 
   148 {
   149     /* mov [bp+n], reg */
   150     OP(0x8B);
   151     OP(0x45 + (x86reg<<3));
   152     OP(REG_OFFSET(r[sh4reg]));
   153 }
   155 static inline void load_reg16s( int x86reg, int sh4reg )
   156 {
   157     OP(0x0F);
   158     OP(0xBF);
   159     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   160 }
   162 static inline void load_reg16u( int x86reg, int sh4reg )
   163 {
   164     OP(0x0F);
   165     OP(0xB7);
   166     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   168 }
   170 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   171 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   172 /**
   173  * Emit an instruction to load an immediate value into a register
   174  */
   175 static inline void load_imm32( int x86reg, uint32_t value ) {
   176     /* mov #value, reg */
   177     OP(0xB8 + x86reg);
   178     OP32(value);
   179 }
   181 /**
   182  * Load an immediate 64-bit quantity (note: x86-64 only)
   183  */
   184 static inline void load_imm64( int x86reg, uint64_t value ) {
   185     /* mov #value, reg */
   186     REXW();
   187     OP(0xB8 + x86reg);
   188     OP64(value);
   189 }
   191 /**
   192  * Emit an instruction to store an SH4 reg (RN)
   193  */
   194 static inline void store_reg( int x86reg, int sh4reg ) {
   195     /* mov reg, [bp+n] */
   196     OP(0x89);
   197     OP(0x45 + (x86reg<<3));
   198     OP(REG_OFFSET(r[sh4reg]));
   199 }
   201 /**
   202  * Load an FR register (single-precision floating point) into an integer x86
   203  * register (eg for register-to-register moves)
   204  */
   205 #define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
   206 #define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
   208 /**
   209  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   210  */
   211 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   212 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   214 /**
   215  * Store an FR register (single-precision floating point) from an integer x86
   216  * register (eg for register-to-register moves)
   217  */
   218 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
   219 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
   221 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   222 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
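/* A note on the index arithmetic above (inferred from the layout): each pair
 * of single-precision registers is stored word-swapped, so that an even/odd
 * FR pair lines up as one little-endian 64-bit double on the host. Singles
 * are therefore accessed at (frm)^1, while the low and high halves of a DR
 * register live at frm|0x01 and frm&0x0E respectively. */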
   225 #define push_fpul()  FLDF_sh4r(R_FPUL)
   226 #define pop_fpul()   FSTPF_sh4r(R_FPUL)
   227 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   228 #define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   229 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   230 #define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   231 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   232 #define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   233 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   234 #define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   238 /* Exception checks - Note that all exception checks will clobber EAX */
   240 #define check_priv( ) \
   241     if( !sh4_x86.priv_checked ) { \
   242 	sh4_x86.priv_checked = TRUE;\
   243 	load_spreg( R_EAX, R_SR );\
   244 	AND_imm32_r32( SR_MD, R_EAX );\
   245 	if( sh4_x86.in_delay_slot ) {\
   246 	    JE_exc( EXC_SLOT_ILLEGAL );\
   247 	} else {\
   248 	    JE_exc( EXC_ILLEGAL );\
   249 	}\
   250 	sh4_x86.tstate = TSTATE_NONE; \
   251     }
   253 #define check_fpuen( ) \
   254     if( !sh4_x86.fpuen_checked ) {\
   255 	sh4_x86.fpuen_checked = TRUE;\
   256 	load_spreg( R_EAX, R_SR );\
   257 	AND_imm32_r32( SR_FD, R_EAX );\
   258 	if( sh4_x86.in_delay_slot ) {\
   259 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   260 	} else {\
   261 	    JNE_exc(EXC_FPU_DISABLED);\
   262 	}\
   263 	sh4_x86.tstate = TSTATE_NONE; \
   264     }
   266 #define check_ralign16( x86reg ) \
   267     TEST_imm32_r32( 0x00000001, x86reg ); \
   268     JNE_exc(EXC_DATA_ADDR_READ)
   270 #define check_walign16( x86reg ) \
   271     TEST_imm32_r32( 0x00000001, x86reg ); \
   272     JNE_exc(EXC_DATA_ADDR_WRITE);
   274 #define check_ralign32( x86reg ) \
   275     TEST_imm32_r32( 0x00000003, x86reg ); \
   276     JNE_exc(EXC_DATA_ADDR_READ)
   278 #define check_walign32( x86reg ) \
   279     TEST_imm32_r32( 0x00000003, x86reg ); \
   280     JNE_exc(EXC_DATA_ADDR_WRITE);
   282 #define check_ralign64( x86reg ) \
   283     TEST_imm32_r32( 0x00000007, x86reg ); \
   284     JNE_exc(EXC_DATA_ADDR_READ)
   286 #define check_walign64( x86reg ) \
   287     TEST_imm32_r32( 0x00000007, x86reg ); \
   288     JNE_exc(EXC_DATA_ADDR_WRITE);
   290 #define UNDEF(ir)
   291 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   292 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   293 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   294 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   295 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   296 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   297 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   299 /**
   300  * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned 
   301  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   302  */
   303 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   305 #define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg) }
   306 /**
   307  * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned 
   308  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   309  */
   310 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
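/* Roughly, each MMU_TRANSLATE_* above expands to the equivalent of:
 *
 *     if( sh4_x86.tlb_on ) {
 *         addr = mmu_vma_to_phys_read( addr );  // or _write; result in EAX
 *         if( addr == MMU_VMA_ERROR )
 *             goto exception;  // backpatched JE; exc_code -1 re-raises the
 *                              // exception recorded by the MMU itself
 *     }
 *
 * so the MEM_READ_* / MEM_WRITE_* macros downstream always see a physical
 * address when the TLB is enabled. */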
   312 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
   314 /****** Import appropriate calling conventions ******/
   315 #if SIZEOF_VOID_P == 8
   316 #include "sh4/ia64abi.h"
   317 #else /* 32-bit system */
   318 #ifdef APPLE_BUILD
   319 #include "sh4/ia32mac.h"
   320 #else
   321 #include "sh4/ia32abi.h"
   322 #endif
   323 #endif
   325 void sh4_translate_begin_block( sh4addr_t pc ) 
   326 {
   327     enter_block();
   328     sh4_x86.in_delay_slot = FALSE;
   329     sh4_x86.priv_checked = FALSE;
   330     sh4_x86.fpuen_checked = FALSE;
   331     sh4_x86.branch_taken = FALSE;
   332     sh4_x86.backpatch_posn = 0;
   333     sh4_x86.block_start_pc = pc;
   334     sh4_x86.tlb_on = IS_MMU_ENABLED();
   335     sh4_x86.tstate = TSTATE_NONE;
   336     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   337     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
   338 }
   341 uint32_t sh4_translate_end_block_size()
   342 {
   343     if( sh4_x86.backpatch_posn <= 3 ) {
   344         return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
   345     } else {
   346         return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
   347     }
   348 }
   351 /**
   352  * Embed a breakpoint into the generated code
   353  */
   354 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   355 {
   356     load_imm32( R_EAX, pc );
   357     call_func1( sh4_translate_breakpoint_hit, R_EAX );
   358     sh4_x86.tstate = TSTATE_NONE;
   359 }
   362 #define UNTRANSLATABLE(pc) (!IS_IN_ICACHE(pc))
   364 /**
   365  * Embed a call to sh4_execute_instruction for situations that we
   366  * can't translate (just page-crossing delay slots at the moment).
   367  * Caller is responsible for setting new_pc before calling this function.
   368  *
   369  * Performs:
   370  *   Set PC = endpc
   371  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   372  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   373  *   Call sh4_execute_instruction
   374  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   375  */
   376 void exit_block_emu( sh4vma_t endpc )
   377 {
   377 {
   378     load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
   379     ADD_r32_sh4r( R_ECX, R_PC );
   381     load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
   382     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   383     load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
   384     store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
   386     call_func0( sh4_execute_instruction );    
   387     load_spreg( R_EAX, R_PC );
   388     if( sh4_x86.tlb_on ) {
   389 	call_func1(xlat_get_code_by_vma,R_EAX);
   390     } else {
   391 	call_func1(xlat_get_code,R_EAX);
   392     }
   393     AND_imm8s_rptr( 0xFC, R_EAX );
   394     POP_r32(R_EBP);
   395     RET();
   396 } 
   398 /**
   399  * Translate a single instruction. Delayed branches are handled specially
   400  * by translating both the branch and the delayed instruction as a single
   401  * unit (as the delay-slot instruction logically belongs to the branch).
   402  * The instruction MUST be in the icache (assert check)
   403  *
   404  * @return true if the instruction marks the end of a basic block
   405  * (eg a branch or an illegal instruction).
   406  */
   407 uint32_t sh4_translate_instruction( sh4vma_t pc )
   408 {
   409     uint32_t ir;
   410     /* Read instruction from icache */
   411     assert( IS_IN_ICACHE(pc) );
   412     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   414     if( !sh4_x86.in_delay_slot ) {
   415 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   416     }
   417 %%
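/* Everything below this point is input to the instruction-decoder generator:
 * each "MNEMONIC operands {: ... :}" block appears to be expanded into the
 * body of sh4_translate_instruction() above, with the decoded operand fields
 * (Rm, Rn, imm, disp) available as locals inside the block. */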
   418 /* ALU operations */
   419 ADD Rm, Rn {:
   420     COUNT_INST(I_ADD);
   421     load_reg( R_EAX, Rm );
   422     load_reg( R_ECX, Rn );
   423     ADD_r32_r32( R_EAX, R_ECX );
   424     store_reg( R_ECX, Rn );
   425     sh4_x86.tstate = TSTATE_NONE;
   426 :}
   427 ADD #imm, Rn {:  
   428     COUNT_INST(I_ADDI);
   429     load_reg( R_EAX, Rn );
   430     ADD_imm8s_r32( imm, R_EAX );
   431     store_reg( R_EAX, Rn );
   432     sh4_x86.tstate = TSTATE_NONE;
   433 :}
   434 ADDC Rm, Rn {:
   435     COUNT_INST(I_ADDC);
   436     if( sh4_x86.tstate != TSTATE_C ) {
   437         LDC_t();
   438     }
   439     load_reg( R_EAX, Rm );
   440     load_reg( R_ECX, Rn );
   441     ADC_r32_r32( R_EAX, R_ECX );
   442     store_reg( R_ECX, Rn );
   443     SETC_t();
   444     sh4_x86.tstate = TSTATE_C;
   445 :}
   446 ADDV Rm, Rn {:
   447     COUNT_INST(I_ADDV);
   448     load_reg( R_EAX, Rm );
   449     load_reg( R_ECX, Rn );
   450     ADD_r32_r32( R_EAX, R_ECX );
   451     store_reg( R_ECX, Rn );
   452     SETO_t();
   453     sh4_x86.tstate = TSTATE_O;
   454 :}
   455 AND Rm, Rn {:
   456     COUNT_INST(I_AND);
   457     load_reg( R_EAX, Rm );
   458     load_reg( R_ECX, Rn );
   459     AND_r32_r32( R_EAX, R_ECX );
   460     store_reg( R_ECX, Rn );
   461     sh4_x86.tstate = TSTATE_NONE;
   462 :}
   463 AND #imm, R0 {:  
   464     COUNT_INST(I_ANDI);
   465     load_reg( R_EAX, 0 );
   466     AND_imm32_r32(imm, R_EAX); 
   467     store_reg( R_EAX, 0 );
   468     sh4_x86.tstate = TSTATE_NONE;
   469 :}
   470 AND.B #imm, @(R0, GBR) {: 
   471     COUNT_INST(I_ANDB);
   472     load_reg( R_EAX, 0 );
   473     load_spreg( R_ECX, R_GBR );
   474     ADD_r32_r32( R_ECX, R_EAX );
   475     MMU_TRANSLATE_WRITE( R_EAX );
   476     PUSH_realigned_r32(R_EAX);
   477     MEM_READ_BYTE( R_EAX, R_EDX );
   478     POP_realigned_r32(R_EAX);
   479     AND_imm32_r32(imm, R_EDX );
   480     MEM_WRITE_BYTE( R_EAX, R_EDX );
   481     sh4_x86.tstate = TSTATE_NONE;
   482 :}
   483 CMP/EQ Rm, Rn {:  
   484     COUNT_INST(I_CMPEQ);
   485     load_reg( R_EAX, Rm );
   486     load_reg( R_ECX, Rn );
   487     CMP_r32_r32( R_EAX, R_ECX );
   488     SETE_t();
   489     sh4_x86.tstate = TSTATE_E;
   490 :}
   491 CMP/EQ #imm, R0 {:  
   492     COUNT_INST(I_CMPEQI);
   493     load_reg( R_EAX, 0 );
   494     CMP_imm8s_r32(imm, R_EAX);
   495     SETE_t();
   496     sh4_x86.tstate = TSTATE_E;
   497 :}
   498 CMP/GE Rm, Rn {:  
   499     COUNT_INST(I_CMPGE);
   500     load_reg( R_EAX, Rm );
   501     load_reg( R_ECX, Rn );
   502     CMP_r32_r32( R_EAX, R_ECX );
   503     SETGE_t();
   504     sh4_x86.tstate = TSTATE_GE;
   505 :}
   506 CMP/GT Rm, Rn {: 
   507     COUNT_INST(I_CMPGT);
   508     load_reg( R_EAX, Rm );
   509     load_reg( R_ECX, Rn );
   510     CMP_r32_r32( R_EAX, R_ECX );
   511     SETG_t();
   512     sh4_x86.tstate = TSTATE_G;
   513 :}
   514 CMP/HI Rm, Rn {:  
   515     COUNT_INST(I_CMPHI);
   516     load_reg( R_EAX, Rm );
   517     load_reg( R_ECX, Rn );
   518     CMP_r32_r32( R_EAX, R_ECX );
   519     SETA_t();
   520     sh4_x86.tstate = TSTATE_A;
   521 :}
   522 CMP/HS Rm, Rn {: 
   523     COUNT_INST(I_CMPHS);
   524     load_reg( R_EAX, Rm );
   525     load_reg( R_ECX, Rn );
   526     CMP_r32_r32( R_EAX, R_ECX );
   527     SETAE_t();
   528     sh4_x86.tstate = TSTATE_AE;
   529  :}
   530 CMP/PL Rn {: 
   531     COUNT_INST(I_CMPPL);
   532     load_reg( R_EAX, Rn );
   533     CMP_imm8s_r32( 0, R_EAX );
   534     SETG_t();
   535     sh4_x86.tstate = TSTATE_G;
   536 :}
   537 CMP/PZ Rn {:  
   538     COUNT_INST(I_CMPPZ);
   539     load_reg( R_EAX, Rn );
   540     CMP_imm8s_r32( 0, R_EAX );
   541     SETGE_t();
   542     sh4_x86.tstate = TSTATE_GE;
   543 :}
   544 CMP/STR Rm, Rn {:  
   545     COUNT_INST(I_CMPSTR);
   546     load_reg( R_EAX, Rm );
   547     load_reg( R_ECX, Rn );
   548     XOR_r32_r32( R_ECX, R_EAX );
   549     TEST_r8_r8( R_AL, R_AL );
   550     JE_rel8(target1);
   551     TEST_r8_r8( R_AH, R_AH );
   552     JE_rel8(target2);
   553     SHR_imm8_r32( 16, R_EAX );
   554     TEST_r8_r8( R_AL, R_AL );
   555     JE_rel8(target3);
   556     TEST_r8_r8( R_AH, R_AH );
   557     JMP_TARGET(target1);
   558     JMP_TARGET(target2);
   559     JMP_TARGET(target3);
   560     SETE_t();
   561     sh4_x86.tstate = TSTATE_E;
   562 :}
   563 DIV0S Rm, Rn {:
   564     COUNT_INST(I_DIV0S);
   565     load_reg( R_EAX, Rm );
   566     load_reg( R_ECX, Rn );
   567     SHR_imm8_r32( 31, R_EAX );
   568     SHR_imm8_r32( 31, R_ECX );
   569     store_spreg( R_EAX, R_M );
   570     store_spreg( R_ECX, R_Q );
   571     CMP_r32_r32( R_EAX, R_ECX );
   572     SETNE_t();
   573     sh4_x86.tstate = TSTATE_NE;
   574 :}
   575 DIV0U {:  
   576     COUNT_INST(I_DIV0U);
   577     XOR_r32_r32( R_EAX, R_EAX );
   578     store_spreg( R_EAX, R_Q );
   579     store_spreg( R_EAX, R_M );
   580     store_spreg( R_EAX, R_T );
   581     sh4_x86.tstate = TSTATE_C; // works for DIV1
   582 :}
   583 DIV1 Rm, Rn {:
   584     COUNT_INST(I_DIV1);
   585     load_spreg( R_ECX, R_M );
   586     load_reg( R_EAX, Rn );
   587     if( sh4_x86.tstate != TSTATE_C ) {
   588 	LDC_t();
   589     }
   590     RCL1_r32( R_EAX );
   591     SETC_r8( R_DL ); // Q'
   592     CMP_sh4r_r32( R_Q, R_ECX );
   593     JE_rel8(mqequal);
   594     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   595     JMP_rel8(end);
   596     JMP_TARGET(mqequal);
   597     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   598     JMP_TARGET(end);
   599     store_reg( R_EAX, Rn ); // Done with Rn now
   600     SETC_r8(R_AL); // tmp1
   601     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   602     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   603     store_spreg( R_ECX, R_Q );
   604     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   605     MOVZX_r8_r32( R_AL, R_EAX );
   606     store_spreg( R_EAX, R_T );
   607     sh4_x86.tstate = TSTATE_NONE;
   608 :}
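/* DIV1 above implements one step of the SH4's non-restoring division: Rn is
 * rotated left through T, Rm is added or subtracted depending on whether the
 * M and Q flags agree, and Q and T are then recomputed from the carry. A full
 * 32-bit divide is a DIV0S/DIV0U setup followed by 32 DIV1 steps. */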
   609 DMULS.L Rm, Rn {:  
   610     COUNT_INST(I_DMULS);
   611     load_reg( R_EAX, Rm );
   612     load_reg( R_ECX, Rn );
   613     IMUL_r32(R_ECX);
   614     store_spreg( R_EDX, R_MACH );
   615     store_spreg( R_EAX, R_MACL );
   616     sh4_x86.tstate = TSTATE_NONE;
   617 :}
   618 DMULU.L Rm, Rn {:  
   619     COUNT_INST(I_DMULU);
   620     load_reg( R_EAX, Rm );
   621     load_reg( R_ECX, Rn );
   622     MUL_r32(R_ECX);
   623     store_spreg( R_EDX, R_MACH );
   624     store_spreg( R_EAX, R_MACL );    
   625     sh4_x86.tstate = TSTATE_NONE;
   626 :}
   627 DT Rn {:  
   628     COUNT_INST(I_DT);
   629     load_reg( R_EAX, Rn );
   630     ADD_imm8s_r32( -1, R_EAX );
   631     store_reg( R_EAX, Rn );
   632     SETE_t();
   633     sh4_x86.tstate = TSTATE_E;
   634 :}
   635 EXTS.B Rm, Rn {:  
   636     COUNT_INST(I_EXTSB);
   637     load_reg( R_EAX, Rm );
   638     MOVSX_r8_r32( R_EAX, R_EAX );
   639     store_reg( R_EAX, Rn );
   640 :}
   641 EXTS.W Rm, Rn {:  
   642     COUNT_INST(I_EXTSW);
   643     load_reg( R_EAX, Rm );
   644     MOVSX_r16_r32( R_EAX, R_EAX );
   645     store_reg( R_EAX, Rn );
   646 :}
   647 EXTU.B Rm, Rn {:  
   648     COUNT_INST(I_EXTUB);
   649     load_reg( R_EAX, Rm );
   650     MOVZX_r8_r32( R_EAX, R_EAX );
   651     store_reg( R_EAX, Rn );
   652 :}
   653 EXTU.W Rm, Rn {:  
   654     COUNT_INST(I_EXTUW);
   655     load_reg( R_EAX, Rm );
   656     MOVZX_r16_r32( R_EAX, R_EAX );
   657     store_reg( R_EAX, Rn );
   658 :}
   659 MAC.L @Rm+, @Rn+ {:
   660     COUNT_INST(I_MACL);
   661     if( Rm == Rn ) {
   662 	load_reg( R_EAX, Rm );
   663 	check_ralign32( R_EAX );
   664 	MMU_TRANSLATE_READ( R_EAX );
   665 	PUSH_realigned_r32( R_EAX );
   666 	load_reg( R_EAX, Rn );
   667 	ADD_imm8s_r32( 4, R_EAX );
   668 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   669 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
   670 	// Note translate twice in case of page boundaries. Maybe worth
   671 	// adding a page-boundary check to skip the second translation
   672     } else {
   673 	load_reg( R_EAX, Rm );
   674 	check_ralign32( R_EAX );
   675 	MMU_TRANSLATE_READ( R_EAX );
   676 	load_reg( R_ECX, Rn );
   677 	check_ralign32( R_ECX );
   678 	PUSH_realigned_r32( R_EAX );
   679 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   680 	MOV_r32_r32( R_ECX, R_EAX );
   681 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   682 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   683     }
   684     MEM_READ_LONG( R_EAX, R_EAX );
   685     POP_r32( R_ECX );
   686     PUSH_r32( R_EAX );
   687     MEM_READ_LONG( R_ECX, R_EAX );
   688     POP_realigned_r32( R_ECX );
   690     IMUL_r32( R_ECX );
   691     ADD_r32_sh4r( R_EAX, R_MACL );
   692     ADC_r32_sh4r( R_EDX, R_MACH );
   694     load_spreg( R_ECX, R_S );
   695     TEST_r32_r32(R_ECX, R_ECX);
   696     JE_rel8( nosat );
   697     call_func0( signsat48 );
   698     JMP_TARGET( nosat );
   699     sh4_x86.tstate = TSTATE_NONE;
   700 :}
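/* With the S flag set, MAC.L saturates the accumulator to 48 bits
 * (sign-extended through MACH:MACL); signsat48() is assumed to clamp the
 * 64-bit sum accordingly, which is why it is only called on the S != 0
 * path. */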
   701 MAC.W @Rm+, @Rn+ {:  
   702     COUNT_INST(I_MACW);
   703     if( Rm == Rn ) {
   704 	load_reg( R_EAX, Rm );
   705 	check_ralign16( R_EAX );
   706 	MMU_TRANSLATE_READ( R_EAX );
   707 	PUSH_realigned_r32( R_EAX );
   708 	load_reg( R_EAX, Rn );
   709 	ADD_imm8s_r32( 2, R_EAX );
   710 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   711 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   712 	// Note translate twice in case of page boundaries. Maybe worth
   713 	// adding a page-boundary check to skip the second translation
   714     } else {
   715 	load_reg( R_EAX, Rm );
   716 	check_ralign16( R_EAX );
   717 	MMU_TRANSLATE_READ( R_EAX );
   718 	load_reg( R_ECX, Rn );
   719 	check_ralign16( R_ECX );
   720 	PUSH_realigned_r32( R_EAX );
   721 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   722 	MOV_r32_r32( R_ECX, R_EAX );
   723 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   724 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   725     }
   726     MEM_READ_WORD( R_EAX, R_EAX );
   727     POP_r32( R_ECX );
   728     PUSH_r32( R_EAX );
   729     MEM_READ_WORD( R_ECX, R_EAX );
   730     POP_realigned_r32( R_ECX );
   731     IMUL_r32( R_ECX );
   733     load_spreg( R_ECX, R_S );
   734     TEST_r32_r32( R_ECX, R_ECX );
   735     JE_rel8( nosat );
   737     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   738     JNO_rel8( end );            // 2
   739     load_imm32( R_EDX, 1 );         // 5
   740     store_spreg( R_EDX, R_MACH );   // 6
   741     JS_rel8( positive );        // 2
   742     load_imm32( R_EAX, 0x80000000 );// 5
   743     store_spreg( R_EAX, R_MACL );   // 6
   744     JMP_rel8(end2);           // 2
   746     JMP_TARGET(positive);
   747     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   748     store_spreg( R_EAX, R_MACL );   // 6
   749     JMP_rel8(end3);            // 2
   751     JMP_TARGET(nosat);
   752     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   753     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   754     JMP_TARGET(end);
   755     JMP_TARGET(end2);
   756     JMP_TARGET(end3);
   757     sh4_x86.tstate = TSTATE_NONE;
   758 :}
   759 MOVT Rn {:  
   760     COUNT_INST(I_MOVT);
   761     load_spreg( R_EAX, R_T );
   762     store_reg( R_EAX, Rn );
   763 :}
   764 MUL.L Rm, Rn {:  
   765     COUNT_INST(I_MULL);
   766     load_reg( R_EAX, Rm );
   767     load_reg( R_ECX, Rn );
   768     MUL_r32( R_ECX );
   769     store_spreg( R_EAX, R_MACL );
   770     sh4_x86.tstate = TSTATE_NONE;
   771 :}
   772 MULS.W Rm, Rn {:
   773     COUNT_INST(I_MULSW);
   774     load_reg16s( R_EAX, Rm );
   775     load_reg16s( R_ECX, Rn );
   776     MUL_r32( R_ECX );
   777     store_spreg( R_EAX, R_MACL );
   778     sh4_x86.tstate = TSTATE_NONE;
   779 :}
   780 MULU.W Rm, Rn {:  
   781     COUNT_INST(I_MULUW);
   782     load_reg16u( R_EAX, Rm );
   783     load_reg16u( R_ECX, Rn );
   784     MUL_r32( R_ECX );
   785     store_spreg( R_EAX, R_MACL );
   786     sh4_x86.tstate = TSTATE_NONE;
   787 :}
   788 NEG Rm, Rn {:
   789     COUNT_INST(I_NEG);
   790     load_reg( R_EAX, Rm );
   791     NEG_r32( R_EAX );
   792     store_reg( R_EAX, Rn );
   793     sh4_x86.tstate = TSTATE_NONE;
   794 :}
   795 NEGC Rm, Rn {:  
   796     COUNT_INST(I_NEGC);
   797     load_reg( R_EAX, Rm );
   798     XOR_r32_r32( R_ECX, R_ECX );
   799     LDC_t();
   800     SBB_r32_r32( R_EAX, R_ECX );
   801     store_reg( R_ECX, Rn );
   802     SETC_t();
   803     sh4_x86.tstate = TSTATE_C;
   804 :}
   805 NOT Rm, Rn {:  
   806     COUNT_INST(I_NOT);
   807     load_reg( R_EAX, Rm );
   808     NOT_r32( R_EAX );
   809     store_reg( R_EAX, Rn );
   810     sh4_x86.tstate = TSTATE_NONE;
   811 :}
   812 OR Rm, Rn {:  
   813     COUNT_INST(I_OR);
   814     load_reg( R_EAX, Rm );
   815     load_reg( R_ECX, Rn );
   816     OR_r32_r32( R_EAX, R_ECX );
   817     store_reg( R_ECX, Rn );
   818     sh4_x86.tstate = TSTATE_NONE;
   819 :}
   820 OR #imm, R0 {:
   821     COUNT_INST(I_ORI);
   822     load_reg( R_EAX, 0 );
   823     OR_imm32_r32(imm, R_EAX);
   824     store_reg( R_EAX, 0 );
   825     sh4_x86.tstate = TSTATE_NONE;
   826 :}
   827 OR.B #imm, @(R0, GBR) {:  
   828     COUNT_INST(I_ORB);
   829     load_reg( R_EAX, 0 );
   830     load_spreg( R_ECX, R_GBR );
   831     ADD_r32_r32( R_ECX, R_EAX );
   832     MMU_TRANSLATE_WRITE( R_EAX );
   833     PUSH_realigned_r32(R_EAX);
   834     MEM_READ_BYTE( R_EAX, R_EDX );
   835     POP_realigned_r32(R_EAX);
   836     OR_imm32_r32(imm, R_EDX );
   837     MEM_WRITE_BYTE( R_EAX, R_EDX );
   838     sh4_x86.tstate = TSTATE_NONE;
   839 :}
   840 ROTCL Rn {:
   841     COUNT_INST(I_ROTCL);
   842     load_reg( R_EAX, Rn );
   843     if( sh4_x86.tstate != TSTATE_C ) {
   844 	LDC_t();
   845     }
   846     RCL1_r32( R_EAX );
   847     store_reg( R_EAX, Rn );
   848     SETC_t();
   849     sh4_x86.tstate = TSTATE_C;
   850 :}
   851 ROTCR Rn {:  
   852     COUNT_INST(I_ROTCR);
   853     load_reg( R_EAX, Rn );
   854     if( sh4_x86.tstate != TSTATE_C ) {
   855 	LDC_t();
   856     }
   857     RCR1_r32( R_EAX );
   858     store_reg( R_EAX, Rn );
   859     SETC_t();
   860     sh4_x86.tstate = TSTATE_C;
   861 :}
   862 ROTL Rn {:  
   863     COUNT_INST(I_ROTL);
   864     load_reg( R_EAX, Rn );
   865     ROL1_r32( R_EAX );
   866     store_reg( R_EAX, Rn );
   867     SETC_t();
   868     sh4_x86.tstate = TSTATE_C;
   869 :}
   870 ROTR Rn {:  
   871     COUNT_INST(I_ROTR);
   872     load_reg( R_EAX, Rn );
   873     ROR1_r32( R_EAX );
   874     store_reg( R_EAX, Rn );
   875     SETC_t();
   876     sh4_x86.tstate = TSTATE_C;
   877 :}
   878 SHAD Rm, Rn {:
   879     COUNT_INST(I_SHAD);
   880     /* Annoyingly enough, not directly convertible */
   881     load_reg( R_EAX, Rn );
   882     load_reg( R_ECX, Rm );
   883     CMP_imm32_r32( 0, R_ECX );
   884     JGE_rel8(doshl);
   886     NEG_r32( R_ECX );      // 2
   887     AND_imm8_r8( 0x1F, R_CL ); // 3
   888     JE_rel8(emptysar);     // 2
   889     SAR_r32_CL( R_EAX );       // 2
   890     JMP_rel8(end);          // 2
   892     JMP_TARGET(emptysar);
   893     SAR_imm8_r32(31, R_EAX );  // 3
   894     JMP_rel8(end2);
   896     JMP_TARGET(doshl);
   897     AND_imm8_r8( 0x1F, R_CL ); // 3
   898     SHL_r32_CL( R_EAX );       // 2
   899     JMP_TARGET(end);
   900     JMP_TARGET(end2);
   901     store_reg( R_EAX, Rn );
   902     sh4_x86.tstate = TSTATE_NONE;
   903 :}
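/* Why SHAD is "not directly convertible": for negative Rm the SH4 shifts
 * right arithmetically by 32-(Rm&0x1F), so a masked count of zero means
 * "shift right by 32" (fill with the sign bit). x86 masks shift counts to
 * 5 bits and cannot express 32, hence the emptysar path substitutes SAR by
 * 31, which produces the same all-sign-bits result. */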
   904 SHLD Rm, Rn {:  
   905     COUNT_INST(I_SHLD);
   906     load_reg( R_EAX, Rn );
   907     load_reg( R_ECX, Rm );
   908     CMP_imm32_r32( 0, R_ECX );
   909     JGE_rel8(doshl);
   911     NEG_r32( R_ECX );      // 2
   912     AND_imm8_r8( 0x1F, R_CL ); // 3
   913     JE_rel8(emptyshr );
   914     SHR_r32_CL( R_EAX );       // 2
   915     JMP_rel8(end);          // 2
   917     JMP_TARGET(emptyshr);
   918     XOR_r32_r32( R_EAX, R_EAX );
   919     JMP_rel8(end2);
   921     JMP_TARGET(doshl);
   922     AND_imm8_r8( 0x1F, R_CL ); // 3
   923     SHL_r32_CL( R_EAX );       // 2
   924     JMP_TARGET(end);
   925     JMP_TARGET(end2);
   926     store_reg( R_EAX, Rn );
   927     sh4_x86.tstate = TSTATE_NONE;
   928 :}
   929 SHAL Rn {: 
   930     COUNT_INST(I_SHAL);
   931     load_reg( R_EAX, Rn );
   932     SHL1_r32( R_EAX );
   933     SETC_t();
   934     store_reg( R_EAX, Rn );
   935     sh4_x86.tstate = TSTATE_C;
   936 :}
   937 SHAR Rn {:  
   938     COUNT_INST(I_SHAR);
   939     load_reg( R_EAX, Rn );
   940     SAR1_r32( R_EAX );
   941     SETC_t();
   942     store_reg( R_EAX, Rn );
   943     sh4_x86.tstate = TSTATE_C;
   944 :}
   945 SHLL Rn {:  
   946     COUNT_INST(I_SHLL);
   947     load_reg( R_EAX, Rn );
   948     SHL1_r32( R_EAX );
   949     SETC_t();
   950     store_reg( R_EAX, Rn );
   951     sh4_x86.tstate = TSTATE_C;
   952 :}
   953 SHLL2 Rn {:
   954     COUNT_INST(I_SHLL);
   955     load_reg( R_EAX, Rn );
   956     SHL_imm8_r32( 2, R_EAX );
   957     store_reg( R_EAX, Rn );
   958     sh4_x86.tstate = TSTATE_NONE;
   959 :}
   960 SHLL8 Rn {:  
   961     COUNT_INST(I_SHLL);
   962     load_reg( R_EAX, Rn );
   963     SHL_imm8_r32( 8, R_EAX );
   964     store_reg( R_EAX, Rn );
   965     sh4_x86.tstate = TSTATE_NONE;
   966 :}
   967 SHLL16 Rn {:  
   968     COUNT_INST(I_SHLL);
   969     load_reg( R_EAX, Rn );
   970     SHL_imm8_r32( 16, R_EAX );
   971     store_reg( R_EAX, Rn );
   972     sh4_x86.tstate = TSTATE_NONE;
   973 :}
   974 SHLR Rn {:  
   975     COUNT_INST(I_SHLR);
   976     load_reg( R_EAX, Rn );
   977     SHR1_r32( R_EAX );
   978     SETC_t();
   979     store_reg( R_EAX, Rn );
   980     sh4_x86.tstate = TSTATE_C;
   981 :}
   982 SHLR2 Rn {:  
   983     COUNT_INST(I_SHLR);
   984     load_reg( R_EAX, Rn );
   985     SHR_imm8_r32( 2, R_EAX );
   986     store_reg( R_EAX, Rn );
   987     sh4_x86.tstate = TSTATE_NONE;
   988 :}
   989 SHLR8 Rn {:  
   990     COUNT_INST(I_SHLR);
   991     load_reg( R_EAX, Rn );
   992     SHR_imm8_r32( 8, R_EAX );
   993     store_reg( R_EAX, Rn );
   994     sh4_x86.tstate = TSTATE_NONE;
   995 :}
   996 SHLR16 Rn {:  
   997     COUNT_INST(I_SHLR);
   998     load_reg( R_EAX, Rn );
   999     SHR_imm8_r32( 16, R_EAX );
  1000     store_reg( R_EAX, Rn );
  1001     sh4_x86.tstate = TSTATE_NONE;
  1002 :}
  1003 SUB Rm, Rn {:  
  1004     COUNT_INST(I_SUB);
  1005     load_reg( R_EAX, Rm );
  1006     load_reg( R_ECX, Rn );
  1007     SUB_r32_r32( R_EAX, R_ECX );
  1008     store_reg( R_ECX, Rn );
  1009     sh4_x86.tstate = TSTATE_NONE;
  1010 :}
  1011 SUBC Rm, Rn {:  
  1012     COUNT_INST(I_SUBC);
  1013     load_reg( R_EAX, Rm );
  1014     load_reg( R_ECX, Rn );
  1015     if( sh4_x86.tstate != TSTATE_C ) {
  1016 	LDC_t();
  1017     }
  1018     SBB_r32_r32( R_EAX, R_ECX );
  1019     store_reg( R_ECX, Rn );
  1020     SETC_t();
  1021     sh4_x86.tstate = TSTATE_C;
  1022 :}
  1023 SUBV Rm, Rn {:  
  1024     COUNT_INST(I_SUBV);
  1025     load_reg( R_EAX, Rm );
  1026     load_reg( R_ECX, Rn );
  1027     SUB_r32_r32( R_EAX, R_ECX );
  1028     store_reg( R_ECX, Rn );
  1029     SETO_t();
  1030     sh4_x86.tstate = TSTATE_O;
  1031 :}
  1032 SWAP.B Rm, Rn {:  
  1033     COUNT_INST(I_SWAPB);
  1034     load_reg( R_EAX, Rm );
  1035     XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
  1036     store_reg( R_EAX, Rn );
  1037 :}
  1038 SWAP.W Rm, Rn {:  
  1039     COUNT_INST(I_SWAPW);
  1040     load_reg( R_EAX, Rm );
  1041     MOV_r32_r32( R_EAX, R_ECX );
  1042     SHL_imm8_r32( 16, R_ECX );
  1043     SHR_imm8_r32( 16, R_EAX );
  1044     OR_r32_r32( R_EAX, R_ECX );
  1045     store_reg( R_ECX, Rn );
  1046     sh4_x86.tstate = TSTATE_NONE;
  1047 :}
  1048 TAS.B @Rn {:  
  1049     COUNT_INST(I_TASB);
  1050     load_reg( R_EAX, Rn );
  1051     MMU_TRANSLATE_WRITE( R_EAX );
  1052     PUSH_realigned_r32( R_EAX );
  1053     MEM_READ_BYTE( R_EAX, R_EDX );
  1054     TEST_r8_r8( R_DL, R_DL );
  1055     SETE_t();
  1056     OR_imm8_r8( 0x80, R_DL );
  1057     POP_realigned_r32( R_EAX );
  1058     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1059     sh4_x86.tstate = TSTATE_NONE;
  1060 :}
  1061 TST Rm, Rn {:  
  1062     COUNT_INST(I_TST);
  1063     load_reg( R_EAX, Rm );
  1064     load_reg( R_ECX, Rn );
  1065     TEST_r32_r32( R_EAX, R_ECX );
  1066     SETE_t();
  1067     sh4_x86.tstate = TSTATE_E;
  1068 :}
  1069 TST #imm, R0 {:  
  1070     COUNT_INST(I_TSTI);
  1071     load_reg( R_EAX, 0 );
  1072     TEST_imm32_r32( imm, R_EAX );
  1073     SETE_t();
  1074     sh4_x86.tstate = TSTATE_E;
  1075 :}
  1076 TST.B #imm, @(R0, GBR) {:  
  1077     COUNT_INST(I_TSTB);
  1078     load_reg( R_EAX, 0);
  1079     load_spreg( R_ECX, R_GBR );
  1080     ADD_r32_r32( R_ECX, R_EAX );
  1081     MMU_TRANSLATE_READ( R_EAX );
  1082     MEM_READ_BYTE( R_EAX, R_EAX );
  1083     TEST_imm8_r8( imm, R_AL );
  1084     SETE_t();
  1085     sh4_x86.tstate = TSTATE_E;
  1086 :}
  1087 XOR Rm, Rn {:  
  1088     COUNT_INST(I_XOR);
  1089     load_reg( R_EAX, Rm );
  1090     load_reg( R_ECX, Rn );
  1091     XOR_r32_r32( R_EAX, R_ECX );
  1092     store_reg( R_ECX, Rn );
  1093     sh4_x86.tstate = TSTATE_NONE;
  1094 :}
  1095 XOR #imm, R0 {:  
  1096     COUNT_INST(I_XORI);
  1097     load_reg( R_EAX, 0 );
  1098     XOR_imm32_r32( imm, R_EAX );
  1099     store_reg( R_EAX, 0 );
  1100     sh4_x86.tstate = TSTATE_NONE;
  1101 :}
  1102 XOR.B #imm, @(R0, GBR) {:  
  1103     COUNT_INST(I_XORB);
  1104     load_reg( R_EAX, 0 );
  1105     load_spreg( R_ECX, R_GBR );
  1106     ADD_r32_r32( R_ECX, R_EAX );
  1107     MMU_TRANSLATE_WRITE( R_EAX );
  1108     PUSH_realigned_r32(R_EAX);
  1109     MEM_READ_BYTE(R_EAX, R_EDX);
  1110     POP_realigned_r32(R_EAX);
  1111     XOR_imm32_r32( imm, R_EDX );
  1112     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1113     sh4_x86.tstate = TSTATE_NONE;
  1114 :}
  1115 XTRCT Rm, Rn {:
  1116     COUNT_INST(I_XTRCT);
  1117     load_reg( R_EAX, Rm );
  1118     load_reg( R_ECX, Rn );
  1119     SHL_imm8_r32( 16, R_EAX );
  1120     SHR_imm8_r32( 16, R_ECX );
  1121     OR_r32_r32( R_EAX, R_ECX );
  1122     store_reg( R_ECX, Rn );
  1123     sh4_x86.tstate = TSTATE_NONE;
  1124 :}
  1126 /* Data move instructions */
  1127 MOV Rm, Rn {:  
  1128     COUNT_INST(I_MOV);
  1129     load_reg( R_EAX, Rm );
  1130     store_reg( R_EAX, Rn );
  1131 :}
  1132 MOV #imm, Rn {:  
  1133     COUNT_INST(I_MOVI);
  1134     load_imm32( R_EAX, imm );
  1135     store_reg( R_EAX, Rn );
  1136 :}
  1137 MOV.B Rm, @Rn {:  
  1138     COUNT_INST(I_MOVB);
  1139     load_reg( R_EAX, Rn );
  1140     MMU_TRANSLATE_WRITE( R_EAX );
  1141     load_reg( R_EDX, Rm );
  1142     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1143     sh4_x86.tstate = TSTATE_NONE;
  1144 :}
  1145 MOV.B Rm, @-Rn {:  
  1146     COUNT_INST(I_MOVB);
  1147     load_reg( R_EAX, Rn );
  1148     ADD_imm8s_r32( -1, R_EAX );
  1149     MMU_TRANSLATE_WRITE( R_EAX );
  1150     load_reg( R_EDX, Rm );
  1151     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
  1152     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1153     sh4_x86.tstate = TSTATE_NONE;
  1154 :}
  1155 MOV.B Rm, @(R0, Rn) {:  
  1156     COUNT_INST(I_MOVB);
  1157     load_reg( R_EAX, 0 );
  1158     load_reg( R_ECX, Rn );
  1159     ADD_r32_r32( R_ECX, R_EAX );
  1160     MMU_TRANSLATE_WRITE( R_EAX );
  1161     load_reg( R_EDX, Rm );
  1162     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1163     sh4_x86.tstate = TSTATE_NONE;
  1164 :}
  1165 MOV.B R0, @(disp, GBR) {:  
  1166     COUNT_INST(I_MOVB);
  1167     load_spreg( R_EAX, R_GBR );
  1168     ADD_imm32_r32( disp, R_EAX );
  1169     MMU_TRANSLATE_WRITE( R_EAX );
  1170     load_reg( R_EDX, 0 );
  1171     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1172     sh4_x86.tstate = TSTATE_NONE;
  1173 :}
  1174 MOV.B R0, @(disp, Rn) {:  
  1175     COUNT_INST(I_MOVB);
  1176     load_reg( R_EAX, Rn );
  1177     ADD_imm32_r32( disp, R_EAX );
  1178     MMU_TRANSLATE_WRITE( R_EAX );
  1179     load_reg( R_EDX, 0 );
  1180     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1181     sh4_x86.tstate = TSTATE_NONE;
  1182 :}
  1183 MOV.B @Rm, Rn {:  
  1184     COUNT_INST(I_MOVB);
  1185     load_reg( R_EAX, Rm );
  1186     MMU_TRANSLATE_READ( R_EAX );
  1187     MEM_READ_BYTE( R_EAX, R_EAX );
  1188     store_reg( R_EAX, Rn );
  1189     sh4_x86.tstate = TSTATE_NONE;
  1190 :}
  1191 MOV.B @Rm+, Rn {:  
  1192     COUNT_INST(I_MOVB);
  1193     load_reg( R_EAX, Rm );
  1194     MMU_TRANSLATE_READ( R_EAX );
  1195     ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
  1196     MEM_READ_BYTE( R_EAX, R_EAX );
  1197     store_reg( R_EAX, Rn );
  1198     sh4_x86.tstate = TSTATE_NONE;
  1199 :}
  1200 MOV.B @(R0, Rm), Rn {:  
  1201     COUNT_INST(I_MOVB);
  1202     load_reg( R_EAX, 0 );
  1203     load_reg( R_ECX, Rm );
  1204     ADD_r32_r32( R_ECX, R_EAX );
  1205     MMU_TRANSLATE_READ( R_EAX );
  1206     MEM_READ_BYTE( R_EAX, R_EAX );
  1207     store_reg( R_EAX, Rn );
  1208     sh4_x86.tstate = TSTATE_NONE;
  1209 :}
  1210 MOV.B @(disp, GBR), R0 {:  
  1211     COUNT_INST(I_MOVB);
  1212     load_spreg( R_EAX, R_GBR );
  1213     ADD_imm32_r32( disp, R_EAX );
  1214     MMU_TRANSLATE_READ( R_EAX );
  1215     MEM_READ_BYTE( R_EAX, R_EAX );
  1216     store_reg( R_EAX, 0 );
  1217     sh4_x86.tstate = TSTATE_NONE;
  1218 :}
  1219 MOV.B @(disp, Rm), R0 {:  
  1220     COUNT_INST(I_MOVB);
  1221     load_reg( R_EAX, Rm );
  1222     ADD_imm32_r32( disp, R_EAX );
  1223     MMU_TRANSLATE_READ( R_EAX );
  1224     MEM_READ_BYTE( R_EAX, R_EAX );
  1225     store_reg( R_EAX, 0 );
  1226     sh4_x86.tstate = TSTATE_NONE;
  1227 :}
  1228 MOV.L Rm, @Rn {:
  1229     COUNT_INST(I_MOVL);
  1230     load_reg( R_EAX, Rn );
  1231     check_walign32(R_EAX);
  1232     MMU_TRANSLATE_WRITE( R_EAX );
  1233     load_reg( R_EDX, Rm );
  1234     MEM_WRITE_LONG( R_EAX, R_EDX );
  1235     sh4_x86.tstate = TSTATE_NONE;
  1236 :}
  1237 MOV.L Rm, @-Rn {:  
  1238     COUNT_INST(I_MOVL);
  1239     load_reg( R_EAX, Rn );
  1240     ADD_imm8s_r32( -4, R_EAX );
  1241     check_walign32( R_EAX );
  1242     MMU_TRANSLATE_WRITE( R_EAX );
  1243     load_reg( R_EDX, Rm );
  1244     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  1245     MEM_WRITE_LONG( R_EAX, R_EDX );
  1246     sh4_x86.tstate = TSTATE_NONE;
  1247 :}
  1248 MOV.L Rm, @(R0, Rn) {:  
  1249     COUNT_INST(I_MOVL);
  1250     load_reg( R_EAX, 0 );
  1251     load_reg( R_ECX, Rn );
  1252     ADD_r32_r32( R_ECX, R_EAX );
  1253     check_walign32( R_EAX );
  1254     MMU_TRANSLATE_WRITE( R_EAX );
  1255     load_reg( R_EDX, Rm );
  1256     MEM_WRITE_LONG( R_EAX, R_EDX );
  1257     sh4_x86.tstate = TSTATE_NONE;
  1258 :}
  1259 MOV.L R0, @(disp, GBR) {:  
  1260     COUNT_INST(I_MOVL);
  1261     load_spreg( R_EAX, R_GBR );
  1262     ADD_imm32_r32( disp, R_EAX );
  1263     check_walign32( R_EAX );
  1264     MMU_TRANSLATE_WRITE( R_EAX );
  1265     load_reg( R_EDX, 0 );
  1266     MEM_WRITE_LONG( R_EAX, R_EDX );
  1267     sh4_x86.tstate = TSTATE_NONE;
  1268 :}
  1269 MOV.L Rm, @(disp, Rn) {:  
  1270     COUNT_INST(I_MOVL);
  1271     load_reg( R_EAX, Rn );
  1272     ADD_imm32_r32( disp, R_EAX );
  1273     check_walign32( R_EAX );
  1274     MMU_TRANSLATE_WRITE( R_EAX );
  1275     load_reg( R_EDX, Rm );
  1276     MEM_WRITE_LONG( R_EAX, R_EDX );
  1277     sh4_x86.tstate = TSTATE_NONE;
  1278 :}
  1279 MOV.L @Rm, Rn {:  
  1280     COUNT_INST(I_MOVL);
  1281     load_reg( R_EAX, Rm );
  1282     check_ralign32( R_EAX );
  1283     MMU_TRANSLATE_READ( R_EAX );
  1284     MEM_READ_LONG( R_EAX, R_EAX );
  1285     store_reg( R_EAX, Rn );
  1286     sh4_x86.tstate = TSTATE_NONE;
  1287 :}
  1288 MOV.L @Rm+, Rn {:  
  1289     COUNT_INST(I_MOVL);
  1290     load_reg( R_EAX, Rm );
  1291     check_ralign32( R_EAX );
  1292     MMU_TRANSLATE_READ( R_EAX );
  1293     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1294     MEM_READ_LONG( R_EAX, R_EAX );
  1295     store_reg( R_EAX, Rn );
  1296     sh4_x86.tstate = TSTATE_NONE;
  1297 :}
  1298 MOV.L @(R0, Rm), Rn {:  
  1299     COUNT_INST(I_MOVL);
  1300     load_reg( R_EAX, 0 );
  1301     load_reg( R_ECX, Rm );
  1302     ADD_r32_r32( R_ECX, R_EAX );
  1303     check_ralign32( R_EAX );
  1304     MMU_TRANSLATE_READ( R_EAX );
  1305     MEM_READ_LONG( R_EAX, R_EAX );
  1306     store_reg( R_EAX, Rn );
  1307     sh4_x86.tstate = TSTATE_NONE;
  1308 :}
  1309 MOV.L @(disp, GBR), R0 {:
  1310     COUNT_INST(I_MOVL);
  1311     load_spreg( R_EAX, R_GBR );
  1312     ADD_imm32_r32( disp, R_EAX );
  1313     check_ralign32( R_EAX );
  1314     MMU_TRANSLATE_READ( R_EAX );
  1315     MEM_READ_LONG( R_EAX, R_EAX );
  1316     store_reg( R_EAX, 0 );
  1317     sh4_x86.tstate = TSTATE_NONE;
  1318 :}
  1319 MOV.L @(disp, PC), Rn {:  
  1320     COUNT_INST(I_MOVLPC);
  1321     if( sh4_x86.in_delay_slot ) {
  1322 	SLOTILLEGAL();
  1323     } else {
  1324 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1325 	if( IS_IN_ICACHE(target) ) {
  1326 	    // If the target address is in the same page as the code, it's
  1327 	    // pretty safe to just ref it directly and circumvent the whole
  1328 	    // memory subsystem. (this is a big performance win)
  1330 	    // FIXME: There's a corner-case that's not handled here when
  1331 	    // the current code-page is in the ITLB but not in the UTLB.
  1332 	    // (should generate a TLB miss although need to test SH4 
  1333 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1334 	    // behaviour though.
  1335 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1336 	    MOV_moff32_EAX( ptr );
  1337 	} else {
  1338 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1339 	    // different virtual address than the translation was done with,
  1340 	    // but we can safely assume that the low bits are the same.
  1341 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1342 	    ADD_sh4r_r32( R_PC, R_EAX );
  1343 	    MMU_TRANSLATE_READ( R_EAX );
  1344 	    MEM_READ_LONG( R_EAX, R_EAX );
  1345 	    sh4_x86.tstate = TSTATE_NONE;
  1346 	}
  1347 	store_reg( R_EAX, Rn );
  1348     }
  1349 :}
  1350 MOV.L @(disp, Rm), Rn {:  
  1351     COUNT_INST(I_MOVL);
  1352     load_reg( R_EAX, Rm );
  1353     ADD_imm8s_r32( disp, R_EAX );
  1354     check_ralign32( R_EAX );
  1355     MMU_TRANSLATE_READ( R_EAX );
  1356     MEM_READ_LONG( R_EAX, R_EAX );
  1357     store_reg( R_EAX, Rn );
  1358     sh4_x86.tstate = TSTATE_NONE;
  1359 :}
  1360 MOV.W Rm, @Rn {:  
  1361     COUNT_INST(I_MOVW);
  1362     load_reg( R_EAX, Rn );
  1363     check_walign16( R_EAX );
  1364     MMU_TRANSLATE_WRITE( R_EAX );
  1365     load_reg( R_EDX, Rm );
  1366     MEM_WRITE_WORD( R_EAX, R_EDX );
  1367     sh4_x86.tstate = TSTATE_NONE;
  1368 :}
  1369 MOV.W Rm, @-Rn {:  
  1370     COUNT_INST(I_MOVW);
  1371     load_reg( R_EAX, Rn );
  1372     ADD_imm8s_r32( -2, R_EAX );
  1373     check_walign16( R_EAX );
  1374     MMU_TRANSLATE_WRITE( R_EAX );
  1375     load_reg( R_EDX, Rm );
  1376     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
  1377     MEM_WRITE_WORD( R_EAX, R_EDX );
  1378     sh4_x86.tstate = TSTATE_NONE;
  1379 :}
  1380 MOV.W Rm, @(R0, Rn) {:  
  1381     COUNT_INST(I_MOVW);
  1382     load_reg( R_EAX, 0 );
  1383     load_reg( R_ECX, Rn );
  1384     ADD_r32_r32( R_ECX, R_EAX );
  1385     check_walign16( R_EAX );
  1386     MMU_TRANSLATE_WRITE( R_EAX );
  1387     load_reg( R_EDX, Rm );
  1388     MEM_WRITE_WORD( R_EAX, R_EDX );
  1389     sh4_x86.tstate = TSTATE_NONE;
  1390 :}
  1391 MOV.W R0, @(disp, GBR) {:  
  1392     COUNT_INST(I_MOVW);
  1393     load_spreg( R_EAX, R_GBR );
  1394     ADD_imm32_r32( disp, R_EAX );
  1395     check_walign16( R_EAX );
  1396     MMU_TRANSLATE_WRITE( R_EAX );
  1397     load_reg( R_EDX, 0 );
  1398     MEM_WRITE_WORD( R_EAX, R_EDX );
  1399     sh4_x86.tstate = TSTATE_NONE;
  1400 :}
  1401 MOV.W R0, @(disp, Rn) {:  
  1402     COUNT_INST(I_MOVW);
  1403     load_reg( R_EAX, Rn );
  1404     ADD_imm32_r32( disp, R_EAX );
  1405     check_walign16( R_EAX );
  1406     MMU_TRANSLATE_WRITE( R_EAX );
  1407     load_reg( R_EDX, 0 );
  1408     MEM_WRITE_WORD( R_EAX, R_EDX );
  1409     sh4_x86.tstate = TSTATE_NONE;
  1410 :}
  1411 MOV.W @Rm, Rn {:  
  1412     COUNT_INST(I_MOVW);
  1413     load_reg( R_EAX, Rm );
  1414     check_ralign16( R_EAX );
  1415     MMU_TRANSLATE_READ( R_EAX );
  1416     MEM_READ_WORD( R_EAX, R_EAX );
  1417     store_reg( R_EAX, Rn );
  1418     sh4_x86.tstate = TSTATE_NONE;
  1419 :}
  1420 MOV.W @Rm+, Rn {:  
  1421     COUNT_INST(I_MOVW);
  1422     load_reg( R_EAX, Rm );
  1423     check_ralign16( R_EAX );
  1424     MMU_TRANSLATE_READ( R_EAX );
  1425     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
  1426     MEM_READ_WORD( R_EAX, R_EAX );
  1427     store_reg( R_EAX, Rn );
  1428     sh4_x86.tstate = TSTATE_NONE;
  1429 :}
  1430 MOV.W @(R0, Rm), Rn {:  
  1431     COUNT_INST(I_MOVW);
  1432     load_reg( R_EAX, 0 );
  1433     load_reg( R_ECX, Rm );
  1434     ADD_r32_r32( R_ECX, R_EAX );
  1435     check_ralign16( R_EAX );
  1436     MMU_TRANSLATE_READ( R_EAX );
  1437     MEM_READ_WORD( R_EAX, R_EAX );
  1438     store_reg( R_EAX, Rn );
  1439     sh4_x86.tstate = TSTATE_NONE;
  1440 :}
  1441 MOV.W @(disp, GBR), R0 {:  
  1442     COUNT_INST(I_MOVW);
  1443     load_spreg( R_EAX, R_GBR );
  1444     ADD_imm32_r32( disp, R_EAX );
  1445     check_ralign16( R_EAX );
  1446     MMU_TRANSLATE_READ( R_EAX );
  1447     MEM_READ_WORD( R_EAX, R_EAX );
  1448     store_reg( R_EAX, 0 );
  1449     sh4_x86.tstate = TSTATE_NONE;
  1450 :}
  1451 MOV.W @(disp, PC), Rn {:  
  1452     COUNT_INST(I_MOVW);
  1453     if( sh4_x86.in_delay_slot ) {
  1454 	SLOTILLEGAL();
  1455     } else {
  1456 	// See comments for MOV.L @(disp, PC), Rn
  1457 	uint32_t target = pc + disp + 4;
  1458 	if( IS_IN_ICACHE(target) ) {
  1459 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1460 	    MOV_moff32_EAX( ptr );
  1461 	    MOVSX_r16_r32( R_EAX, R_EAX );
  1462 	} else {
  1463 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1464 	    ADD_sh4r_r32( R_PC, R_EAX );
  1465 	    MMU_TRANSLATE_READ( R_EAX );
  1466 	    MEM_READ_WORD( R_EAX, R_EAX );
  1467 	    sh4_x86.tstate = TSTATE_NONE;
  1468 	}
  1469 	store_reg( R_EAX, Rn );
  1470     }
  1471 :}
  1472 MOV.W @(disp, Rm), R0 {:  
  1473     COUNT_INST(I_MOVW);
  1474     load_reg( R_EAX, Rm );
  1475     ADD_imm32_r32( disp, R_EAX );
  1476     check_ralign16( R_EAX );
  1477     MMU_TRANSLATE_READ( R_EAX );
  1478     MEM_READ_WORD( R_EAX, R_EAX );
  1479     store_reg( R_EAX, 0 );
  1480     sh4_x86.tstate = TSTATE_NONE;
  1481 :}
  1482 MOVA @(disp, PC), R0 {:  
  1483     COUNT_INST(I_MOVA);
  1484     if( sh4_x86.in_delay_slot ) {
  1485 	SLOTILLEGAL();
  1486     } else {
  1487 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1488 	ADD_sh4r_r32( R_PC, R_ECX );
  1489 	store_reg( R_ECX, 0 );
  1490 	sh4_x86.tstate = TSTATE_NONE;
  1491     }
  1492 :}
  1493 MOVCA.L R0, @Rn {:  
  1494     COUNT_INST(I_MOVCA);
  1495     load_reg( R_EAX, Rn );
  1496     check_walign32( R_EAX );
  1497     MMU_TRANSLATE_WRITE( R_EAX );
  1498     load_reg( R_EDX, 0 );
  1499     MEM_WRITE_LONG( R_EAX, R_EDX );
  1500     sh4_x86.tstate = TSTATE_NONE;
  1501 :}
  1503 /* Control transfer instructions */
  1504 BF disp {:
  1505     COUNT_INST(I_BF);
  1506     if( sh4_x86.in_delay_slot ) {
  1507 	SLOTILLEGAL();
  1508     } else {
  1509 	sh4vma_t target = disp + pc + 4;
  1510 	JT_rel8( nottaken );
  1511 	exit_block_rel(target, pc+2 );
  1512 	JMP_TARGET(nottaken);
  1513 	return 2;
  1514     }
  1515 :}
  1516 BF/S disp {:
  1517     COUNT_INST(I_BFS);
  1518     if( sh4_x86.in_delay_slot ) {
  1519 	SLOTILLEGAL();
  1520     } else {
  1521 	sh4_x86.in_delay_slot = DELAY_PC;
  1522 	if( UNTRANSLATABLE(pc+2) ) {
  1523 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1524 	    JT_rel8(nottaken);
  1525 	    ADD_imm32_r32( disp, R_EAX );
  1526 	    JMP_TARGET(nottaken);
  1527 	    ADD_sh4r_r32( R_PC, R_EAX );
  1528 	    store_spreg( R_EAX, R_NEW_PC );
  1529 	    exit_block_emu(pc+2);
  1530 	    sh4_x86.branch_taken = TRUE;
  1531 	    return 2;
  1532 	} else {
  1533 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1534 		CMP_imm8s_sh4r( 1, R_T );
  1535 		sh4_x86.tstate = TSTATE_E;
  1536 	    }
  1537 	    sh4vma_t target = disp + pc + 4;
  1538 	    OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
  1539 	    int save_tstate = sh4_x86.tstate;
  1540 	    sh4_translate_instruction(pc+2);
  1541 	    exit_block_rel( target, pc+4 );
  1543 	    // not taken
  1544 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1545 	    sh4_x86.tstate = save_tstate;
  1546 	    sh4_translate_instruction(pc+2);
  1547 	    return 4;
  1550 :}
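/* Note the shape shared by BF/S above and BT/S below: the delay-slot
 * instruction is translated twice, once on the taken path before
 * exit_block_rel() and once on the fall-through path after the rel32
 * placeholder is patched, since it must execute on either path; tstate is
 * restored in between so the second translation sees the same flag-cache
 * state as the first. */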
  1551 BRA disp {:  
  1552     COUNT_INST(I_BRA);
  1553     if( sh4_x86.in_delay_slot ) {
  1554 	SLOTILLEGAL();
  1555     } else {
  1556 	sh4_x86.in_delay_slot = DELAY_PC;
  1557 	sh4_x86.branch_taken = TRUE;
  1558 	if( UNTRANSLATABLE(pc+2) ) {
  1559 	    load_spreg( R_EAX, R_PC );
  1560 	    ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
  1561 	    store_spreg( R_EAX, R_NEW_PC );
  1562 	    exit_block_emu(pc+2);
  1563 	    return 2;
  1564 	} else {
  1565 	    sh4_translate_instruction( pc + 2 );
  1566 	    exit_block_rel( disp + pc + 4, pc+4 );
  1567 	    return 4;
  1568 	}
  1569     }
  1570 :}
  1571 BRAF Rn {:  
  1572     COUNT_INST(I_BRAF);
  1573     if( sh4_x86.in_delay_slot ) {
  1574 	SLOTILLEGAL();
  1575     } else {
  1576 	load_spreg( R_EAX, R_PC );
  1577 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1578 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1579 	store_spreg( R_EAX, R_NEW_PC );
  1580 	sh4_x86.in_delay_slot = DELAY_PC;
  1581 	sh4_x86.tstate = TSTATE_NONE;
  1582 	sh4_x86.branch_taken = TRUE;
  1583 	if( UNTRANSLATABLE(pc+2) ) {
  1584 	    exit_block_emu(pc+2);
  1585 	    return 2;
  1586 	} else {
  1587 	    sh4_translate_instruction( pc + 2 );
  1588 	    exit_block_newpcset(pc+2);
  1589 	    return 4;
  1590 	}
  1591     }
  1592 :}
  1593 BSR disp {:  
  1594     COUNT_INST(I_BSR);
  1595     if( sh4_x86.in_delay_slot ) {
  1596 	SLOTILLEGAL();
  1597     } else {
  1598 	load_spreg( R_EAX, R_PC );
  1599 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1600 	store_spreg( R_EAX, R_PR );
  1601 	sh4_x86.in_delay_slot = DELAY_PC;
  1602 	sh4_x86.branch_taken = TRUE;
  1603 	sh4_x86.tstate = TSTATE_NONE;
  1604 	if( UNTRANSLATABLE(pc+2) ) {
  1605 	    ADD_imm32_r32( disp, R_EAX );
  1606 	    store_spreg( R_EAX, R_NEW_PC );
  1607 	    exit_block_emu(pc+2);
  1608 	    return 2;
  1609 	} else {
  1610 	    sh4_translate_instruction( pc + 2 );
  1611 	    exit_block_rel( disp + pc + 4, pc+4 );
  1612 	    return 4;
  1613 	}
  1614     }
  1615 :}
  1616 BSRF Rn {:  
  1617     COUNT_INST(I_BSRF);
  1618     if( sh4_x86.in_delay_slot ) {
  1619 	SLOTILLEGAL();
  1620     } else {
  1621 	load_spreg( R_EAX, R_PC );
  1622 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1623 	store_spreg( R_EAX, R_PR );
  1624 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1625 	store_spreg( R_EAX, R_NEW_PC );
  1627 	sh4_x86.in_delay_slot = DELAY_PC;
  1628 	sh4_x86.tstate = TSTATE_NONE;
  1629 	sh4_x86.branch_taken = TRUE;
  1630 	if( UNTRANSLATABLE(pc+2) ) {
  1631 	    exit_block_emu(pc+2);
  1632 	    return 2;
  1633 	} else {
  1634 	    sh4_translate_instruction( pc + 2 );
  1635 	    exit_block_newpcset(pc+2);
  1636 	    return 4;
  1637 	}
  1638     }
  1639 :}
  1640 BT disp {:
  1641     COUNT_INST(I_BT);
  1642     if( sh4_x86.in_delay_slot ) {
  1643 	SLOTILLEGAL();
  1644     } else {
  1645 	sh4vma_t target = disp + pc + 4;
  1646 	JF_rel8( nottaken );
  1647 	exit_block_rel(target, pc+2 );
  1648 	JMP_TARGET(nottaken);
  1649 	return 2;
  1650     }
  1651 :}
  1652 BT/S disp {:
  1653     COUNT_INST(I_BTS);
  1654     if( sh4_x86.in_delay_slot ) {
  1655 	SLOTILLEGAL();
  1656     } else {
  1657 	sh4_x86.in_delay_slot = DELAY_PC;
  1658 	if( UNTRANSLATABLE(pc+2) ) {
  1659 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1660 	    JF_rel8(nottaken);
  1661 	    ADD_imm32_r32( disp, R_EAX );
  1662 	    JMP_TARGET(nottaken);
  1663 	    ADD_sh4r_r32( R_PC, R_EAX );
  1664 	    store_spreg( R_EAX, R_NEW_PC );
  1665 	    exit_block_emu(pc+2);
  1666 	    sh4_x86.branch_taken = TRUE;
  1667 	    return 2;
  1668 	} else {
  1669 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1670 		CMP_imm8s_sh4r( 1, R_T );
  1671 		sh4_x86.tstate = TSTATE_E;
  1672 	    }
  1673 	    OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
  1674 	    int save_tstate = sh4_x86.tstate;
  1675 	    sh4_translate_instruction(pc+2);
  1676 	    exit_block_rel( disp + pc + 4, pc+4 );
  1677 	    // not taken
  1678 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1679 	    sh4_x86.tstate = save_tstate;
  1680 	    sh4_translate_instruction(pc+2);
  1681 	    return 4;
  1682 	}
  1683     }
  1684 :}
  1685 JMP @Rn {:  
  1686     COUNT_INST(I_JMP);
  1687     if( sh4_x86.in_delay_slot ) {
  1688 	SLOTILLEGAL();
  1689     } else {
  1690 	load_reg( R_ECX, Rn );
  1691 	store_spreg( R_ECX, R_NEW_PC );
  1692 	sh4_x86.in_delay_slot = DELAY_PC;
  1693 	sh4_x86.branch_taken = TRUE;
  1694 	if( UNTRANSLATABLE(pc+2) ) {
  1695 	    exit_block_emu(pc+2);
  1696 	    return 2;
  1697 	} else {
  1698 	    sh4_translate_instruction(pc+2);
  1699 	    exit_block_newpcset(pc+2);
  1700 	    return 4;
  1701 	}
  1702     }
  1703 :}
  1704 JSR @Rn {:  
  1705     COUNT_INST(I_JSR);
  1706     if( sh4_x86.in_delay_slot ) {
  1707 	SLOTILLEGAL();
  1708     } else {
  1709 	load_spreg( R_EAX, R_PC );
  1710 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1711 	store_spreg( R_EAX, R_PR );
  1712 	load_reg( R_ECX, Rn );
  1713 	store_spreg( R_ECX, R_NEW_PC );
  1714 	sh4_x86.in_delay_slot = DELAY_PC;
  1715 	sh4_x86.branch_taken = TRUE;
  1716 	sh4_x86.tstate = TSTATE_NONE;
  1717 	if( UNTRANSLATABLE(pc+2) ) {
  1718 	    exit_block_emu(pc+2);
  1719 	    return 2;
  1720 	} else {
  1721 	    sh4_translate_instruction(pc+2);
  1722 	    exit_block_newpcset(pc+2);
  1723 	    return 4;
  1724 	}
  1725     }
  1726 :}
  1727 RTE {:  
  1728     COUNT_INST(I_RTE);
  1729     if( sh4_x86.in_delay_slot ) {
  1730 	SLOTILLEGAL();
  1731     } else {
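       	/* RTE restores PC from SPC and SR from SSR; sh4_write_sr can change
       	 * processor mode and register banks, so the cached privilege and
       	 * FPU-enable checks are invalidated below. */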
  1732 	check_priv();
  1733 	load_spreg( R_ECX, R_SPC );
  1734 	store_spreg( R_ECX, R_NEW_PC );
  1735 	load_spreg( R_EAX, R_SSR );
  1736 	call_func1( sh4_write_sr, R_EAX );
  1737 	sh4_x86.in_delay_slot = DELAY_PC;
  1738 	sh4_x86.priv_checked = FALSE;
  1739 	sh4_x86.fpuen_checked = FALSE;
  1740 	sh4_x86.tstate = TSTATE_NONE;
  1741 	sh4_x86.branch_taken = TRUE;
  1742 	if( UNTRANSLATABLE(pc+2) ) {
  1743 	    exit_block_emu(pc+2);
  1744 	    return 2;
  1745 	} else {
  1746 	    sh4_translate_instruction(pc+2);
  1747 	    exit_block_newpcset(pc+2);
  1748 	    return 4;
  1749 	}
  1750     }
  1751 :}
  1752 RTS {:  
  1753     COUNT_INST(I_RTS);
  1754     if( sh4_x86.in_delay_slot ) {
  1755 	SLOTILLEGAL();
  1756     } else {
  1757 	load_spreg( R_ECX, R_PR );
  1758 	store_spreg( R_ECX, R_NEW_PC );
  1759 	sh4_x86.in_delay_slot = DELAY_PC;
  1760 	sh4_x86.branch_taken = TRUE;
  1761 	if( UNTRANSLATABLE(pc+2) ) {
  1762 	    exit_block_emu(pc+2);
  1763 	    return 2;
  1764 	} else {
  1765 	    sh4_translate_instruction(pc+2);
  1766 	    exit_block_newpcset(pc+2);
  1767 	    return 4;
  1768 	}
  1769     }
  1770 :}
  1771 TRAPA #imm {:  
  1772     COUNT_INST(I_TRAPA);
  1773     if( sh4_x86.in_delay_slot ) {
  1774 	SLOTILLEGAL();
  1775     } else {
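       	/* Commit the current PC first so sh4_raise_trap sees the address of
       	 * the instruction following the TRAPA. */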
  1776 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1777 	ADD_r32_sh4r( R_ECX, R_PC );
  1778 	load_imm32( R_EAX, imm );
  1779 	call_func1( sh4_raise_trap, R_EAX );
  1780 	sh4_x86.tstate = TSTATE_NONE;
  1781 	exit_block_pcset(pc);
  1782 	sh4_x86.branch_taken = TRUE;
  1783 	return 2;
  1784     }
  1785 :}
  1786 UNDEF {:  
  1787     COUNT_INST(I_UNDEF);
  1788     if( sh4_x86.in_delay_slot ) {
  1789 	SLOTILLEGAL();
  1790     } else {
  1791 	JMP_exc(EXC_ILLEGAL);
  1792 	return 2;
  1793     }
  1794 :}
  1796 CLRMAC {:  
  1797     COUNT_INST(I_CLRMAC);
  1798     XOR_r32_r32(R_EAX, R_EAX);
  1799     store_spreg( R_EAX, R_MACL );
  1800     store_spreg( R_EAX, R_MACH );
  1801     sh4_x86.tstate = TSTATE_NONE;
  1802 :}
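       /* CLRS/CLRT/SETS/SETT: load the host carry flag with CLC/STC, then
        * SETC stores it directly into the S or T flag byte. */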
  1803 CLRS {:
  1804     COUNT_INST(I_CLRS);
  1805     CLC();
  1806     SETC_sh4r(R_S);
  1807     sh4_x86.tstate = TSTATE_NONE;
  1808 :}
  1809 CLRT {:  
  1810     COUNT_INST(I_CLRT);
  1811     CLC();
  1812     SETC_t();
  1813     sh4_x86.tstate = TSTATE_C;
  1814 :}
  1815 SETS {:  
  1816     COUNT_INST(I_SETS);
  1817     STC();
  1818     SETC_sh4r(R_S);
  1819     sh4_x86.tstate = TSTATE_NONE;
  1820 :}
  1821 SETT {:  
  1822     COUNT_INST(I_SETT);
  1823     STC();
  1824     SETC_t();
  1825     sh4_x86.tstate = TSTATE_C;
  1826 :}
  1828 /* Floating point moves */
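       /* With FPSCR.SZ=1 (tracked statically in sh4_x86.double_size) FMOV
        * transfers a 64-bit register pair; otherwise a single 32-bit FR. */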
  1829 FMOV FRm, FRn {:  
  1830     COUNT_INST(I_FMOV1);
  1831     check_fpuen();
  1832     if( sh4_x86.double_size ) {
  1833         load_dr0( R_EAX, FRm );
  1834         load_dr1( R_ECX, FRm );
  1835         store_dr0( R_EAX, FRn );
  1836         store_dr1( R_ECX, FRn );
  1837     } else {
  1838         load_fr( R_EAX, FRm ); // SZ=0 branch
  1839         store_fr( R_EAX, FRn );
  1840     }
  1841 :}
  1842 FMOV FRm, @Rn {: 
  1843     COUNT_INST(I_FMOV2);
  1844     check_fpuen();
  1845     load_reg( R_EAX, Rn );
  1846     if( sh4_x86.double_size ) {
  1847         check_walign64( R_EAX );
  1848         MMU_TRANSLATE_WRITE( R_EAX );
  1849         load_dr0( R_EDX, FRm );
  1850         load_dr1( R_ECX, FRm );
  1851         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1852     } else {
  1853         check_walign32( R_EAX );
  1854         MMU_TRANSLATE_WRITE( R_EAX );
  1855         load_fr( R_EDX, FRm );
  1856         MEM_WRITE_LONG( R_EAX, R_EDX );
  1857     }
  1858     sh4_x86.tstate = TSTATE_NONE;
  1859 :}
  1860 FMOV @Rm, FRn {:  
  1861     COUNT_INST(I_FMOV5);
  1862     check_fpuen();
  1863     load_reg( R_EAX, Rm );
  1864     if( sh4_x86.double_size ) {
  1865         check_ralign64( R_EAX );
  1866         MMU_TRANSLATE_READ( R_EAX );
  1867         MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
  1868         store_dr0( R_EDX, FRn );
  1869         store_dr1( R_EAX, FRn );    
  1870     } else {
  1871         check_ralign32( R_EAX );
  1872         MMU_TRANSLATE_READ( R_EAX );
  1873         MEM_READ_LONG( R_EAX, R_EAX );
  1874         store_fr( R_EAX, FRn );
  1875     }
  1876     sh4_x86.tstate = TSTATE_NONE;
  1877 :}
  1878 FMOV FRm, @-Rn {:  
  1879     COUNT_INST(I_FMOV3);
  1880     check_fpuen();
  1881     load_reg( R_EAX, Rn );
  1882     if( sh4_x86.double_size ) {
  1883         check_walign64( R_EAX );
  1884         ADD_imm8s_r32(-8,R_EAX);
  1885         MMU_TRANSLATE_WRITE( R_EAX );
  1886         load_dr0( R_EDX, FRm );
  1887         load_dr1( R_ECX, FRm );
  1888         ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1889         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1890     } else {
  1891         check_walign32( R_EAX );
  1892         ADD_imm8s_r32( -4, R_EAX );
  1893         MMU_TRANSLATE_WRITE( R_EAX );
  1894         load_fr( R_EDX, FRm );
  1895         ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1896         MEM_WRITE_LONG( R_EAX, R_EDX );
  1897     }
  1898     sh4_x86.tstate = TSTATE_NONE;
  1899 :}
  1900 FMOV @Rm+, FRn {:
  1901     COUNT_INST(I_FMOV6);
  1902     check_fpuen();
  1903     load_reg( R_EAX, Rm );
  1904     if( sh4_x86.double_size ) {
  1905         check_ralign64( R_EAX );
  1906         MMU_TRANSLATE_READ( R_EAX );
  1907         ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1908         MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
  1909         store_dr0( R_EDX, FRn );
  1910         store_dr1( R_EAX, FRn );
  1911     } else {
  1912         check_ralign32( R_EAX );
  1913         MMU_TRANSLATE_READ( R_EAX );
  1914         ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1915         MEM_READ_LONG( R_EAX, R_EAX );
  1916         store_fr( R_EAX, FRn );
  1917     }
  1918     sh4_x86.tstate = TSTATE_NONE;
  1919 :}
  1920 FMOV FRm, @(R0, Rn) {:  
  1921     COUNT_INST(I_FMOV4);
  1922     check_fpuen();
  1923     load_reg( R_EAX, Rn );
  1924     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1925     if( sh4_x86.double_size ) {
  1926         check_walign64( R_EAX );
  1927         MMU_TRANSLATE_WRITE( R_EAX );
  1928         load_dr0( R_EDX, FRm );
  1929         load_dr1( R_ECX, FRm );
  1930         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1931     } else {
  1932         check_walign32( R_EAX );
  1933         MMU_TRANSLATE_WRITE( R_EAX );
  1934         load_fr( R_EDX, FRm );
  1935         MEM_WRITE_LONG( R_EAX, R_EDX ); // 12
  1936     }
  1937     sh4_x86.tstate = TSTATE_NONE;
  1938 :}
  1939 FMOV @(R0, Rm), FRn {:  
  1940     COUNT_INST(I_FMOV7);
  1941     check_fpuen();
  1942     load_reg( R_EAX, Rm );
  1943     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1944     if( sh4_x86.double_size ) {
  1945         check_ralign64( R_EAX );
  1946         MMU_TRANSLATE_READ( R_EAX );
  1947         MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1948         store_dr0( R_ECX, FRn );
  1949         store_dr1( R_EAX, FRn );
  1950     } else {
  1951         check_ralign32( R_EAX );
  1952         MMU_TRANSLATE_READ( R_EAX );
  1953         MEM_READ_LONG( R_EAX, R_EAX );
  1954         store_fr( R_EAX, FRn );
  1955     }
  1956     sh4_x86.tstate = TSTATE_NONE;
  1957 :}
  1958 FLDI0 FRn {:  /* IFF PR=0 */
  1959     COUNT_INST(I_FLDI0);
  1960     check_fpuen();
  1961     if( sh4_x86.double_prec == 0 ) {
  1962         XOR_r32_r32( R_EAX, R_EAX );
  1963         store_fr( R_EAX, FRn );
  1964     }
  1965     sh4_x86.tstate = TSTATE_NONE;
  1966 :}
  1967 FLDI1 FRn {:  /* IFF PR=0 */
  1968     COUNT_INST(I_FLDI1);
  1969     check_fpuen();
  1970     if( sh4_x86.double_prec == 0 ) {
  1971         load_imm32(R_EAX, 0x3F800000);
  1972         store_fr( R_EAX, FRn );
  1973     }
  1974 :}
  1976 FLOAT FPUL, FRn {:  
  1977     COUNT_INST(I_FLOAT);
  1978     check_fpuen();
  1979     FILD_sh4r(R_FPUL);
  1980     if( sh4_x86.double_prec ) {
  1981         pop_dr( FRn );
  1982     } else {
  1983         pop_fr( FRn );
  1984     }
  1985 :}
  1986 FTRC FRm, FPUL {:  
  1987     COUNT_INST(I_FTRC);
  1988     check_fpuen();
  1989     if( sh4_x86.double_prec ) {
  1990         push_dr( FRm );
  1991     } else {
  1992         push_fr( FRm );
  1993     }
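           /* Clamp against max_int/min_int first (FTRC saturates on
            * overflow), then truncate via a temporary FPU control word that
            * selects round-to-zero. */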
  1994     load_ptr( R_ECX, &max_int );
  1995     FILD_r32ind( R_ECX );
  1996     FCOMIP_st(1);
  1997     JNA_rel8( sat );
  1998     load_ptr( R_ECX, &min_int );  // 5
  1999     FILD_r32ind( R_ECX );           // 2
  2000     FCOMIP_st(1);                   // 2
  2001     JAE_rel8( sat2 );            // 2
  2002     load_ptr( R_EAX, &save_fcw );
  2003     FNSTCW_r32ind( R_EAX );
  2004     load_ptr( R_EDX, &trunc_fcw );
  2005     FLDCW_r32ind( R_EDX );
  2006     FISTP_sh4r(R_FPUL);             // 3
  2007     FLDCW_r32ind( R_EAX );
  2008     JMP_rel8(end);             // 2
  2010     JMP_TARGET(sat);
  2011     JMP_TARGET(sat2);
  2012     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  2013     store_spreg( R_ECX, R_FPUL );
  2014     FPOP_st();
  2015     JMP_TARGET(end);
  2016     sh4_x86.tstate = TSTATE_NONE;
  2017 :}
  2018 FLDS FRm, FPUL {:  
  2019     COUNT_INST(I_FLDS);
  2020     check_fpuen();
  2021     load_fr( R_EAX, FRm );
  2022     store_spreg( R_EAX, R_FPUL );
  2023 :}
  2024 FSTS FPUL, FRn {:  
  2025     COUNT_INST(I_FSTS);
  2026     check_fpuen();
  2027     load_spreg( R_EAX, R_FPUL );
  2028     store_fr( R_EAX, FRn );
  2029 :}
  2030 FCNVDS FRm, FPUL {:  
  2031     COUNT_INST(I_FCNVDS);
  2032     check_fpuen();
  2033     if( sh4_x86.double_prec ) {
  2034         push_dr( FRm );
  2035         pop_fpul();
  2036     }
  2037 :}
  2038 FCNVSD FPUL, FRn {:  
  2039     COUNT_INST(I_FCNVSD);
  2040     check_fpuen();
  2041     if( sh4_x86.double_prec ) {
  2042         push_fpul();
  2043         pop_dr( FRn );
  2044     }
  2045 :}
  2047 /* Floating point instructions */
  2048 FABS FRn {:  
  2049     COUNT_INST(I_FABS);
  2050     check_fpuen();
  2051     if( sh4_x86.double_prec ) {
  2052         push_dr(FRn);
  2053         FABS_st0();
  2054         pop_dr(FRn);
  2055     } else {
  2056         push_fr(FRn);
  2057         FABS_st0();
  2058         pop_fr(FRn);
  2059     }
  2060 :}
  2061 FADD FRm, FRn {:  
  2062     COUNT_INST(I_FADD);
  2063     check_fpuen();
  2064     if( sh4_x86.double_prec ) {
  2065         push_dr(FRm);
  2066         push_dr(FRn);
  2067         FADDP_st(1);
  2068         pop_dr(FRn);
  2069     } else {
  2070         push_fr(FRm);
  2071         push_fr(FRn);
  2072         FADDP_st(1);
  2073         pop_fr(FRn);
  2074     }
  2075 :}
  2076 FDIV FRm, FRn {:  
  2077     COUNT_INST(I_FDIV);
  2078     check_fpuen();
  2079     if( sh4_x86.double_prec ) {
  2080         push_dr(FRn);
  2081         push_dr(FRm);
  2082         FDIVP_st(1);
  2083         pop_dr(FRn);
  2084     } else {
  2085         push_fr(FRn);
  2086         push_fr(FRm);
  2087         FDIVP_st(1);
  2088         pop_fr(FRn);
  2089     }
  2090 :}
  2091 FMAC FR0, FRm, FRn {:  
  2092     COUNT_INST(I_FMAC);
  2093     check_fpuen();
  2094     if( sh4_x86.double_prec ) {
  2095         push_dr( 0 );
  2096         push_dr( FRm );
  2097         FMULP_st(1);
  2098         push_dr( FRn );
  2099         FADDP_st(1);
  2100         pop_dr( FRn );
  2101     } else {
  2102         push_fr( 0 );
  2103         push_fr( FRm );
  2104         FMULP_st(1);
  2105         push_fr( FRn );
  2106         FADDP_st(1);
  2107         pop_fr( FRn );
  2108     }
  2109 :}
  2111 FMUL FRm, FRn {:  
  2112     COUNT_INST(I_FMUL);
  2113     check_fpuen();
  2114     if( sh4_x86.double_prec ) {
  2115         push_dr(FRm);
  2116         push_dr(FRn);
  2117         FMULP_st(1);
  2118         pop_dr(FRn);
  2119     } else {
  2120         push_fr(FRm);
  2121         push_fr(FRn);
  2122         FMULP_st(1);
  2123         pop_fr(FRn);
  2124     }
  2125 :}
  2126 FNEG FRn {:  
  2127     COUNT_INST(I_FNEG);
  2128     check_fpuen();
  2129     if( sh4_x86.double_prec ) {
  2130         push_dr(FRn);
  2131         FCHS_st0();
  2132         pop_dr(FRn);
  2133     } else {
  2134         push_fr(FRn);
  2135         FCHS_st0();
  2136         pop_fr(FRn);
  2137     }
  2138 :}
  2139 FSRRA FRn {:  
  2140     COUNT_INST(I_FSRRA);
  2141     check_fpuen();
  2142     if( sh4_x86.double_prec == 0 ) {
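               /* x87 has no reciprocal-sqrt instruction, so this computes
                * 1.0/sqrt(x) in full precision (hardware FSRRA is only an
                * approximation). */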
  2143         FLD1_st0();
  2144         push_fr(FRn);
  2145         FSQRT_st0();
  2146         FDIVP_st(1);
  2147         pop_fr(FRn);
  2148     }
  2149 :}
  2150 FSQRT FRn {:  
  2151     COUNT_INST(I_FSQRT);
  2152     check_fpuen();
  2153     if( sh4_x86.double_prec ) {
  2154         push_dr(FRn);
  2155         FSQRT_st0();
  2156         pop_dr(FRn);
  2157     } else {
  2158         push_fr(FRn);
  2159         FSQRT_st0();
  2160         pop_fr(FRn);
  2161     }
  2162 :}
  2163 FSUB FRm, FRn {:  
  2164     COUNT_INST(I_FSUB);
  2165     check_fpuen();
  2166     if( sh4_x86.double_prec ) {
  2167         push_dr(FRn);
  2168         push_dr(FRm);
  2169         FSUBP_st(1);
  2170         pop_dr(FRn);
  2171     } else {
  2172         push_fr(FRn);
  2173         push_fr(FRm);
  2174         FSUBP_st(1);
  2175         pop_fr(FRn);
  2176     }
  2177 :}
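       /* FP compares: FCOMIP sets the host flags, SETE/SETA latch the result
        * into T, and sh4_x86.tstate records which host condition mirrors T so
        * a following conditional branch can reuse the flags. */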
  2179 FCMP/EQ FRm, FRn {:  
  2180     COUNT_INST(I_FCMPEQ);
  2181     check_fpuen();
  2182     if( sh4_x86.double_prec ) {
  2183         push_dr(FRm);
  2184         push_dr(FRn);
  2185     } else {
  2186         push_fr(FRm);
  2187         push_fr(FRn);
  2188     }
  2189     FCOMIP_st(1);
  2190     SETE_t();
  2191     FPOP_st();
  2192     sh4_x86.tstate = TSTATE_E;
  2193 :}
  2194 FCMP/GT FRm, FRn {:  
  2195     COUNT_INST(I_FCMPGT);
  2196     check_fpuen();
  2197     if( sh4_x86.double_prec ) {
  2198         push_dr(FRm);
  2199         push_dr(FRn);
  2200     } else {
  2201         push_fr(FRm);
  2202         push_fr(FRn);
  2203     }
  2204     FCOMIP_st(1);
  2205     SETA_t();
  2206     FPOP_st();
  2207     sh4_x86.tstate = TSTATE_A;
  2208 :}
  2210 FSCA FPUL, FRn {:  
  2211     COUNT_INST(I_FSCA);
  2212     check_fpuen();
  2213     if( sh4_x86.double_prec == 0 ) {
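               /* sh4_fsca computes the sin/cos pair for the FPUL angle in C
                * and writes it to the even-aligned FR pair (FRn & ~1). */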
  2214         LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
  2215         load_spreg( R_EAX, R_FPUL );
  2216         call_func2( sh4_fsca, R_EAX, R_EDX );
  2217     }
  2218     sh4_x86.tstate = TSTATE_NONE;
  2219 :}
  2220 FIPR FVm, FVn {:  
  2221     COUNT_INST(I_FIPR);
  2222     check_fpuen();
  2223     if( sh4_x86.double_prec == 0 ) {
  2224         if( sh4_x86.sse3_enabled ) {
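                   /* 4-wide dot product: MULPS multiplies the two vectors,
                    * then two HADDPS passes reduce to a single sum, stored
                    * into the last element of FVn. */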
  2225             MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2226             MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2227             HADDPS_xmm_xmm( 4, 4 ); 
  2228             HADDPS_xmm_xmm( 4, 4 );
  2229             MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2230         } else {
  2231             push_fr( FVm<<2 );
  2232             push_fr( FVn<<2 );
  2233             FMULP_st(1);
  2234             push_fr( (FVm<<2)+1);
  2235             push_fr( (FVn<<2)+1);
  2236             FMULP_st(1);
  2237             FADDP_st(1);
  2238             push_fr( (FVm<<2)+2);
  2239             push_fr( (FVn<<2)+2);
  2240             FMULP_st(1);
  2241             FADDP_st(1);
  2242             push_fr( (FVm<<2)+3);
  2243             push_fr( (FVn<<2)+3);
  2244             FMULP_st(1);
  2245             FADDP_st(1);
  2246             pop_fr( (FVn<<2)+3);
  2247         }
  2248     }
  2249 :}
  2250 FTRV XMTRX, FVn {:  
  2251     COUNT_INST(I_FTRV);
  2252     check_fpuen();
  2253     if( sh4_x86.double_prec == 0 ) {
  2254         if( sh4_x86.sse3_enabled ) {
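                   /* Matrix-vector multiply: load the four XMTRX columns,
                    * broadcast each element of FVn, and accumulate the four
                    * element-by-column products back into FVn. */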
  2255             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2256             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2257             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2258             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2260             MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2261             MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2262             MOVAPS_xmm_xmm( 4, 6 );
  2263             MOVAPS_xmm_xmm( 5, 7 );
  2264             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2265             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2266             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2267             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2268             MULPS_xmm_xmm( 0, 4 );
  2269             MULPS_xmm_xmm( 1, 5 );
  2270             MULPS_xmm_xmm( 2, 6 );
  2271             MULPS_xmm_xmm( 3, 7 );
  2272             ADDPS_xmm_xmm( 5, 4 );
  2273             ADDPS_xmm_xmm( 7, 6 );
  2274             ADDPS_xmm_xmm( 6, 4 );
  2275             MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2276         } else {
  2277             LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
  2278             call_func1( sh4_ftrv, R_EAX );
  2279         }
  2280     }
  2281     sh4_x86.tstate = TSTATE_NONE;
  2282 :}
  2284 FRCHG {:  
  2285     COUNT_INST(I_FRCHG);
  2286     check_fpuen();
  2287     load_spreg( R_ECX, R_FPSCR );
  2288     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2289     store_spreg( R_ECX, R_FPSCR );
  2290     call_func0( sh4_switch_fr_banks );
  2291     sh4_x86.tstate = TSTATE_NONE;
  2292 :}
  2293 FSCHG {:  
  2294     COUNT_INST(I_FSCHG);
  2295     check_fpuen();
  2296     load_spreg( R_ECX, R_FPSCR );
  2297     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2298     store_spreg( R_ECX, R_FPSCR );
  2299     sh4_x86.tstate = TSTATE_NONE;
  2300     sh4_x86.double_size = !sh4_x86.double_size;
  2301 :}
  2303 /* Processor control instructions */
  2304 LDC Rm, SR {:
  2305     COUNT_INST(I_LDCSR);
  2306     if( sh4_x86.in_delay_slot ) {
  2307 	SLOTILLEGAL();
  2308     } else {
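       	/* Writing SR can flip register banks and privilege mode, so it goes
       	 * through sh4_write_sr and the cached mode checks are dropped. */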
  2309 	check_priv();
  2310 	load_reg( R_EAX, Rm );
  2311 	call_func1( sh4_write_sr, R_EAX );
  2312 	sh4_x86.priv_checked = FALSE;
  2313 	sh4_x86.fpuen_checked = FALSE;
  2314 	sh4_x86.tstate = TSTATE_NONE;
  2315     }
  2316 :}
  2317 LDC Rm, GBR {: 
  2318     COUNT_INST(I_LDC);
  2319     load_reg( R_EAX, Rm );
  2320     store_spreg( R_EAX, R_GBR );
  2321 :}
  2322 LDC Rm, VBR {:  
  2323     COUNT_INST(I_LDC);
  2324     check_priv();
  2325     load_reg( R_EAX, Rm );
  2326     store_spreg( R_EAX, R_VBR );
  2327     sh4_x86.tstate = TSTATE_NONE;
  2328 :}
  2329 LDC Rm, SSR {:  
  2330     COUNT_INST(I_LDC);
  2331     check_priv();
  2332     load_reg( R_EAX, Rm );
  2333     store_spreg( R_EAX, R_SSR );
  2334     sh4_x86.tstate = TSTATE_NONE;
  2335 :}
  2336 LDC Rm, SGR {:  
  2337     COUNT_INST(I_LDC);
  2338     check_priv();
  2339     load_reg( R_EAX, Rm );
  2340     store_spreg( R_EAX, R_SGR );
  2341     sh4_x86.tstate = TSTATE_NONE;
  2342 :}
  2343 LDC Rm, SPC {:  
  2344     COUNT_INST(I_LDC);
  2345     check_priv();
  2346     load_reg( R_EAX, Rm );
  2347     store_spreg( R_EAX, R_SPC );
  2348     sh4_x86.tstate = TSTATE_NONE;
  2349 :}
  2350 LDC Rm, DBR {:  
  2351     COUNT_INST(I_LDC);
  2352     check_priv();
  2353     load_reg( R_EAX, Rm );
  2354     store_spreg( R_EAX, R_DBR );
  2355     sh4_x86.tstate = TSTATE_NONE;
  2356 :}
  2357 LDC Rm, Rn_BANK {:  
  2358     COUNT_INST(I_LDC);
  2359     check_priv();
  2360     load_reg( R_EAX, Rm );
  2361     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2362     sh4_x86.tstate = TSTATE_NONE;
  2363 :}
  2364 LDC.L @Rm+, GBR {:  
  2365     COUNT_INST(I_LDCM);
  2366     load_reg( R_EAX, Rm );
  2367     check_ralign32( R_EAX );
  2368     MMU_TRANSLATE_READ( R_EAX );
  2369     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2370     MEM_READ_LONG( R_EAX, R_EAX );
  2371     store_spreg( R_EAX, R_GBR );
  2372     sh4_x86.tstate = TSTATE_NONE;
  2373 :}
  2374 LDC.L @Rm+, SR {:
  2375     COUNT_INST(I_LDCSRM);
  2376     if( sh4_x86.in_delay_slot ) {
  2377 	SLOTILLEGAL();
  2378     } else {
  2379 	check_priv();
  2380 	load_reg( R_EAX, Rm );
  2381 	check_ralign32( R_EAX );
  2382 	MMU_TRANSLATE_READ( R_EAX );
  2383 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2384 	MEM_READ_LONG( R_EAX, R_EAX );
  2385 	call_func1( sh4_write_sr, R_EAX );
  2386 	sh4_x86.priv_checked = FALSE;
  2387 	sh4_x86.fpuen_checked = FALSE;
  2388 	sh4_x86.tstate = TSTATE_NONE;
  2389     }
  2390 :}
  2391 LDC.L @Rm+, VBR {:  
  2392     COUNT_INST(I_LDCM);
  2393     check_priv();
  2394     load_reg( R_EAX, Rm );
  2395     check_ralign32( R_EAX );
  2396     MMU_TRANSLATE_READ( R_EAX );
  2397     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2398     MEM_READ_LONG( R_EAX, R_EAX );
  2399     store_spreg( R_EAX, R_VBR );
  2400     sh4_x86.tstate = TSTATE_NONE;
  2401 :}
  2402 LDC.L @Rm+, SSR {:
  2403     COUNT_INST(I_LDCM);
  2404     check_priv();
  2405     load_reg( R_EAX, Rm );
  2406     check_ralign32( R_EAX );
  2407     MMU_TRANSLATE_READ( R_EAX );
  2408     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2409     MEM_READ_LONG( R_EAX, R_EAX );
  2410     store_spreg( R_EAX, R_SSR );
  2411     sh4_x86.tstate = TSTATE_NONE;
  2412 :}
  2413 LDC.L @Rm+, SGR {:  
  2414     COUNT_INST(I_LDCM);
  2415     check_priv();
  2416     load_reg( R_EAX, Rm );
  2417     check_ralign32( R_EAX );
  2418     MMU_TRANSLATE_READ( R_EAX );
  2419     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2420     MEM_READ_LONG( R_EAX, R_EAX );
  2421     store_spreg( R_EAX, R_SGR );
  2422     sh4_x86.tstate = TSTATE_NONE;
  2423 :}
  2424 LDC.L @Rm+, SPC {:  
  2425     COUNT_INST(I_LDCM);
  2426     check_priv();
  2427     load_reg( R_EAX, Rm );
  2428     check_ralign32( R_EAX );
  2429     MMU_TRANSLATE_READ( R_EAX );
  2430     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2431     MEM_READ_LONG( R_EAX, R_EAX );
  2432     store_spreg( R_EAX, R_SPC );
  2433     sh4_x86.tstate = TSTATE_NONE;
  2434 :}
  2435 LDC.L @Rm+, DBR {:  
  2436     COUNT_INST(I_LDCM);
  2437     check_priv();
  2438     load_reg( R_EAX, Rm );
  2439     check_ralign32( R_EAX );
  2440     MMU_TRANSLATE_READ( R_EAX );
  2441     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2442     MEM_READ_LONG( R_EAX, R_EAX );
  2443     store_spreg( R_EAX, R_DBR );
  2444     sh4_x86.tstate = TSTATE_NONE;
  2445 :}
  2446 LDC.L @Rm+, Rn_BANK {:  
  2447     COUNT_INST(I_LDCM);
  2448     check_priv();
  2449     load_reg( R_EAX, Rm );
  2450     check_ralign32( R_EAX );
  2451     MMU_TRANSLATE_READ( R_EAX );
  2452     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2453     MEM_READ_LONG( R_EAX, R_EAX );
  2454     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2455     sh4_x86.tstate = TSTATE_NONE;
  2456 :}
  2457 LDS Rm, FPSCR {:
  2458     COUNT_INST(I_LDSFPSCR);
  2459     check_fpuen();
  2460     load_reg( R_EAX, Rm );
  2461     call_func1( sh4_write_fpscr, R_EAX );
  2462     sh4_x86.tstate = TSTATE_NONE;
  2463     return 2;
  2464 :}
  2465 LDS.L @Rm+, FPSCR {:  
  2466     COUNT_INST(I_LDSFPSCRM);
  2467     check_fpuen();
  2468     load_reg( R_EAX, Rm );
  2469     check_ralign32( R_EAX );
  2470     MMU_TRANSLATE_READ( R_EAX );
  2471     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2472     MEM_READ_LONG( R_EAX, R_EAX );
  2473     call_func1( sh4_write_fpscr, R_EAX );
  2474     sh4_x86.tstate = TSTATE_NONE;
  2475     return 2;
  2476 :}
  2477 LDS Rm, FPUL {:  
  2478     COUNT_INST(I_LDS);
  2479     check_fpuen();
  2480     load_reg( R_EAX, Rm );
  2481     store_spreg( R_EAX, R_FPUL );
  2482 :}
  2483 LDS.L @Rm+, FPUL {:  
  2484     COUNT_INST(I_LDSM);
  2485     check_fpuen();
  2486     load_reg( R_EAX, Rm );
  2487     check_ralign32( R_EAX );
  2488     MMU_TRANSLATE_READ( R_EAX );
  2489     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2490     MEM_READ_LONG( R_EAX, R_EAX );
  2491     store_spreg( R_EAX, R_FPUL );
  2492     sh4_x86.tstate = TSTATE_NONE;
  2493 :}
  2494 LDS Rm, MACH {: 
  2495     COUNT_INST(I_LDS);
  2496     load_reg( R_EAX, Rm );
  2497     store_spreg( R_EAX, R_MACH );
  2498 :}
  2499 LDS.L @Rm+, MACH {:  
  2500     COUNT_INST(I_LDSM);
  2501     load_reg( R_EAX, Rm );
  2502     check_ralign32( R_EAX );
  2503     MMU_TRANSLATE_READ( R_EAX );
  2504     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2505     MEM_READ_LONG( R_EAX, R_EAX );
  2506     store_spreg( R_EAX, R_MACH );
  2507     sh4_x86.tstate = TSTATE_NONE;
  2508 :}
  2509 LDS Rm, MACL {:  
  2510     COUNT_INST(I_LDS);
  2511     load_reg( R_EAX, Rm );
  2512     store_spreg( R_EAX, R_MACL );
  2513 :}
  2514 LDS.L @Rm+, MACL {:  
  2515     COUNT_INST(I_LDSM);
  2516     load_reg( R_EAX, Rm );
  2517     check_ralign32( R_EAX );
  2518     MMU_TRANSLATE_READ( R_EAX );
  2519     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2520     MEM_READ_LONG( R_EAX, R_EAX );
  2521     store_spreg( R_EAX, R_MACL );
  2522     sh4_x86.tstate = TSTATE_NONE;
  2523 :}
  2524 LDS Rm, PR {:  
  2525     COUNT_INST(I_LDS);
  2526     load_reg( R_EAX, Rm );
  2527     store_spreg( R_EAX, R_PR );
  2528 :}
  2529 LDS.L @Rm+, PR {:  
  2530     COUNT_INST(I_LDSM);
  2531     load_reg( R_EAX, Rm );
  2532     check_ralign32( R_EAX );
  2533     MMU_TRANSLATE_READ( R_EAX );
  2534     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2535     MEM_READ_LONG( R_EAX, R_EAX );
  2536     store_spreg( R_EAX, R_PR );
  2537     sh4_x86.tstate = TSTATE_NONE;
  2538 :}
  2539 LDTLB {:  
  2540     COUNT_INST(I_LDTLB);
  2541     call_func0( MMU_ldtlb );
  2542     sh4_x86.tstate = TSTATE_NONE;
  2543 :}
  2544 OCBI @Rn {:
  2545     COUNT_INST(I_OCBI);
  2546 :}
  2547 OCBP @Rn {:
  2548     COUNT_INST(I_OCBP);
  2549 :}
  2550 OCBWB @Rn {:
  2551     COUNT_INST(I_OCBWB);
  2552 :}
  2553 PREF @Rn {:
  2554     COUNT_INST(I_PREF);
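           /* PREF only has an architectural side effect for store-queue
            * addresses (0xE0000000-0xE3FFFFFF): test the top bits, then flush
            * the SQ, via the MMU-aware path when translation is on. */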
  2555     load_reg( R_EAX, Rn );
  2556     MOV_r32_r32( R_EAX, R_ECX );
  2557     AND_imm32_r32( 0xFC000000, R_ECX );
  2558     CMP_imm32_r32( 0xE0000000, R_ECX );
  2559     JNE_rel8(end);
  2560     if( sh4_x86.tlb_on ) {
  2561     	call_func1( sh4_flush_store_queue_mmu, R_EAX );
  2562         TEST_r32_r32( R_EAX, R_EAX );
  2563         JE_exc(-1);
  2564     } else {
  2565     	call_func1( sh4_flush_store_queue, R_EAX );
  2566     }
  2567     JMP_TARGET(end);
  2568     sh4_x86.tstate = TSTATE_NONE;
  2569 :}
  2570 SLEEP {: 
  2571     COUNT_INST(I_SLEEP);
  2572     check_priv();
  2573     call_func0( sh4_sleep );
  2574     sh4_x86.tstate = TSTATE_NONE;
  2575     sh4_x86.in_delay_slot = DELAY_NONE;
  2576     return 2;
  2577 :}
  2578 STC SR, Rn {:
  2579     COUNT_INST(I_STCSR);
  2580     check_priv();
  2581     call_func0(sh4_read_sr);
  2582     store_reg( R_EAX, Rn );
  2583     sh4_x86.tstate = TSTATE_NONE;
  2584 :}
  2585 STC GBR, Rn {:  
  2586     COUNT_INST(I_STC);
  2587     load_spreg( R_EAX, R_GBR );
  2588     store_reg( R_EAX, Rn );
  2589 :}
  2590 STC VBR, Rn {:  
  2591     COUNT_INST(I_STC);
  2592     check_priv();
  2593     load_spreg( R_EAX, R_VBR );
  2594     store_reg( R_EAX, Rn );
  2595     sh4_x86.tstate = TSTATE_NONE;
  2596 :}
  2597 STC SSR, Rn {:  
  2598     COUNT_INST(I_STC);
  2599     check_priv();
  2600     load_spreg( R_EAX, R_SSR );
  2601     store_reg( R_EAX, Rn );
  2602     sh4_x86.tstate = TSTATE_NONE;
  2603 :}
  2604 STC SPC, Rn {:  
  2605     COUNT_INST(I_STC);
  2606     check_priv();
  2607     load_spreg( R_EAX, R_SPC );
  2608     store_reg( R_EAX, Rn );
  2609     sh4_x86.tstate = TSTATE_NONE;
  2610 :}
  2611 STC SGR, Rn {:  
  2612     COUNT_INST(I_STC);
  2613     check_priv();
  2614     load_spreg( R_EAX, R_SGR );
  2615     store_reg( R_EAX, Rn );
  2616     sh4_x86.tstate = TSTATE_NONE;
  2617 :}
  2618 STC DBR, Rn {:  
  2619     COUNT_INST(I_STC);
  2620     check_priv();
  2621     load_spreg( R_EAX, R_DBR );
  2622     store_reg( R_EAX, Rn );
  2623     sh4_x86.tstate = TSTATE_NONE;
  2624 :}
  2625 STC Rm_BANK, Rn {:
  2626     COUNT_INST(I_STC);
  2627     check_priv();
  2628     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2629     store_reg( R_EAX, Rn );
  2630     sh4_x86.tstate = TSTATE_NONE;
  2631 :}
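       /* The STC.L stores below use the pre-decrement pattern: translate the
        * decremented address first, so an MMU exception leaves Rn untouched,
        * and only write back the new Rn once the address is known good. */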
  2632 STC.L SR, @-Rn {:
  2633     COUNT_INST(I_STCSRM);
  2634     check_priv();
  2635     load_reg( R_EAX, Rn );
  2636     check_walign32( R_EAX );
  2637     ADD_imm8s_r32( -4, R_EAX );
  2638     MMU_TRANSLATE_WRITE( R_EAX );
  2639     PUSH_realigned_r32( R_EAX );
  2640     call_func0( sh4_read_sr );
  2641     POP_realigned_r32( R_ECX );
  2642     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2643     MEM_WRITE_LONG( R_ECX, R_EAX );
  2644     sh4_x86.tstate = TSTATE_NONE;
  2645 :}
  2646 STC.L VBR, @-Rn {:  
  2647     COUNT_INST(I_STCM);
  2648     check_priv();
  2649     load_reg( R_EAX, Rn );
  2650     check_walign32( R_EAX );
  2651     ADD_imm8s_r32( -4, R_EAX );
  2652     MMU_TRANSLATE_WRITE( R_EAX );
  2653     load_spreg( R_EDX, R_VBR );
  2654     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2655     MEM_WRITE_LONG( R_EAX, R_EDX );
  2656     sh4_x86.tstate = TSTATE_NONE;
  2657 :}
  2658 STC.L SSR, @-Rn {:  
  2659     COUNT_INST(I_STCM);
  2660     check_priv();
  2661     load_reg( R_EAX, Rn );
  2662     check_walign32( R_EAX );
  2663     ADD_imm8s_r32( -4, R_EAX );
  2664     MMU_TRANSLATE_WRITE( R_EAX );
  2665     load_spreg( R_EDX, R_SSR );
  2666     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2667     MEM_WRITE_LONG( R_EAX, R_EDX );
  2668     sh4_x86.tstate = TSTATE_NONE;
  2669 :}
  2670 STC.L SPC, @-Rn {:
  2671     COUNT_INST(I_STCM);
  2672     check_priv();
  2673     load_reg( R_EAX, Rn );
  2674     check_walign32( R_EAX );
  2675     ADD_imm8s_r32( -4, R_EAX );
  2676     MMU_TRANSLATE_WRITE( R_EAX );
  2677     load_spreg( R_EDX, R_SPC );
  2678     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2679     MEM_WRITE_LONG( R_EAX, R_EDX );
  2680     sh4_x86.tstate = TSTATE_NONE;
  2681 :}
  2682 STC.L SGR, @-Rn {:  
  2683     COUNT_INST(I_STCM);
  2684     check_priv();
  2685     load_reg( R_EAX, Rn );
  2686     check_walign32( R_EAX );
  2687     ADD_imm8s_r32( -4, R_EAX );
  2688     MMU_TRANSLATE_WRITE( R_EAX );
  2689     load_spreg( R_EDX, R_SGR );
  2690     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2691     MEM_WRITE_LONG( R_EAX, R_EDX );
  2692     sh4_x86.tstate = TSTATE_NONE;
  2693 :}
  2694 STC.L DBR, @-Rn {:  
  2695     COUNT_INST(I_STCM);
  2696     check_priv();
  2697     load_reg( R_EAX, Rn );
  2698     check_walign32( R_EAX );
  2699     ADD_imm8s_r32( -4, R_EAX );
  2700     MMU_TRANSLATE_WRITE( R_EAX );
  2701     load_spreg( R_EDX, R_DBR );
  2702     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2703     MEM_WRITE_LONG( R_EAX, R_EDX );
  2704     sh4_x86.tstate = TSTATE_NONE;
  2705 :}
  2706 STC.L Rm_BANK, @-Rn {:  
  2707     COUNT_INST(I_STCM);
  2708     check_priv();
  2709     load_reg( R_EAX, Rn );
  2710     check_walign32( R_EAX );
  2711     ADD_imm8s_r32( -4, R_EAX );
  2712     MMU_TRANSLATE_WRITE( R_EAX );
  2713     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2714     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2715     MEM_WRITE_LONG( R_EAX, R_EDX );
  2716     sh4_x86.tstate = TSTATE_NONE;
  2717 :}
  2718 STC.L GBR, @-Rn {:  
  2719     COUNT_INST(I_STCM);
  2720     load_reg( R_EAX, Rn );
  2721     check_walign32( R_EAX );
  2722     ADD_imm8s_r32( -4, R_EAX );
  2723     MMU_TRANSLATE_WRITE( R_EAX );
  2724     load_spreg( R_EDX, R_GBR );
  2725     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2726     MEM_WRITE_LONG( R_EAX, R_EDX );
  2727     sh4_x86.tstate = TSTATE_NONE;
  2728 :}
  2729 STS FPSCR, Rn {:  
  2730     COUNT_INST(I_STSFPSCR);
  2731     check_fpuen();
  2732     load_spreg( R_EAX, R_FPSCR );
  2733     store_reg( R_EAX, Rn );
  2734 :}
  2735 STS.L FPSCR, @-Rn {:  
  2736     COUNT_INST(I_STSFPSCRM);
  2737     check_fpuen();
  2738     load_reg( R_EAX, Rn );
  2739     check_walign32( R_EAX );
  2740     ADD_imm8s_r32( -4, R_EAX );
  2741     MMU_TRANSLATE_WRITE( R_EAX );
  2742     load_spreg( R_EDX, R_FPSCR );
  2743     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2744     MEM_WRITE_LONG( R_EAX, R_EDX );
  2745     sh4_x86.tstate = TSTATE_NONE;
  2746 :}
  2747 STS FPUL, Rn {:  
  2748     COUNT_INST(I_STS);
  2749     check_fpuen();
  2750     load_spreg( R_EAX, R_FPUL );
  2751     store_reg( R_EAX, Rn );
  2752 :}
  2753 STS.L FPUL, @-Rn {:  
  2754     COUNT_INST(I_STSM);
  2755     check_fpuen();
  2756     load_reg( R_EAX, Rn );
  2757     check_walign32( R_EAX );
  2758     ADD_imm8s_r32( -4, R_EAX );
  2759     MMU_TRANSLATE_WRITE( R_EAX );
  2760     load_spreg( R_EDX, R_FPUL );
  2761     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2762     MEM_WRITE_LONG( R_EAX, R_EDX );
  2763     sh4_x86.tstate = TSTATE_NONE;
  2764 :}
  2765 STS MACH, Rn {:  
  2766     COUNT_INST(I_STS);
  2767     load_spreg( R_EAX, R_MACH );
  2768     store_reg( R_EAX, Rn );
  2769 :}
  2770 STS.L MACH, @-Rn {:  
  2771     COUNT_INST(I_STSM);
  2772     load_reg( R_EAX, Rn );
  2773     check_walign32( R_EAX );
  2774     ADD_imm8s_r32( -4, R_EAX );
  2775     MMU_TRANSLATE_WRITE( R_EAX );
  2776     load_spreg( R_EDX, R_MACH );
  2777     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2778     MEM_WRITE_LONG( R_EAX, R_EDX );
  2779     sh4_x86.tstate = TSTATE_NONE;
  2780 :}
  2781 STS MACL, Rn {:  
  2782     COUNT_INST(I_STS);
  2783     load_spreg( R_EAX, R_MACL );
  2784     store_reg( R_EAX, Rn );
  2785 :}
  2786 STS.L MACL, @-Rn {:  
  2787     COUNT_INST(I_STSM);
  2788     load_reg( R_EAX, Rn );
  2789     check_walign32( R_EAX );
  2790     ADD_imm8s_r32( -4, R_EAX );
  2791     MMU_TRANSLATE_WRITE( R_EAX );
  2792     load_spreg( R_EDX, R_MACL );
  2793     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2794     MEM_WRITE_LONG( R_EAX, R_EDX );
  2795     sh4_x86.tstate = TSTATE_NONE;
  2796 :}
  2797 STS PR, Rn {:  
  2798     COUNT_INST(I_STS);
  2799     load_spreg( R_EAX, R_PR );
  2800     store_reg( R_EAX, Rn );
  2801 :}
  2802 STS.L PR, @-Rn {:  
  2803     COUNT_INST(I_STSM);
  2804     load_reg( R_EAX, Rn );
  2805     check_walign32( R_EAX );
  2806     ADD_imm8s_r32( -4, R_EAX );
  2807     MMU_TRANSLATE_WRITE( R_EAX );
  2808     load_spreg( R_EDX, R_PR );
  2809     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2810     MEM_WRITE_LONG( R_EAX, R_EDX );
  2811     sh4_x86.tstate = TSTATE_NONE;
  2812 :}
  2814 NOP {: 
  2815     COUNT_INST(I_NOP);
  2816     /* Do nothing. Well, we could emit a 0x90, but what would really be the point? */
  2817 :}
  2818 %%
  2819     sh4_x86.in_delay_slot = DELAY_NONE;
  2820     return 0;