lxdream.org :: lxdream/src/sh4/sh4x86.in
filename     src/sh4/sh4x86.in
changeset    911:2f6ba75b84d1
prev         908:a00debcf2600
next         926:68f3e0fe02f1
author       nkeynes
date         Thu Dec 11 23:26:03 2008 +0000
permissions  -rw-r--r--
last change  Disable the generational translation cache - I've got no evidence that it
             actually helps performance, and it simplifies things to get rid of it (in
             particular, translated code doesn't have to worry about being moved now).
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "lxdream.h"
    29 #include "sh4/xltcache.h"
    30 #include "sh4/sh4core.h"
    31 #include "sh4/sh4trans.h"
    32 #include "sh4/sh4stat.h"
    33 #include "sh4/sh4mmio.h"
    34 #include "sh4/x86op.h"
    35 #include "clock.h"
    37 #define DEFAULT_BACKPATCH_SIZE 4096
    39 struct backpatch_record {
    40     uint32_t fixup_offset;
    41     uint32_t fixup_icount;
    42     int32_t exc_code;
    43 };
    45 #define DELAY_NONE 0
    46 #define DELAY_PC 1
    47 #define DELAY_PC_PR 2
    49 /** 
    50  * Struct to manage internal translation state. This state is not saved -
    51  * it is only valid between calls to sh4_translate_begin_block() and
    52  * sh4_translate_end_block()
    53  */
    54 struct sh4_x86_state {
    55     int in_delay_slot;
    56     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    57     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    58     gboolean branch_taken; /* true if we branched unconditionally */
    59     gboolean double_prec; /* true if FPU is in double-precision mode */
    60     gboolean double_size; /* true if FPU is in double-size mode */
    61     gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    62     uint32_t block_start_pc;
    63     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    64     int tstate;
    66     /* mode flags */
    67     gboolean tlb_on; /* True if tlb translation is active */
    69     /* Allocated memory for the (block-wide) back-patch list */
    70     struct backpatch_record *backpatch_list;
    71     uint32_t backpatch_posn;
    72     uint32_t backpatch_size;
    73 };
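/* Illustrative note (not part of the original source): this state is only
 * live while a single block is being translated. A minimal sketch of the
 * expected calling sequence, assuming a driver loop along the lines of the
 * one in sh4trans.c:
 *
 *     sh4_translate_begin_block( pc );
 *     uint32_t done = 0;
 *     while( !done ) {
 *         done = sh4_translate_instruction( pc );  // emits x86 code for one insn
 *         pc += 2;                                 // SH4 opcodes are 16 bits
 *     }
 *     sh4_translate_end_block( pc );               // epilogue + exception stubs
 */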
    75 #define TSTATE_NONE -1
    76 #define TSTATE_O    0
    77 #define TSTATE_C    2
    78 #define TSTATE_E    4
    79 #define TSTATE_NE   5
    80 #define TSTATE_G    0xF
    81 #define TSTATE_GE   0xD
    82 #define TSTATE_A    7
    83 #define TSTATE_AE   3
    85 #ifdef ENABLE_SH4STATS
    86 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
    87 #else
    88 #define COUNT_INST(id)
    89 #endif
    91 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    92 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    93 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    94     OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
    96 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    97 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    98 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    99     OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
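/* Illustrative note: the TSTATE_* values above are chosen to be the low
 * nibble of the matching x86 condition code, so 0x70+tstate is the short
 * Jcc opcode and 0x0F 0x80+tstate is the rel32 form, e.g.:
 *
 *     TSTATE_E  (4)   -> 0x74 = JE  rel8   (T is set)
 *     TSTATE_E^1      -> 0x75 = JNE rel8   (T is clear)
 *     TSTATE_G  (0xF) -> 0x7F = JG  rel8
 *
 * xor-ing with 1 negates any x86 condition, which is how JF_rel8 derives the
 * inverse branch. When tstate == TSTATE_NONE the host flags no longer hold
 * the result of the last SH4 comparison, so the macros re-test sh4r.t first.
 */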
   101 static struct sh4_x86_state sh4_x86;
   103 static uint32_t max_int = 0x7FFFFFFF;
   104 static uint32_t min_int = 0x80000000;
   105 static uint32_t save_fcw; /* save value for fpu control word */
   106 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
   108 gboolean is_sse3_supported()
   109 {
   110     uint32_t features;
   112     __asm__ __volatile__(
   113         "mov $0x01, %%eax\n\t"
   114         "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
   115     return (features & 1) ? TRUE : FALSE;
   116 }
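/* Illustrative note: CPUID leaf 1 (EAX=1) returns feature flags in ECX/EDX;
 * bit 0 of ECX is SSE3, which is the only capability this probe needs. */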
   118 void sh4_translate_init(void)
   119 {
   120     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   121     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   122     sh4_x86.sse3_enabled = is_sse3_supported();
   123 }
   126 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   127 {
   128     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   129 	sh4_x86.backpatch_size <<= 1;
   130 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   131 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   132 	assert( sh4_x86.backpatch_list != NULL );
   133     }
   134     if( sh4_x86.in_delay_slot ) {
   135 	fixup_pc -= 2;
   136     }
   137     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   138 	((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
   139     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   140     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   141     sh4_x86.backpatch_posn++;
   142 }
   144 /**
   145  * Emit an instruction to load an SH4 reg into a real register
   146  */
   147 static inline void load_reg( int x86reg, int sh4reg ) 
   148 {
   149     /* mov [bp+n], reg */
   150     OP(0x8B);
   151     OP(0x45 + (x86reg<<3));
   152     OP(REG_OFFSET(r[sh4reg]));
   153 }
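/* Illustrative note: byte-level decoding of the sequence above, assuming the
 * usual x86 register numbering (R_ECX == 1) and sh4reg == 3:
 *
 *     8B 4D nn    mov ecx, [ebp+nn]
 *
 * 0x45 + (x86reg<<3) is the ModRM byte: mod=01 (disp8), r/m=101 (EBP), with
 * the destination register in bits 3-5. Translated code keeps &sh4r in EBP,
 * so REG_OFFSET(r[3]) is the disp8 of sh4r.r[3].
 */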
   155 static inline void load_reg16s( int x86reg, int sh4reg )
   156 {
   157     OP(0x0F);
   158     OP(0xBF);
   159     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   160 }
   162 static inline void load_reg16u( int x86reg, int sh4reg )
   163 {
   164     OP(0x0F);
   165     OP(0xB7);
   166     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   168 }
   170 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   171 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   172 /**
   173  * Emit an instruction to load an immediate value into a register
   174  */
   175 static inline void load_imm32( int x86reg, uint32_t value ) {
   176     /* mov #value, reg */
   177     OP(0xB8 + x86reg);
   178     OP32(value);
   179 }
   181 /**
   182  * Load an immediate 64-bit quantity (note: x86-64 only)
   183  */
   184 static inline void load_imm64( int x86reg, uint64_t value ) {
   185     /* mov #value, reg */
   186     REXW();
   187     OP(0xB8 + x86reg);
   188     OP64(value);
   189 }
   191 /**
   192  * Emit an instruction to store an SH4 reg (RN)
   193  */
    194 static inline void store_reg( int x86reg, int sh4reg ) {
   195     /* mov reg, [bp+n] */
   196     OP(0x89);
   197     OP(0x45 + (x86reg<<3));
   198     OP(REG_OFFSET(r[sh4reg]));
   199 }
   201 /**
   202  * Load an FR register (single-precision floating point) into an integer x86
   203  * register (eg for register-to-register moves)
   204  */
   205 #define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
   206 #define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
   208 /**
   209  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   210  */
   211 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   212 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   214 /**
    215  * Store an FR register (single-precision floating point) from an integer x86
   216  * register (eg for register-to-register moves)
   217  */
   218 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
   219 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
   221 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   222 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   225 #define push_fpul()  FLDF_sh4r(R_FPUL)
   226 #define pop_fpul()   FSTPF_sh4r(R_FPUL)
   227 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   228 #define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   229 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   230 #define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   231 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   232 #define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   233 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   234 #define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
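/* Illustrative note: sh4r.fr keeps the two 32-bit words of each register
 * pair swapped relative to the SH4's numbering, so that every 64-bit DR
 * register is contiguous in host little-endian byte order and FLDD/FSTPD can
 * move it in one access. That is why a single FR register is addressed as
 * fr[bank][(frm)^1], while the DR halves use the raw even/odd indices
 * (frm&0x0E and frm|0x01). */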
   238 /* Exception checks - Note that all exception checks will clobber EAX */
   240 #define check_priv( ) \
   241     if( !sh4_x86.priv_checked ) { \
   242 	sh4_x86.priv_checked = TRUE;\
   243 	load_spreg( R_EAX, R_SR );\
   244 	AND_imm32_r32( SR_MD, R_EAX );\
   245 	if( sh4_x86.in_delay_slot ) {\
   246 	    JE_exc( EXC_SLOT_ILLEGAL );\
   247 	} else {\
   248 	    JE_exc( EXC_ILLEGAL );\
   249 	}\
   250 	sh4_x86.tstate = TSTATE_NONE; \
   251     }\
   253 #define check_fpuen( ) \
   254     if( !sh4_x86.fpuen_checked ) {\
   255 	sh4_x86.fpuen_checked = TRUE;\
   256 	load_spreg( R_EAX, R_SR );\
   257 	AND_imm32_r32( SR_FD, R_EAX );\
   258 	if( sh4_x86.in_delay_slot ) {\
   259 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   260 	} else {\
   261 	    JNE_exc(EXC_FPU_DISABLED);\
   262 	}\
   263 	sh4_x86.tstate = TSTATE_NONE; \
   264     }
   266 #define check_ralign16( x86reg ) \
   267     TEST_imm32_r32( 0x00000001, x86reg ); \
   268     JNE_exc(EXC_DATA_ADDR_READ)
   270 #define check_walign16( x86reg ) \
   271     TEST_imm32_r32( 0x00000001, x86reg ); \
   272     JNE_exc(EXC_DATA_ADDR_WRITE);
   274 #define check_ralign32( x86reg ) \
   275     TEST_imm32_r32( 0x00000003, x86reg ); \
   276     JNE_exc(EXC_DATA_ADDR_READ)
   278 #define check_walign32( x86reg ) \
   279     TEST_imm32_r32( 0x00000003, x86reg ); \
   280     JNE_exc(EXC_DATA_ADDR_WRITE);
   282 #define check_ralign64( x86reg ) \
   283     TEST_imm32_r32( 0x00000007, x86reg ); \
   284     JNE_exc(EXC_DATA_ADDR_READ)
   286 #define check_walign64( x86reg ) \
   287     TEST_imm32_r32( 0x00000007, x86reg ); \
   288     JNE_exc(EXC_DATA_ADDR_WRITE);
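/* Illustrative note: each check tests the low address bits and raises the
 * SH4 address error on misalignment; e.g. check_ralign32 with the address in
 * EAX emits, in effect:
 *
 *     test $0x3, %eax     ; ZF set iff (addr & 3) == 0
 *     jnz  <stub raising EXC_DATA_ADDR_READ>
 *
 * where the stub itself is generated at the end of the block from the
 * backpatch list built by sh4_x86_add_backpatch(). */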
   290 #define UNDEF(ir)
   291 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   292 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   293 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   294 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   295 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   296 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   297 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   299 /**
   300  * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned 
   301  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   302  */
   303 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   305 #define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg) }
   306 /**
   307  * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned 
   308  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   309  */
   310 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
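/* Illustrative note: the -1 passed to JE_exc here appears to mean "exception
 * already raised" - mmu_vma_to_phys_read/write signal failure by returning
 * MMU_VMA_ERROR after queueing the TLB miss or address error themselves, so
 * the backpatched stub only needs to exit the block rather than raise a new
 * exception (contrast the explicit exc_code in MMU_TRANSLATE_READ_EXC). */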
   312 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
   314 /****** Import appropriate calling conventions ******/
   315 #if SIZEOF_VOID_P == 8
   316 #include "sh4/ia64abi.h"
   317 #else /* 32-bit system */
   318 #ifdef APPLE_BUILD
   319 #include "sh4/ia32mac.h"
   320 #else
   321 #include "sh4/ia32abi.h"
   322 #endif
   323 #endif
   325 void sh4_translate_begin_block( sh4addr_t pc ) 
   326 {
    327     enter_block();
   328     sh4_x86.in_delay_slot = FALSE;
   329     sh4_x86.priv_checked = FALSE;
   330     sh4_x86.fpuen_checked = FALSE;
   331     sh4_x86.branch_taken = FALSE;
   332     sh4_x86.backpatch_posn = 0;
   333     sh4_x86.block_start_pc = pc;
   334     sh4_x86.tlb_on = IS_MMU_ENABLED();
   335     sh4_x86.tstate = TSTATE_NONE;
   336     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   337     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
   338 }
   341 uint32_t sh4_translate_end_block_size()
   342 {
   343     if( sh4_x86.backpatch_posn <= 3 ) {
   344         return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
   345     } else {
   346         return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
   347     }
   348 }
   351 /**
   352  * Embed a breakpoint into the generated code
   353  */
   354 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   355 {
   356     load_imm32( R_EAX, pc );
   357     call_func1( sh4_translate_breakpoint_hit, R_EAX );
   358     sh4_x86.tstate = TSTATE_NONE;
   359 }
   362 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
   364 /**
   365  * Embed a call to sh4_execute_instruction for situations that we
   366  * can't translate (just page-crossing delay slots at the moment).
   367  * Caller is responsible for setting new_pc before calling this function.
   368  *
   369  * Performs:
   370  *   Set PC = endpc
   371  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   372  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   373  *   Call sh4_execute_instruction
   374  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   375  */
   376 void exit_block_emu( sh4vma_t endpc )
   377 {
   378     load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
   379     ADD_r32_sh4r( R_ECX, R_PC );
   381     load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
   382     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   383     load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
   384     store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
   386     call_func0( sh4_execute_instruction );    
   387     load_spreg( R_EAX, R_PC );
   388     if( sh4_x86.tlb_on ) {
   389 	call_func1(xlat_get_code_by_vma,R_EAX);
   390     } else {
   391 	call_func1(xlat_get_code,R_EAX);
   392     }
   393     AND_imm8s_rptr( 0xFC, R_EAX );
   394     POP_r32(R_EBP);
   395     RET();
   396 } 
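/* Illustrative note: worked example of the cycle accounting above, assuming
 * block_start_pc = 0x8C001000, endpc = 0x8C001006 and sh4_cpu_period = 5
 * (ns per cycle at 200MHz):
 *
 *     sh4r.pc          += 0x6
 *     sh4r.slice_cycle += ((0x6 >> 1) + 1) * 5   // 4 instructions' worth
 *
 * The +1 charges the instruction at endpc itself, since (as noted above)
 * sh4_execute_instruction does not update slice_cycle. */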
   398 /**
   399  * Translate a single instruction. Delayed branches are handled specially
    400  * by translating both branch and delayed instruction as a single unit (as
    401  * the delay-slot instruction logically belongs to the branch).
   402  * The instruction MUST be in the icache (assert check)
   403  *
   404  * @return true if the instruction marks the end of a basic block
    405  * (eg a branch or an illegal slot instruction)
   406  */
   407 uint32_t sh4_translate_instruction( sh4vma_t pc )
   408 {
   409     uint32_t ir;
   410     /* Read instruction from icache */
   411     assert( IS_IN_ICACHE(pc) );
   412     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   414     if( !sh4_x86.in_delay_slot ) {
   415 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   416     }
   417 %%
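/* Illustrative note: everything below the %% separator is input for
 * lxdream's instruction-decoder generator rather than plain C, as with the
 * other sh4*.in sources. Each template of the form
 *
 *     ADD Rm, Rn {: ...action... :}
 *
 * becomes one case of the generated decoder for sh4_translate_instruction,
 * with the operand fields (Rm, Rn, imm, disp) already extracted from the
 * 16-bit opcode 'ir' when the action body runs. */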
   418 /* ALU operations */
   419 ADD Rm, Rn {:
   420     COUNT_INST(I_ADD);
   421     load_reg( R_EAX, Rm );
   422     load_reg( R_ECX, Rn );
   423     ADD_r32_r32( R_EAX, R_ECX );
   424     store_reg( R_ECX, Rn );
   425     sh4_x86.tstate = TSTATE_NONE;
   426 :}
   427 ADD #imm, Rn {:  
   428     COUNT_INST(I_ADDI);
   429     load_reg( R_EAX, Rn );
   430     ADD_imm8s_r32( imm, R_EAX );
   431     store_reg( R_EAX, Rn );
   432     sh4_x86.tstate = TSTATE_NONE;
   433 :}
   434 ADDC Rm, Rn {:
   435     COUNT_INST(I_ADDC);
   436     if( sh4_x86.tstate != TSTATE_C ) {
   437         LDC_t();
   438     }
   439     load_reg( R_EAX, Rm );
   440     load_reg( R_ECX, Rn );
   441     ADC_r32_r32( R_EAX, R_ECX );
   442     store_reg( R_ECX, Rn );
   443     SETC_t();
   444     sh4_x86.tstate = TSTATE_C;
   445 :}
   446 ADDV Rm, Rn {:
   447     COUNT_INST(I_ADDV);
   448     load_reg( R_EAX, Rm );
   449     load_reg( R_ECX, Rn );
   450     ADD_r32_r32( R_EAX, R_ECX );
   451     store_reg( R_ECX, Rn );
   452     SETO_t();
   453     sh4_x86.tstate = TSTATE_O;
   454 :}
   455 AND Rm, Rn {:
   456     COUNT_INST(I_AND);
   457     load_reg( R_EAX, Rm );
   458     load_reg( R_ECX, Rn );
   459     AND_r32_r32( R_EAX, R_ECX );
   460     store_reg( R_ECX, Rn );
   461     sh4_x86.tstate = TSTATE_NONE;
   462 :}
   463 AND #imm, R0 {:  
   464     COUNT_INST(I_ANDI);
   465     load_reg( R_EAX, 0 );
   466     AND_imm32_r32(imm, R_EAX); 
   467     store_reg( R_EAX, 0 );
   468     sh4_x86.tstate = TSTATE_NONE;
   469 :}
   470 AND.B #imm, @(R0, GBR) {: 
   471     COUNT_INST(I_ANDB);
   472     load_reg( R_EAX, 0 );
   473     load_spreg( R_ECX, R_GBR );
   474     ADD_r32_r32( R_ECX, R_EAX );
   475     MMU_TRANSLATE_WRITE( R_EAX );
   476     PUSH_realigned_r32(R_EAX);
   477     MEM_READ_BYTE( R_EAX, R_EDX );
   478     POP_realigned_r32(R_EAX);
   479     AND_imm32_r32(imm, R_EDX );
   480     MEM_WRITE_BYTE( R_EAX, R_EDX );
   481     sh4_x86.tstate = TSTATE_NONE;
   482 :}
   483 CMP/EQ Rm, Rn {:  
   484     COUNT_INST(I_CMPEQ);
   485     load_reg( R_EAX, Rm );
   486     load_reg( R_ECX, Rn );
   487     CMP_r32_r32( R_EAX, R_ECX );
   488     SETE_t();
   489     sh4_x86.tstate = TSTATE_E;
   490 :}
   491 CMP/EQ #imm, R0 {:  
   492     COUNT_INST(I_CMPEQI);
   493     load_reg( R_EAX, 0 );
   494     CMP_imm8s_r32(imm, R_EAX);
   495     SETE_t();
   496     sh4_x86.tstate = TSTATE_E;
   497 :}
   498 CMP/GE Rm, Rn {:  
   499     COUNT_INST(I_CMPGE);
   500     load_reg( R_EAX, Rm );
   501     load_reg( R_ECX, Rn );
   502     CMP_r32_r32( R_EAX, R_ECX );
   503     SETGE_t();
   504     sh4_x86.tstate = TSTATE_GE;
   505 :}
   506 CMP/GT Rm, Rn {: 
   507     COUNT_INST(I_CMPGT);
   508     load_reg( R_EAX, Rm );
   509     load_reg( R_ECX, Rn );
   510     CMP_r32_r32( R_EAX, R_ECX );
   511     SETG_t();
   512     sh4_x86.tstate = TSTATE_G;
   513 :}
   514 CMP/HI Rm, Rn {:  
   515     COUNT_INST(I_CMPHI);
   516     load_reg( R_EAX, Rm );
   517     load_reg( R_ECX, Rn );
   518     CMP_r32_r32( R_EAX, R_ECX );
   519     SETA_t();
   520     sh4_x86.tstate = TSTATE_A;
   521 :}
   522 CMP/HS Rm, Rn {: 
   523     COUNT_INST(I_CMPHS);
   524     load_reg( R_EAX, Rm );
   525     load_reg( R_ECX, Rn );
   526     CMP_r32_r32( R_EAX, R_ECX );
   527     SETAE_t();
   528     sh4_x86.tstate = TSTATE_AE;
   529  :}
   530 CMP/PL Rn {: 
   531     COUNT_INST(I_CMPPL);
   532     load_reg( R_EAX, Rn );
   533     CMP_imm8s_r32( 0, R_EAX );
   534     SETG_t();
   535     sh4_x86.tstate = TSTATE_G;
   536 :}
   537 CMP/PZ Rn {:  
   538     COUNT_INST(I_CMPPZ);
   539     load_reg( R_EAX, Rn );
   540     CMP_imm8s_r32( 0, R_EAX );
   541     SETGE_t();
   542     sh4_x86.tstate = TSTATE_GE;
   543 :}
   544 CMP/STR Rm, Rn {:  
   545     COUNT_INST(I_CMPSTR);
   546     load_reg( R_EAX, Rm );
   547     load_reg( R_ECX, Rn );
   548     XOR_r32_r32( R_ECX, R_EAX );
   549     TEST_r8_r8( R_AL, R_AL );
   550     JE_rel8(target1);
   551     TEST_r8_r8( R_AH, R_AH );
   552     JE_rel8(target2);
   553     SHR_imm8_r32( 16, R_EAX );
   554     TEST_r8_r8( R_AL, R_AL );
   555     JE_rel8(target3);
   556     TEST_r8_r8( R_AH, R_AH );
   557     JMP_TARGET(target1);
   558     JMP_TARGET(target2);
   559     JMP_TARGET(target3);
   560     SETE_t();
   561     sh4_x86.tstate = TSTATE_E;
   562 :}
   563 DIV0S Rm, Rn {:
   564     COUNT_INST(I_DIV0S);
   565     load_reg( R_EAX, Rm );
   566     load_reg( R_ECX, Rn );
   567     SHR_imm8_r32( 31, R_EAX );
   568     SHR_imm8_r32( 31, R_ECX );
   569     store_spreg( R_EAX, R_M );
   570     store_spreg( R_ECX, R_Q );
   571     CMP_r32_r32( R_EAX, R_ECX );
   572     SETNE_t();
   573     sh4_x86.tstate = TSTATE_NE;
   574 :}
   575 DIV0U {:  
   576     COUNT_INST(I_DIV0U);
   577     XOR_r32_r32( R_EAX, R_EAX );
   578     store_spreg( R_EAX, R_Q );
   579     store_spreg( R_EAX, R_M );
   580     store_spreg( R_EAX, R_T );
   581     sh4_x86.tstate = TSTATE_C; // works for DIV1
   582 :}
   583 DIV1 Rm, Rn {:
   584     COUNT_INST(I_DIV1);
   585     load_spreg( R_ECX, R_M );
   586     load_reg( R_EAX, Rn );
   587     if( sh4_x86.tstate != TSTATE_C ) {
   588 	LDC_t();
   589     }
   590     RCL1_r32( R_EAX );
   591     SETC_r8( R_DL ); // Q'
   592     CMP_sh4r_r32( R_Q, R_ECX );
   593     JE_rel8(mqequal);
   594     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   595     JMP_rel8(end);
   596     JMP_TARGET(mqequal);
   597     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   598     JMP_TARGET(end);
   599     store_reg( R_EAX, Rn ); // Done with Rn now
   600     SETC_r8(R_AL); // tmp1
   601     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   602     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   603     store_spreg( R_ECX, R_Q );
   604     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   605     MOVZX_r8_r32( R_AL, R_EAX );
   606     store_spreg( R_EAX, R_T );
   607     sh4_x86.tstate = TSTATE_NONE;
   608 :}
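/* Illustrative note: DIV1 is one step of the SH4's non-restoring division.
 * It shifts the next dividend bit (from T) into Rn, then adds or subtracts
 * the divisor r[Rm] depending on whether Q == M, and finally folds the carry
 * out of that operation back into Q and T. Executing DIV0S/DIV0U followed by
 * 32 DIV1s therefore develops a 32-bit quotient one bit per step. */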
   609 DMULS.L Rm, Rn {:  
   610     COUNT_INST(I_DMULS);
   611     load_reg( R_EAX, Rm );
   612     load_reg( R_ECX, Rn );
   613     IMUL_r32(R_ECX);
   614     store_spreg( R_EDX, R_MACH );
   615     store_spreg( R_EAX, R_MACL );
   616     sh4_x86.tstate = TSTATE_NONE;
   617 :}
   618 DMULU.L Rm, Rn {:  
   619     COUNT_INST(I_DMULU);
   620     load_reg( R_EAX, Rm );
   621     load_reg( R_ECX, Rn );
   622     MUL_r32(R_ECX);
   623     store_spreg( R_EDX, R_MACH );
   624     store_spreg( R_EAX, R_MACL );    
   625     sh4_x86.tstate = TSTATE_NONE;
   626 :}
   627 DT Rn {:  
   628     COUNT_INST(I_DT);
   629     load_reg( R_EAX, Rn );
   630     ADD_imm8s_r32( -1, R_EAX );
   631     store_reg( R_EAX, Rn );
   632     SETE_t();
   633     sh4_x86.tstate = TSTATE_E;
   634 :}
   635 EXTS.B Rm, Rn {:  
   636     COUNT_INST(I_EXTSB);
   637     load_reg( R_EAX, Rm );
   638     MOVSX_r8_r32( R_EAX, R_EAX );
   639     store_reg( R_EAX, Rn );
   640 :}
   641 EXTS.W Rm, Rn {:  
   642     COUNT_INST(I_EXTSW);
   643     load_reg( R_EAX, Rm );
   644     MOVSX_r16_r32( R_EAX, R_EAX );
   645     store_reg( R_EAX, Rn );
   646 :}
   647 EXTU.B Rm, Rn {:  
   648     COUNT_INST(I_EXTUB);
   649     load_reg( R_EAX, Rm );
   650     MOVZX_r8_r32( R_EAX, R_EAX );
   651     store_reg( R_EAX, Rn );
   652 :}
   653 EXTU.W Rm, Rn {:  
   654     COUNT_INST(I_EXTUW);
   655     load_reg( R_EAX, Rm );
   656     MOVZX_r16_r32( R_EAX, R_EAX );
   657     store_reg( R_EAX, Rn );
   658 :}
   659 MAC.L @Rm+, @Rn+ {:
   660     COUNT_INST(I_MACL);
   661     if( Rm == Rn ) {
   662 	load_reg( R_EAX, Rm );
   663 	check_ralign32( R_EAX );
   664 	MMU_TRANSLATE_READ( R_EAX );
   665 	PUSH_realigned_r32( R_EAX );
   666 	load_reg( R_EAX, Rn );
   667 	ADD_imm8s_r32( 4, R_EAX );
   668 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   669 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
   670 	// Note translate twice in case of page boundaries. Maybe worth
   671 	// adding a page-boundary check to skip the second translation
   672     } else {
   673 	load_reg( R_EAX, Rm );
   674 	check_ralign32( R_EAX );
   675 	MMU_TRANSLATE_READ( R_EAX );
   676 	load_reg( R_ECX, Rn );
   677 	check_ralign32( R_ECX );
   678 	PUSH_realigned_r32( R_EAX );
   679 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   680 	MOV_r32_r32( R_ECX, R_EAX );
   681 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   682 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   683     }
   684     MEM_READ_LONG( R_EAX, R_EAX );
   685     POP_r32( R_ECX );
   686     PUSH_r32( R_EAX );
   687     MEM_READ_LONG( R_ECX, R_EAX );
   688     POP_realigned_r32( R_ECX );
   690     IMUL_r32( R_ECX );
   691     ADD_r32_sh4r( R_EAX, R_MACL );
   692     ADC_r32_sh4r( R_EDX, R_MACH );
   694     load_spreg( R_ECX, R_S );
   695     TEST_r32_r32(R_ECX, R_ECX);
   696     JE_rel8( nosat );
   697     call_func0( signsat48 );
   698     JMP_TARGET( nosat );
   699     sh4_x86.tstate = TSTATE_NONE;
   700 :}
   701 MAC.W @Rm+, @Rn+ {:  
   702     COUNT_INST(I_MACW);
   703     if( Rm == Rn ) {
   704 	load_reg( R_EAX, Rm );
   705 	check_ralign16( R_EAX );
   706 	MMU_TRANSLATE_READ( R_EAX );
   707 	PUSH_realigned_r32( R_EAX );
   708 	load_reg( R_EAX, Rn );
   709 	ADD_imm8s_r32( 2, R_EAX );
   710 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   711 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   712 	// Note translate twice in case of page boundaries. Maybe worth
   713 	// adding a page-boundary check to skip the second translation
   714     } else {
   715 	load_reg( R_EAX, Rm );
   716 	check_ralign16( R_EAX );
   717 	MMU_TRANSLATE_READ( R_EAX );
   718 	load_reg( R_ECX, Rn );
   719 	check_ralign16( R_ECX );
   720 	PUSH_realigned_r32( R_EAX );
   721 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   722 	MOV_r32_r32( R_ECX, R_EAX );
   723 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   724 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   725     }
   726     MEM_READ_WORD( R_EAX, R_EAX );
   727     POP_r32( R_ECX );
   728     PUSH_r32( R_EAX );
   729     MEM_READ_WORD( R_ECX, R_EAX );
   730     POP_realigned_r32( R_ECX );
   731     IMUL_r32( R_ECX );
   733     load_spreg( R_ECX, R_S );
   734     TEST_r32_r32( R_ECX, R_ECX );
   735     JE_rel8( nosat );
   737     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   738     JNO_rel8( end );            // 2
   739     load_imm32( R_EDX, 1 );         // 5
   740     store_spreg( R_EDX, R_MACH );   // 6
   741     JS_rel8( positive );        // 2
   742     load_imm32( R_EAX, 0x80000000 );// 5
   743     store_spreg( R_EAX, R_MACL );   // 6
   744     JMP_rel8(end2);           // 2
   746     JMP_TARGET(positive);
   747     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   748     store_spreg( R_EAX, R_MACL );   // 6
   749     JMP_rel8(end3);            // 2
   751     JMP_TARGET(nosat);
   752     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   753     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   754     JMP_TARGET(end);
   755     JMP_TARGET(end2);
   756     JMP_TARGET(end3);
   757     sh4_x86.tstate = TSTATE_NONE;
   758 :}
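/* Illustrative note: with the S flag set, MAC.W runs in the SH4's saturating
 * 16x16+32 mode - on overflow of the MACL accumulation the code above clamps
 * MACL to 0x80000000/0x7FFFFFFF and writes 1 into MACH as the overflow
 * marker. With S clear it falls through to the full 64-bit MACH:MACL
 * accumulate, as in MAC.L. */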
   759 MOVT Rn {:  
   760     COUNT_INST(I_MOVT);
   761     load_spreg( R_EAX, R_T );
   762     store_reg( R_EAX, Rn );
   763 :}
   764 MUL.L Rm, Rn {:  
   765     COUNT_INST(I_MULL);
   766     load_reg( R_EAX, Rm );
   767     load_reg( R_ECX, Rn );
   768     MUL_r32( R_ECX );
   769     store_spreg( R_EAX, R_MACL );
   770     sh4_x86.tstate = TSTATE_NONE;
   771 :}
   772 MULS.W Rm, Rn {:
   773     COUNT_INST(I_MULSW);
   774     load_reg16s( R_EAX, Rm );
   775     load_reg16s( R_ECX, Rn );
   776     MUL_r32( R_ECX );
   777     store_spreg( R_EAX, R_MACL );
   778     sh4_x86.tstate = TSTATE_NONE;
   779 :}
   780 MULU.W Rm, Rn {:  
   781     COUNT_INST(I_MULUW);
   782     load_reg16u( R_EAX, Rm );
   783     load_reg16u( R_ECX, Rn );
   784     MUL_r32( R_ECX );
   785     store_spreg( R_EAX, R_MACL );
   786     sh4_x86.tstate = TSTATE_NONE;
   787 :}
   788 NEG Rm, Rn {:
   789     COUNT_INST(I_NEG);
   790     load_reg( R_EAX, Rm );
   791     NEG_r32( R_EAX );
   792     store_reg( R_EAX, Rn );
   793     sh4_x86.tstate = TSTATE_NONE;
   794 :}
   795 NEGC Rm, Rn {:  
   796     COUNT_INST(I_NEGC);
   797     load_reg( R_EAX, Rm );
   798     XOR_r32_r32( R_ECX, R_ECX );
   799     LDC_t();
   800     SBB_r32_r32( R_EAX, R_ECX );
   801     store_reg( R_ECX, Rn );
   802     SETC_t();
   803     sh4_x86.tstate = TSTATE_C;
   804 :}
   805 NOT Rm, Rn {:  
   806     COUNT_INST(I_NOT);
   807     load_reg( R_EAX, Rm );
   808     NOT_r32( R_EAX );
   809     store_reg( R_EAX, Rn );
   810     sh4_x86.tstate = TSTATE_NONE;
   811 :}
   812 OR Rm, Rn {:  
   813     COUNT_INST(I_OR);
   814     load_reg( R_EAX, Rm );
   815     load_reg( R_ECX, Rn );
   816     OR_r32_r32( R_EAX, R_ECX );
   817     store_reg( R_ECX, Rn );
   818     sh4_x86.tstate = TSTATE_NONE;
   819 :}
   820 OR #imm, R0 {:
   821     COUNT_INST(I_ORI);
   822     load_reg( R_EAX, 0 );
   823     OR_imm32_r32(imm, R_EAX);
   824     store_reg( R_EAX, 0 );
   825     sh4_x86.tstate = TSTATE_NONE;
   826 :}
   827 OR.B #imm, @(R0, GBR) {:  
   828     COUNT_INST(I_ORB);
   829     load_reg( R_EAX, 0 );
   830     load_spreg( R_ECX, R_GBR );
   831     ADD_r32_r32( R_ECX, R_EAX );
   832     MMU_TRANSLATE_WRITE( R_EAX );
   833     PUSH_realigned_r32(R_EAX);
   834     MEM_READ_BYTE( R_EAX, R_EDX );
   835     POP_realigned_r32(R_EAX);
   836     OR_imm32_r32(imm, R_EDX );
   837     MEM_WRITE_BYTE( R_EAX, R_EDX );
   838     sh4_x86.tstate = TSTATE_NONE;
   839 :}
   840 ROTCL Rn {:
   841     COUNT_INST(I_ROTCL);
   842     load_reg( R_EAX, Rn );
   843     if( sh4_x86.tstate != TSTATE_C ) {
   844 	LDC_t();
   845     }
   846     RCL1_r32( R_EAX );
   847     store_reg( R_EAX, Rn );
   848     SETC_t();
   849     sh4_x86.tstate = TSTATE_C;
   850 :}
   851 ROTCR Rn {:  
   852     COUNT_INST(I_ROTCR);
   853     load_reg( R_EAX, Rn );
   854     if( sh4_x86.tstate != TSTATE_C ) {
   855 	LDC_t();
   856     }
   857     RCR1_r32( R_EAX );
   858     store_reg( R_EAX, Rn );
   859     SETC_t();
   860     sh4_x86.tstate = TSTATE_C;
   861 :}
   862 ROTL Rn {:  
   863     COUNT_INST(I_ROTL);
   864     load_reg( R_EAX, Rn );
   865     ROL1_r32( R_EAX );
   866     store_reg( R_EAX, Rn );
   867     SETC_t();
   868     sh4_x86.tstate = TSTATE_C;
   869 :}
   870 ROTR Rn {:  
   871     COUNT_INST(I_ROTR);
   872     load_reg( R_EAX, Rn );
   873     ROR1_r32( R_EAX );
   874     store_reg( R_EAX, Rn );
   875     SETC_t();
   876     sh4_x86.tstate = TSTATE_C;
   877 :}
   878 SHAD Rm, Rn {:
   879     COUNT_INST(I_SHAD);
   880     /* Annoyingly enough, not directly convertible */
   881     load_reg( R_EAX, Rn );
   882     load_reg( R_ECX, Rm );
   883     CMP_imm32_r32( 0, R_ECX );
   884     JGE_rel8(doshl);
   886     NEG_r32( R_ECX );      // 2
   887     AND_imm8_r8( 0x1F, R_CL ); // 3
   888     JE_rel8(emptysar);     // 2
   889     SAR_r32_CL( R_EAX );       // 2
   890     JMP_rel8(end);          // 2
   892     JMP_TARGET(emptysar);
   893     SAR_imm8_r32(31, R_EAX );  // 3
   894     JMP_rel8(end2);
   896     JMP_TARGET(doshl);
   897     AND_imm8_r8( 0x1F, R_CL ); // 3
   898     SHL_r32_CL( R_EAX );       // 2
   899     JMP_TARGET(end);
   900     JMP_TARGET(end2);
   901     store_reg( R_EAX, Rn );
   902     sh4_x86.tstate = TSTATE_NONE;
   903 :}
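/* Illustrative note: SHAD shifts left by Rm&0x1F when Rm >= 0, and
 * arithmetic-right by 32-(Rm&0x1F) when Rm < 0, where a negative count whose
 * low five bits are zero means a full 32-bit arithmetic shift. x86 masks
 * shift counts to 5 bits, so that case cannot be expressed as SAR by 32 and
 * is special-cased above ('emptysar') as SAR by 31, which yields the same
 * all-sign-bits result. */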
   904 SHLD Rm, Rn {:  
   905     COUNT_INST(I_SHLD);
   906     load_reg( R_EAX, Rn );
   907     load_reg( R_ECX, Rm );
   908     CMP_imm32_r32( 0, R_ECX );
   909     JGE_rel8(doshl);
   911     NEG_r32( R_ECX );      // 2
   912     AND_imm8_r8( 0x1F, R_CL ); // 3
   913     JE_rel8(emptyshr );
   914     SHR_r32_CL( R_EAX );       // 2
   915     JMP_rel8(end);          // 2
   917     JMP_TARGET(emptyshr);
   918     XOR_r32_r32( R_EAX, R_EAX );
   919     JMP_rel8(end2);
   921     JMP_TARGET(doshl);
   922     AND_imm8_r8( 0x1F, R_CL ); // 3
   923     SHL_r32_CL( R_EAX );       // 2
   924     JMP_TARGET(end);
   925     JMP_TARGET(end2);
   926     store_reg( R_EAX, Rn );
   927     sh4_x86.tstate = TSTATE_NONE;
   928 :}
   929 SHAL Rn {: 
   930     COUNT_INST(I_SHAL);
   931     load_reg( R_EAX, Rn );
   932     SHL1_r32( R_EAX );
   933     SETC_t();
   934     store_reg( R_EAX, Rn );
   935     sh4_x86.tstate = TSTATE_C;
   936 :}
   937 SHAR Rn {:  
   938     COUNT_INST(I_SHAR);
   939     load_reg( R_EAX, Rn );
   940     SAR1_r32( R_EAX );
   941     SETC_t();
   942     store_reg( R_EAX, Rn );
   943     sh4_x86.tstate = TSTATE_C;
   944 :}
   945 SHLL Rn {:  
   946     COUNT_INST(I_SHLL);
   947     load_reg( R_EAX, Rn );
   948     SHL1_r32( R_EAX );
   949     SETC_t();
   950     store_reg( R_EAX, Rn );
   951     sh4_x86.tstate = TSTATE_C;
   952 :}
   953 SHLL2 Rn {:
   954     COUNT_INST(I_SHLL);
   955     load_reg( R_EAX, Rn );
   956     SHL_imm8_r32( 2, R_EAX );
   957     store_reg( R_EAX, Rn );
   958     sh4_x86.tstate = TSTATE_NONE;
   959 :}
   960 SHLL8 Rn {:  
   961     COUNT_INST(I_SHLL);
   962     load_reg( R_EAX, Rn );
   963     SHL_imm8_r32( 8, R_EAX );
   964     store_reg( R_EAX, Rn );
   965     sh4_x86.tstate = TSTATE_NONE;
   966 :}
   967 SHLL16 Rn {:  
   968     COUNT_INST(I_SHLL);
   969     load_reg( R_EAX, Rn );
   970     SHL_imm8_r32( 16, R_EAX );
   971     store_reg( R_EAX, Rn );
   972     sh4_x86.tstate = TSTATE_NONE;
   973 :}
   974 SHLR Rn {:  
   975     COUNT_INST(I_SHLR);
   976     load_reg( R_EAX, Rn );
   977     SHR1_r32( R_EAX );
   978     SETC_t();
   979     store_reg( R_EAX, Rn );
   980     sh4_x86.tstate = TSTATE_C;
   981 :}
   982 SHLR2 Rn {:  
   983     COUNT_INST(I_SHLR);
   984     load_reg( R_EAX, Rn );
   985     SHR_imm8_r32( 2, R_EAX );
   986     store_reg( R_EAX, Rn );
   987     sh4_x86.tstate = TSTATE_NONE;
   988 :}
   989 SHLR8 Rn {:  
   990     COUNT_INST(I_SHLR);
   991     load_reg( R_EAX, Rn );
   992     SHR_imm8_r32( 8, R_EAX );
   993     store_reg( R_EAX, Rn );
   994     sh4_x86.tstate = TSTATE_NONE;
   995 :}
   996 SHLR16 Rn {:  
   997     COUNT_INST(I_SHLR);
   998     load_reg( R_EAX, Rn );
   999     SHR_imm8_r32( 16, R_EAX );
  1000     store_reg( R_EAX, Rn );
  1001     sh4_x86.tstate = TSTATE_NONE;
  1002 :}
  1003 SUB Rm, Rn {:  
  1004     COUNT_INST(I_SUB);
  1005     load_reg( R_EAX, Rm );
  1006     load_reg( R_ECX, Rn );
  1007     SUB_r32_r32( R_EAX, R_ECX );
  1008     store_reg( R_ECX, Rn );
  1009     sh4_x86.tstate = TSTATE_NONE;
  1010 :}
  1011 SUBC Rm, Rn {:  
  1012     COUNT_INST(I_SUBC);
  1013     load_reg( R_EAX, Rm );
  1014     load_reg( R_ECX, Rn );
  1015     if( sh4_x86.tstate != TSTATE_C ) {
   1016 	LDC_t();
   1017     }
   1018     SBB_r32_r32( R_EAX, R_ECX );
  1019     store_reg( R_ECX, Rn );
  1020     SETC_t();
  1021     sh4_x86.tstate = TSTATE_C;
  1022 :}
  1023 SUBV Rm, Rn {:  
  1024     COUNT_INST(I_SUBV);
  1025     load_reg( R_EAX, Rm );
  1026     load_reg( R_ECX, Rn );
  1027     SUB_r32_r32( R_EAX, R_ECX );
  1028     store_reg( R_ECX, Rn );
  1029     SETO_t();
  1030     sh4_x86.tstate = TSTATE_O;
  1031 :}
  1032 SWAP.B Rm, Rn {:  
  1033     COUNT_INST(I_SWAPB);
  1034     load_reg( R_EAX, Rm );
  1035     XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
  1036     store_reg( R_EAX, Rn );
  1037 :}
  1038 SWAP.W Rm, Rn {:  
   1039     COUNT_INST(I_SWAPW);
  1040     load_reg( R_EAX, Rm );
  1041     MOV_r32_r32( R_EAX, R_ECX );
  1042     SHL_imm8_r32( 16, R_ECX );
  1043     SHR_imm8_r32( 16, R_EAX );
  1044     OR_r32_r32( R_EAX, R_ECX );
  1045     store_reg( R_ECX, Rn );
  1046     sh4_x86.tstate = TSTATE_NONE;
  1047 :}
  1048 TAS.B @Rn {:  
  1049     COUNT_INST(I_TASB);
  1050     load_reg( R_EAX, Rn );
  1051     MMU_TRANSLATE_WRITE( R_EAX );
  1052     PUSH_realigned_r32( R_EAX );
  1053     MEM_READ_BYTE( R_EAX, R_EDX );
  1054     TEST_r8_r8( R_DL, R_DL );
  1055     SETE_t();
  1056     OR_imm8_r8( 0x80, R_DL );
  1057     POP_realigned_r32( R_EAX );
  1058     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1059     sh4_x86.tstate = TSTATE_NONE;
  1060 :}
  1061 TST Rm, Rn {:  
  1062     COUNT_INST(I_TST);
  1063     load_reg( R_EAX, Rm );
  1064     load_reg( R_ECX, Rn );
  1065     TEST_r32_r32( R_EAX, R_ECX );
  1066     SETE_t();
  1067     sh4_x86.tstate = TSTATE_E;
  1068 :}
  1069 TST #imm, R0 {:  
  1070     COUNT_INST(I_TSTI);
  1071     load_reg( R_EAX, 0 );
  1072     TEST_imm32_r32( imm, R_EAX );
  1073     SETE_t();
  1074     sh4_x86.tstate = TSTATE_E;
  1075 :}
  1076 TST.B #imm, @(R0, GBR) {:  
  1077     COUNT_INST(I_TSTB);
  1078     load_reg( R_EAX, 0);
   1079     load_spreg( R_ECX, R_GBR );
  1080     ADD_r32_r32( R_ECX, R_EAX );
  1081     MMU_TRANSLATE_READ( R_EAX );
  1082     MEM_READ_BYTE( R_EAX, R_EAX );
  1083     TEST_imm8_r8( imm, R_AL );
  1084     SETE_t();
  1085     sh4_x86.tstate = TSTATE_E;
  1086 :}
  1087 XOR Rm, Rn {:  
  1088     COUNT_INST(I_XOR);
  1089     load_reg( R_EAX, Rm );
  1090     load_reg( R_ECX, Rn );
  1091     XOR_r32_r32( R_EAX, R_ECX );
  1092     store_reg( R_ECX, Rn );
  1093     sh4_x86.tstate = TSTATE_NONE;
  1094 :}
  1095 XOR #imm, R0 {:  
  1096     COUNT_INST(I_XORI);
  1097     load_reg( R_EAX, 0 );
  1098     XOR_imm32_r32( imm, R_EAX );
  1099     store_reg( R_EAX, 0 );
  1100     sh4_x86.tstate = TSTATE_NONE;
  1101 :}
  1102 XOR.B #imm, @(R0, GBR) {:  
  1103     COUNT_INST(I_XORB);
  1104     load_reg( R_EAX, 0 );
  1105     load_spreg( R_ECX, R_GBR );
  1106     ADD_r32_r32( R_ECX, R_EAX );
  1107     MMU_TRANSLATE_WRITE( R_EAX );
  1108     PUSH_realigned_r32(R_EAX);
  1109     MEM_READ_BYTE(R_EAX, R_EDX);
  1110     POP_realigned_r32(R_EAX);
  1111     XOR_imm32_r32( imm, R_EDX );
  1112     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1113     sh4_x86.tstate = TSTATE_NONE;
  1114 :}
  1115 XTRCT Rm, Rn {:
  1116     COUNT_INST(I_XTRCT);
  1117     load_reg( R_EAX, Rm );
  1118     load_reg( R_ECX, Rn );
  1119     SHL_imm8_r32( 16, R_EAX );
  1120     SHR_imm8_r32( 16, R_ECX );
  1121     OR_r32_r32( R_EAX, R_ECX );
  1122     store_reg( R_ECX, Rn );
  1123     sh4_x86.tstate = TSTATE_NONE;
  1124 :}
  1126 /* Data move instructions */
  1127 MOV Rm, Rn {:  
  1128     COUNT_INST(I_MOV);
  1129     load_reg( R_EAX, Rm );
  1130     store_reg( R_EAX, Rn );
  1131 :}
  1132 MOV #imm, Rn {:  
  1133     COUNT_INST(I_MOVI);
  1134     load_imm32( R_EAX, imm );
  1135     store_reg( R_EAX, Rn );
  1136 :}
  1137 MOV.B Rm, @Rn {:  
  1138     COUNT_INST(I_MOVB);
  1139     load_reg( R_EAX, Rn );
  1140     MMU_TRANSLATE_WRITE( R_EAX );
  1141     load_reg( R_EDX, Rm );
  1142     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1143     sh4_x86.tstate = TSTATE_NONE;
  1144 :}
  1145 MOV.B Rm, @-Rn {:  
  1146     COUNT_INST(I_MOVB);
  1147     load_reg( R_EAX, Rn );
  1148     ADD_imm8s_r32( -1, R_EAX );
  1149     MMU_TRANSLATE_WRITE( R_EAX );
  1150     load_reg( R_EDX, Rm );
  1151     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
  1152     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1153     sh4_x86.tstate = TSTATE_NONE;
  1154 :}
  1155 MOV.B Rm, @(R0, Rn) {:  
  1156     COUNT_INST(I_MOVB);
  1157     load_reg( R_EAX, 0 );
  1158     load_reg( R_ECX, Rn );
  1159     ADD_r32_r32( R_ECX, R_EAX );
  1160     MMU_TRANSLATE_WRITE( R_EAX );
  1161     load_reg( R_EDX, Rm );
  1162     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1163     sh4_x86.tstate = TSTATE_NONE;
  1164 :}
  1165 MOV.B R0, @(disp, GBR) {:  
  1166     COUNT_INST(I_MOVB);
  1167     load_spreg( R_EAX, R_GBR );
  1168     ADD_imm32_r32( disp, R_EAX );
  1169     MMU_TRANSLATE_WRITE( R_EAX );
  1170     load_reg( R_EDX, 0 );
  1171     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1172     sh4_x86.tstate = TSTATE_NONE;
  1173 :}
  1174 MOV.B R0, @(disp, Rn) {:  
  1175     COUNT_INST(I_MOVB);
  1176     load_reg( R_EAX, Rn );
  1177     ADD_imm32_r32( disp, R_EAX );
  1178     MMU_TRANSLATE_WRITE( R_EAX );
  1179     load_reg( R_EDX, 0 );
  1180     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1181     sh4_x86.tstate = TSTATE_NONE;
  1182 :}
  1183 MOV.B @Rm, Rn {:  
  1184     COUNT_INST(I_MOVB);
  1185     load_reg( R_EAX, Rm );
  1186     MMU_TRANSLATE_READ( R_EAX );
  1187     MEM_READ_BYTE( R_EAX, R_EAX );
  1188     store_reg( R_EAX, Rn );
  1189     sh4_x86.tstate = TSTATE_NONE;
  1190 :}
  1191 MOV.B @Rm+, Rn {:  
  1192     COUNT_INST(I_MOVB);
  1193     load_reg( R_EAX, Rm );
  1194     MMU_TRANSLATE_READ( R_EAX );
  1195     ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
  1196     MEM_READ_BYTE( R_EAX, R_EAX );
  1197     store_reg( R_EAX, Rn );
  1198     sh4_x86.tstate = TSTATE_NONE;
  1199 :}
  1200 MOV.B @(R0, Rm), Rn {:  
  1201     COUNT_INST(I_MOVB);
  1202     load_reg( R_EAX, 0 );
  1203     load_reg( R_ECX, Rm );
  1204     ADD_r32_r32( R_ECX, R_EAX );
   1205     MMU_TRANSLATE_READ( R_EAX );
  1206     MEM_READ_BYTE( R_EAX, R_EAX );
  1207     store_reg( R_EAX, Rn );
  1208     sh4_x86.tstate = TSTATE_NONE;
  1209 :}
  1210 MOV.B @(disp, GBR), R0 {:  
  1211     COUNT_INST(I_MOVB);
  1212     load_spreg( R_EAX, R_GBR );
  1213     ADD_imm32_r32( disp, R_EAX );
  1214     MMU_TRANSLATE_READ( R_EAX );
  1215     MEM_READ_BYTE( R_EAX, R_EAX );
  1216     store_reg( R_EAX, 0 );
  1217     sh4_x86.tstate = TSTATE_NONE;
  1218 :}
  1219 MOV.B @(disp, Rm), R0 {:  
  1220     COUNT_INST(I_MOVB);
  1221     load_reg( R_EAX, Rm );
  1222     ADD_imm32_r32( disp, R_EAX );
  1223     MMU_TRANSLATE_READ( R_EAX );
  1224     MEM_READ_BYTE( R_EAX, R_EAX );
  1225     store_reg( R_EAX, 0 );
  1226     sh4_x86.tstate = TSTATE_NONE;
  1227 :}
  1228 MOV.L Rm, @Rn {:
  1229     COUNT_INST(I_MOVL);
  1230     load_reg( R_EAX, Rn );
  1231     check_walign32(R_EAX);
  1232     MMU_TRANSLATE_WRITE( R_EAX );
  1233     load_reg( R_EDX, Rm );
  1234     MEM_WRITE_LONG( R_EAX, R_EDX );
  1235     sh4_x86.tstate = TSTATE_NONE;
  1236 :}
  1237 MOV.L Rm, @-Rn {:  
  1238     COUNT_INST(I_MOVL);
  1239     load_reg( R_EAX, Rn );
  1240     ADD_imm8s_r32( -4, R_EAX );
  1241     check_walign32( R_EAX );
  1242     MMU_TRANSLATE_WRITE( R_EAX );
  1243     load_reg( R_EDX, Rm );
  1244     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  1245     MEM_WRITE_LONG( R_EAX, R_EDX );
  1246     sh4_x86.tstate = TSTATE_NONE;
  1247 :}
  1248 MOV.L Rm, @(R0, Rn) {:  
  1249     COUNT_INST(I_MOVL);
  1250     load_reg( R_EAX, 0 );
  1251     load_reg( R_ECX, Rn );
  1252     ADD_r32_r32( R_ECX, R_EAX );
  1253     check_walign32( R_EAX );
  1254     MMU_TRANSLATE_WRITE( R_EAX );
  1255     load_reg( R_EDX, Rm );
  1256     MEM_WRITE_LONG( R_EAX, R_EDX );
  1257     sh4_x86.tstate = TSTATE_NONE;
  1258 :}
  1259 MOV.L R0, @(disp, GBR) {:  
  1260     COUNT_INST(I_MOVL);
  1261     load_spreg( R_EAX, R_GBR );
  1262     ADD_imm32_r32( disp, R_EAX );
  1263     check_walign32( R_EAX );
  1264     MMU_TRANSLATE_WRITE( R_EAX );
  1265     load_reg( R_EDX, 0 );
  1266     MEM_WRITE_LONG( R_EAX, R_EDX );
  1267     sh4_x86.tstate = TSTATE_NONE;
  1268 :}
  1269 MOV.L Rm, @(disp, Rn) {:  
  1270     COUNT_INST(I_MOVL);
  1271     load_reg( R_EAX, Rn );
  1272     ADD_imm32_r32( disp, R_EAX );
  1273     check_walign32( R_EAX );
  1274     MMU_TRANSLATE_WRITE( R_EAX );
  1275     load_reg( R_EDX, Rm );
  1276     MEM_WRITE_LONG( R_EAX, R_EDX );
  1277     sh4_x86.tstate = TSTATE_NONE;
  1278 :}
  1279 MOV.L @Rm, Rn {:  
  1280     COUNT_INST(I_MOVL);
  1281     load_reg( R_EAX, Rm );
  1282     check_ralign32( R_EAX );
  1283     MMU_TRANSLATE_READ( R_EAX );
  1284     MEM_READ_LONG( R_EAX, R_EAX );
  1285     store_reg( R_EAX, Rn );
  1286     sh4_x86.tstate = TSTATE_NONE;
  1287 :}
  1288 MOV.L @Rm+, Rn {:  
  1289     COUNT_INST(I_MOVL);
  1290     load_reg( R_EAX, Rm );
  1291     check_ralign32( R_EAX );
  1292     MMU_TRANSLATE_READ( R_EAX );
  1293     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1294     MEM_READ_LONG( R_EAX, R_EAX );
  1295     store_reg( R_EAX, Rn );
  1296     sh4_x86.tstate = TSTATE_NONE;
  1297 :}
  1298 MOV.L @(R0, Rm), Rn {:  
  1299     COUNT_INST(I_MOVL);
  1300     load_reg( R_EAX, 0 );
  1301     load_reg( R_ECX, Rm );
  1302     ADD_r32_r32( R_ECX, R_EAX );
  1303     check_ralign32( R_EAX );
  1304     MMU_TRANSLATE_READ( R_EAX );
  1305     MEM_READ_LONG( R_EAX, R_EAX );
  1306     store_reg( R_EAX, Rn );
  1307     sh4_x86.tstate = TSTATE_NONE;
  1308 :}
  1309 MOV.L @(disp, GBR), R0 {:
  1310     COUNT_INST(I_MOVL);
  1311     load_spreg( R_EAX, R_GBR );
  1312     ADD_imm32_r32( disp, R_EAX );
  1313     check_ralign32( R_EAX );
  1314     MMU_TRANSLATE_READ( R_EAX );
  1315     MEM_READ_LONG( R_EAX, R_EAX );
  1316     store_reg( R_EAX, 0 );
  1317     sh4_x86.tstate = TSTATE_NONE;
  1318 :}
  1319 MOV.L @(disp, PC), Rn {:  
  1320     COUNT_INST(I_MOVLPC);
  1321     if( sh4_x86.in_delay_slot ) {
  1322 	SLOTILLEGAL();
  1323     } else {
  1324 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1325 	if( IS_IN_ICACHE(target) ) {
  1326 	    // If the target address is in the same page as the code, it's
  1327 	    // pretty safe to just ref it directly and circumvent the whole
  1328 	    // memory subsystem. (this is a big performance win)
  1330 	    // FIXME: There's a corner-case that's not handled here when
  1331 	    // the current code-page is in the ITLB but not in the UTLB.
  1332 	    // (should generate a TLB miss although need to test SH4 
  1333 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1334 	    // behaviour though.
  1335 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1336 	    MOV_moff32_EAX( ptr );
  1337 	} else {
  1338 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1339 	    // different virtual address than the translation was done with,
  1340 	    // but we can safely assume that the low bits are the same.
  1341 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1342 	    ADD_sh4r_r32( R_PC, R_EAX );
  1343 	    MMU_TRANSLATE_READ( R_EAX );
  1344 	    MEM_READ_LONG( R_EAX, R_EAX );
   1345 	    sh4_x86.tstate = TSTATE_NONE;
   1346 	}
   1347 	store_reg( R_EAX, Rn );
   1348     }
   1349 :}
  1350 MOV.L @(disp, Rm), Rn {:  
  1351     COUNT_INST(I_MOVL);
  1352     load_reg( R_EAX, Rm );
  1353     ADD_imm8s_r32( disp, R_EAX );
  1354     check_ralign32( R_EAX );
  1355     MMU_TRANSLATE_READ( R_EAX );
  1356     MEM_READ_LONG( R_EAX, R_EAX );
  1357     store_reg( R_EAX, Rn );
  1358     sh4_x86.tstate = TSTATE_NONE;
  1359 :}
  1360 MOV.W Rm, @Rn {:  
  1361     COUNT_INST(I_MOVW);
  1362     load_reg( R_EAX, Rn );
  1363     check_walign16( R_EAX );
   1364     MMU_TRANSLATE_WRITE( R_EAX );
  1365     load_reg( R_EDX, Rm );
  1366     MEM_WRITE_WORD( R_EAX, R_EDX );
  1367     sh4_x86.tstate = TSTATE_NONE;
  1368 :}
  1369 MOV.W Rm, @-Rn {:  
  1370     COUNT_INST(I_MOVW);
  1371     load_reg( R_EAX, Rn );
  1372     ADD_imm8s_r32( -2, R_EAX );
  1373     check_walign16( R_EAX );
  1374     MMU_TRANSLATE_WRITE( R_EAX );
  1375     load_reg( R_EDX, Rm );
  1376     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
  1377     MEM_WRITE_WORD( R_EAX, R_EDX );
  1378     sh4_x86.tstate = TSTATE_NONE;
  1379 :}
  1380 MOV.W Rm, @(R0, Rn) {:  
  1381     COUNT_INST(I_MOVW);
  1382     load_reg( R_EAX, 0 );
  1383     load_reg( R_ECX, Rn );
  1384     ADD_r32_r32( R_ECX, R_EAX );
  1385     check_walign16( R_EAX );
  1386     MMU_TRANSLATE_WRITE( R_EAX );
  1387     load_reg( R_EDX, Rm );
  1388     MEM_WRITE_WORD( R_EAX, R_EDX );
  1389     sh4_x86.tstate = TSTATE_NONE;
  1390 :}
  1391 MOV.W R0, @(disp, GBR) {:  
  1392     COUNT_INST(I_MOVW);
  1393     load_spreg( R_EAX, R_GBR );
  1394     ADD_imm32_r32( disp, R_EAX );
  1395     check_walign16( R_EAX );
  1396     MMU_TRANSLATE_WRITE( R_EAX );
  1397     load_reg( R_EDX, 0 );
  1398     MEM_WRITE_WORD( R_EAX, R_EDX );
  1399     sh4_x86.tstate = TSTATE_NONE;
  1400 :}
  1401 MOV.W R0, @(disp, Rn) {:  
  1402     COUNT_INST(I_MOVW);
  1403     load_reg( R_EAX, Rn );
  1404     ADD_imm32_r32( disp, R_EAX );
  1405     check_walign16( R_EAX );
  1406     MMU_TRANSLATE_WRITE( R_EAX );
  1407     load_reg( R_EDX, 0 );
  1408     MEM_WRITE_WORD( R_EAX, R_EDX );
  1409     sh4_x86.tstate = TSTATE_NONE;
  1410 :}
  1411 MOV.W @Rm, Rn {:  
  1412     COUNT_INST(I_MOVW);
  1413     load_reg( R_EAX, Rm );
  1414     check_ralign16( R_EAX );
  1415     MMU_TRANSLATE_READ( R_EAX );
  1416     MEM_READ_WORD( R_EAX, R_EAX );
  1417     store_reg( R_EAX, Rn );
  1418     sh4_x86.tstate = TSTATE_NONE;
  1419 :}
  1420 MOV.W @Rm+, Rn {:  
  1421     COUNT_INST(I_MOVW);
  1422     load_reg( R_EAX, Rm );
  1423     check_ralign16( R_EAX );
  1424     MMU_TRANSLATE_READ( R_EAX );
  1425     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
  1426     MEM_READ_WORD( R_EAX, R_EAX );
  1427     store_reg( R_EAX, Rn );
  1428     sh4_x86.tstate = TSTATE_NONE;
  1429 :}
  1430 MOV.W @(R0, Rm), Rn {:  
  1431     COUNT_INST(I_MOVW);
  1432     load_reg( R_EAX, 0 );
  1433     load_reg( R_ECX, Rm );
  1434     ADD_r32_r32( R_ECX, R_EAX );
  1435     check_ralign16( R_EAX );
  1436     MMU_TRANSLATE_READ( R_EAX );
  1437     MEM_READ_WORD( R_EAX, R_EAX );
  1438     store_reg( R_EAX, Rn );
  1439     sh4_x86.tstate = TSTATE_NONE;
  1440 :}
  1441 MOV.W @(disp, GBR), R0 {:  
  1442     COUNT_INST(I_MOVW);
  1443     load_spreg( R_EAX, R_GBR );
  1444     ADD_imm32_r32( disp, R_EAX );
  1445     check_ralign16( R_EAX );
  1446     MMU_TRANSLATE_READ( R_EAX );
  1447     MEM_READ_WORD( R_EAX, R_EAX );
  1448     store_reg( R_EAX, 0 );
  1449     sh4_x86.tstate = TSTATE_NONE;
  1450 :}
  1451 MOV.W @(disp, PC), Rn {:  
  1452     COUNT_INST(I_MOVW);
  1453     if( sh4_x86.in_delay_slot ) {
  1454 	SLOTILLEGAL();
  1455     } else {
  1456 	// See comments for MOV.L @(disp, PC), Rn
  1457 	uint32_t target = pc + disp + 4;
  1458 	if( IS_IN_ICACHE(target) ) {
  1459 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1460 	    MOV_moff32_EAX( ptr );
  1461 	    MOVSX_r16_r32( R_EAX, R_EAX );
  1462 	} else {
  1463 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1464 	    ADD_sh4r_r32( R_PC, R_EAX );
  1465 	    MMU_TRANSLATE_READ( R_EAX );
  1466 	    MEM_READ_WORD( R_EAX, R_EAX );
   1467 	    sh4_x86.tstate = TSTATE_NONE;
   1468 	}
   1469 	store_reg( R_EAX, Rn );
   1470     }
   1471 :}
  1472 MOV.W @(disp, Rm), R0 {:  
  1473     COUNT_INST(I_MOVW);
  1474     load_reg( R_EAX, Rm );
  1475     ADD_imm32_r32( disp, R_EAX );
  1476     check_ralign16( R_EAX );
  1477     MMU_TRANSLATE_READ( R_EAX );
  1478     MEM_READ_WORD( R_EAX, R_EAX );
  1479     store_reg( R_EAX, 0 );
  1480     sh4_x86.tstate = TSTATE_NONE;
  1481 :}
  1482 MOVA @(disp, PC), R0 {:  
  1483     COUNT_INST(I_MOVA);
  1484     if( sh4_x86.in_delay_slot ) {
  1485 	SLOTILLEGAL();
  1486     } else {
  1487 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1488 	ADD_sh4r_r32( R_PC, R_ECX );
  1489 	store_reg( R_ECX, 0 );
   1490 	sh4_x86.tstate = TSTATE_NONE;
   1491     }
   1492 :}
  1493 MOVCA.L R0, @Rn {:  
  1494     COUNT_INST(I_MOVCA);
  1495     load_reg( R_EAX, Rn );
  1496     check_walign32( R_EAX );
  1497     MMU_TRANSLATE_WRITE( R_EAX );
  1498     load_reg( R_EDX, 0 );
  1499     MEM_WRITE_LONG( R_EAX, R_EDX );
  1500     sh4_x86.tstate = TSTATE_NONE;
  1501 :}
  1503 /* Control transfer instructions */
  1504 BF disp {:
  1505     COUNT_INST(I_BF);
  1506     if( sh4_x86.in_delay_slot ) {
  1507 	SLOTILLEGAL();
  1508     } else {
  1509 	sh4vma_t target = disp + pc + 4;
  1510 	JT_rel8( nottaken );
  1511 	exit_block_rel(target, pc+2 );
  1512 	JMP_TARGET(nottaken);
   1513 	return 2;
   1514     }
   1515 :}
  1516 BF/S disp {:
  1517     COUNT_INST(I_BFS);
  1518     if( sh4_x86.in_delay_slot ) {
  1519 	SLOTILLEGAL();
  1520     } else {
  1521 	sh4_x86.in_delay_slot = DELAY_PC;
  1522 	if( UNTRANSLATABLE(pc+2) ) {
  1523 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1524 	    JT_rel8(nottaken);
  1525 	    ADD_imm32_r32( disp, R_EAX );
  1526 	    JMP_TARGET(nottaken);
  1527 	    ADD_sh4r_r32( R_PC, R_EAX );
  1528 	    store_spreg( R_EAX, R_NEW_PC );
  1529 	    exit_block_emu(pc+2);
  1530 	    sh4_x86.branch_taken = TRUE;
  1531 	    return 2;
  1532 	} else {
  1533 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1534 		CMP_imm8s_sh4r( 1, R_T );
   1535 		sh4_x86.tstate = TSTATE_E;
   1536 	    }
   1537 	    sh4vma_t target = disp + pc + 4;
  1538 	    OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
  1539 	    int save_tstate = sh4_x86.tstate;
  1540 	    sh4_translate_instruction(pc+2);
  1541 	    exit_block_rel( target, pc+4 );
  1543 	    // not taken
  1544 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1545 	    sh4_x86.tstate = save_tstate;
  1546 	    sh4_translate_instruction(pc+2);
   1547 	    return 4;
   1548 	}
   1549     }
   1550 :}
  1551 BRA disp {:  
  1552     COUNT_INST(I_BRA);
  1553     if( sh4_x86.in_delay_slot ) {
  1554 	SLOTILLEGAL();
  1555     } else {
  1556 	sh4_x86.in_delay_slot = DELAY_PC;
  1557 	sh4_x86.branch_taken = TRUE;
  1558 	if( UNTRANSLATABLE(pc+2) ) {
  1559 	    load_spreg( R_EAX, R_PC );
  1560 	    ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
  1561 	    store_spreg( R_EAX, R_NEW_PC );
  1562 	    exit_block_emu(pc+2);
  1563 	    return 2;
  1564 	} else {
  1565 	    sh4_translate_instruction( pc + 2 );
  1566 	    exit_block_rel( disp + pc + 4, pc+4 );
   1567 	    return 4;
   1568 	}
   1569     }
   1570 :}
  1571 BRAF Rn {:  
  1572     COUNT_INST(I_BRAF);
  1573     if( sh4_x86.in_delay_slot ) {
  1574 	SLOTILLEGAL();
  1575     } else {
  1576 	load_spreg( R_EAX, R_PC );
  1577 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1578 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1579 	store_spreg( R_EAX, R_NEW_PC );
  1580 	sh4_x86.in_delay_slot = DELAY_PC;
  1581 	sh4_x86.tstate = TSTATE_NONE;
  1582 	sh4_x86.branch_taken = TRUE;
  1583 	if( UNTRANSLATABLE(pc+2) ) {
  1584 	    exit_block_emu(pc+2);
  1585 	    return 2;
  1586 	} else {
  1587 	    sh4_translate_instruction( pc + 2 );
  1588 	    exit_block_newpcset(pc+2);
   1589 	    return 4;
   1590 	}
   1591     }
   1592 :}
  1593 BSR disp {:  
  1594     COUNT_INST(I_BSR);
  1595     if( sh4_x86.in_delay_slot ) {
  1596 	SLOTILLEGAL();
  1597     } else {
  1598 	load_spreg( R_EAX, R_PC );
  1599 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1600 	store_spreg( R_EAX, R_PR );
  1601 	sh4_x86.in_delay_slot = DELAY_PC;
  1602 	sh4_x86.branch_taken = TRUE;
  1603 	sh4_x86.tstate = TSTATE_NONE;
  1604 	if( UNTRANSLATABLE(pc+2) ) {
  1605 	    ADD_imm32_r32( disp, R_EAX );
  1606 	    store_spreg( R_EAX, R_NEW_PC );
  1607 	    exit_block_emu(pc+2);
  1608 	    return 2;
  1609 	} else {
  1610 	    sh4_translate_instruction( pc + 2 );
  1611 	    exit_block_rel( disp + pc + 4, pc+4 );
   1612 	    return 4;
   1613 	}
   1614     }
   1615 :}
  1616 BSRF Rn {:  
  1617     COUNT_INST(I_BSRF);
  1618     if( sh4_x86.in_delay_slot ) {
  1619 	SLOTILLEGAL();
  1620     } else {
  1621 	load_spreg( R_EAX, R_PC );
  1622 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1623 	store_spreg( R_EAX, R_PR );
  1624 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1625 	store_spreg( R_EAX, R_NEW_PC );
  1627 	sh4_x86.in_delay_slot = DELAY_PC;
  1628 	sh4_x86.tstate = TSTATE_NONE;
  1629 	sh4_x86.branch_taken = TRUE;
  1630 	if( UNTRANSLATABLE(pc+2) ) {
  1631 	    exit_block_emu(pc+2);
  1632 	    return 2;
  1633 	} else {
  1634 	    sh4_translate_instruction( pc + 2 );
  1635 	    exit_block_newpcset(pc+2);
   1636 	    return 4;
   1637 	}
   1638     }
   1639 :}
  1640 BT disp {:
  1641     COUNT_INST(I_BT);
  1642     if( sh4_x86.in_delay_slot ) {
  1643 	SLOTILLEGAL();
  1644     } else {
  1645 	sh4vma_t target = disp + pc + 4;
  1646 	JF_rel8( nottaken );
  1647 	exit_block_rel(target, pc+2 );
  1648 	JMP_TARGET(nottaken);
   1649 	return 2;
   1650     }
   1651 :}
  1652 BT/S disp {:
  1653     COUNT_INST(I_BTS);
  1654     if( sh4_x86.in_delay_slot ) {
  1655 	SLOTILLEGAL();
  1656     } else {
  1657 	sh4_x86.in_delay_slot = DELAY_PC;
  1658 	if( UNTRANSLATABLE(pc+2) ) {
  1659 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1660 	    JF_rel8(nottaken);
  1661 	    ADD_imm32_r32( disp, R_EAX );
  1662 	    JMP_TARGET(nottaken);
  1663 	    ADD_sh4r_r32( R_PC, R_EAX );
  1664 	    store_spreg( R_EAX, R_NEW_PC );
  1665 	    exit_block_emu(pc+2);
  1666 	    sh4_x86.branch_taken = TRUE;
  1667 	    return 2;
  1668 	} else {
  1669 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1670 		CMP_imm8s_sh4r( 1, R_T );
  1671 		sh4_x86.tstate = TSTATE_E;
  1672 	    }
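       	    /* Note: this conditional jump is emitted by hand as a rel32 JF
       	     * (0x0F 8x) because the taken path still has to translate the
       	     * delay slot, so the jump distance isn't known yet; the zero
       	     * displacement is filled in through *patch once exit_block_rel()
       	     * has been emitted. */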
  1673 	    OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
  1674 	    int save_tstate = sh4_x86.tstate;
  1675 	    sh4_translate_instruction(pc+2);
  1676 	    exit_block_rel( disp + pc + 4, pc+4 );
  1677 	    // not taken
  1678 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1679 	    sh4_x86.tstate = save_tstate;
  1680 	    sh4_translate_instruction(pc+2);
  1681 	    return 4;
  1682 	}
  1683     }
  1684 :}
  1685 JMP @Rn {:  
  1686     COUNT_INST(I_JMP);
  1687     if( sh4_x86.in_delay_slot ) {
  1688 	SLOTILLEGAL();
  1689     } else {
  1690 	load_reg( R_ECX, Rn );
  1691 	store_spreg( R_ECX, R_NEW_PC );
  1692 	sh4_x86.in_delay_slot = DELAY_PC;
  1693 	sh4_x86.branch_taken = TRUE;
  1694 	if( UNTRANSLATABLE(pc+2) ) {
  1695 	    exit_block_emu(pc+2);
  1696 	    return 2;
  1697 	} else {
  1698 	    sh4_translate_instruction(pc+2);
  1699 	    exit_block_newpcset(pc+2);
  1700 	    return 4;
  1701 	}
  1702     }
  1703 :}
  1704 JSR @Rn {:  
  1705     COUNT_INST(I_JSR);
  1706     if( sh4_x86.in_delay_slot ) {
  1707 	SLOTILLEGAL();
  1708     } else {
  1709 	load_spreg( R_EAX, R_PC );
  1710 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1711 	store_spreg( R_EAX, R_PR );
  1712 	load_reg( R_ECX, Rn );
  1713 	store_spreg( R_ECX, R_NEW_PC );
  1714 	sh4_x86.in_delay_slot = DELAY_PC;
  1715 	sh4_x86.branch_taken = TRUE;
  1716 	sh4_x86.tstate = TSTATE_NONE;
  1717 	if( UNTRANSLATABLE(pc+2) ) {
  1718 	    exit_block_emu(pc+2);
  1719 	    return 2;
  1720 	} else {
  1721 	    sh4_translate_instruction(pc+2);
  1722 	    exit_block_newpcset(pc+2);
  1723 	    return 4;
  1724 	}
  1725     }
  1726 :}
  1727 RTE {:  
  1728     COUNT_INST(I_RTE);
  1729     if( sh4_x86.in_delay_slot ) {
  1730 	SLOTILLEGAL();
  1731     } else {
  1732 	check_priv();
  1733 	load_spreg( R_ECX, R_SPC );
  1734 	store_spreg( R_ECX, R_NEW_PC );
  1735 	load_spreg( R_EAX, R_SSR );
  1736 	call_func1( sh4_write_sr, R_EAX );
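       	/* Note: sh4_write_sr can switch register banks and flip the MD/FD
       	 * bits, which is why the cached priv/fpuen check flags are cleared
       	 * below. */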
  1737 	sh4_x86.in_delay_slot = DELAY_PC;
  1738 	sh4_x86.priv_checked = FALSE;
  1739 	sh4_x86.fpuen_checked = FALSE;
  1740 	sh4_x86.tstate = TSTATE_NONE;
  1741 	sh4_x86.branch_taken = TRUE;
  1742 	if( UNTRANSLATABLE(pc+2) ) {
  1743 	    exit_block_emu(pc+2);
  1744 	    return 2;
  1745 	} else {
  1746 	    sh4_translate_instruction(pc+2);
  1747 	    exit_block_newpcset(pc+2);
  1748 	    return 4;
  1749 	}
  1750     }
  1751 :}
  1752 RTS {:  
  1753     COUNT_INST(I_RTS);
  1754     if( sh4_x86.in_delay_slot ) {
  1755 	SLOTILLEGAL();
  1756     } else {
  1757 	load_spreg( R_ECX, R_PR );
  1758 	store_spreg( R_ECX, R_NEW_PC );
  1759 	sh4_x86.in_delay_slot = DELAY_PC;
  1760 	sh4_x86.branch_taken = TRUE;
  1761 	if( UNTRANSLATABLE(pc+2) ) {
  1762 	    exit_block_emu(pc+2);
  1763 	    return 2;
  1764 	} else {
  1765 	    sh4_translate_instruction(pc+2);
  1766 	    exit_block_newpcset(pc+2);
  1767 	    return 4;
  1768 	}
  1769     }
  1770 :}
  1771 TRAPA #imm {:  
  1772     COUNT_INST(I_TRAPA);
  1773     if( sh4_x86.in_delay_slot ) {
  1774 	SLOTILLEGAL();
  1775     } else {
  1776 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1777 	ADD_r32_sh4r( R_ECX, R_PC );
  1778 	load_imm32( R_EAX, imm );
  1779 	call_func1( sh4_raise_trap, R_EAX );
  1780 	sh4_x86.tstate = TSTATE_NONE;
  1781 	exit_block_pcset(pc);
  1782 	sh4_x86.branch_taken = TRUE;
  1783 	return 2;
  1784     }
  1785 :}
  1786 UNDEF {:  
  1787     COUNT_INST(I_UNDEF);
  1788     if( sh4_x86.in_delay_slot ) {
  1789 	SLOTILLEGAL();
  1790     } else {
  1791 	JMP_exc(EXC_ILLEGAL);
  1792 	return 2;
  1793     }
  1794 :}
  1796 CLRMAC {:  
  1797     COUNT_INST(I_CLRMAC);
  1798     XOR_r32_r32(R_EAX, R_EAX);
  1799     store_spreg( R_EAX, R_MACL );
  1800     store_spreg( R_EAX, R_MACH );
  1801     sh4_x86.tstate = TSTATE_NONE;
  1802 :}
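       /* Note: the CLRS/CLRT/SETS/SETT group below sets the host carry flag
        * with CLC/STC and then stores it into the SH4 flag bit via a SETC
        * helper; after CLRT/SETT the T bit mirrors the host carry, hence
        * tstate = TSTATE_C. */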
  1803 CLRS {:
  1804     COUNT_INST(I_CLRS);
  1805     CLC();
  1806     SETC_sh4r(R_S);
  1807     sh4_x86.tstate = TSTATE_NONE;
  1808 :}
  1809 CLRT {:  
  1810     COUNT_INST(I_CLRT);
  1811     CLC();
  1812     SETC_t();
  1813     sh4_x86.tstate = TSTATE_C;
  1814 :}
  1815 SETS {:  
  1816     COUNT_INST(I_SETS);
  1817     STC();
  1818     SETC_sh4r(R_S);
  1819     sh4_x86.tstate = TSTATE_NONE;
  1820 :}
  1821 SETT {:  
  1822     COUNT_INST(I_SETT);
  1823     STC();
  1824     SETC_t();
  1825     sh4_x86.tstate = TSTATE_C;
  1826 :}
  1828 /* Floating point moves */
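       /* Note: the transfer size is resolved at translation time -
        * sh4_x86.double_size tracks FPSCR.SZ (toggled by FSCHG below), so each
        * FMOV template emits either a single 32-bit move or a 64-bit pair move
        * without any runtime test. */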
  1829 FMOV FRm, FRn {:  
  1830     COUNT_INST(I_FMOV1);
  1831     check_fpuen();
  1832     if( sh4_x86.double_size ) {
  1833         load_dr0( R_EAX, FRm );
  1834         load_dr1( R_ECX, FRm );
  1835         store_dr0( R_EAX, FRn );
  1836         store_dr1( R_ECX, FRn );
  1837     } else {
  1838         load_fr( R_EAX, FRm ); // SZ=0 branch
  1839         store_fr( R_EAX, FRn );
  1840     }
  1841 :}
  1842 FMOV FRm, @Rn {: 
  1843     COUNT_INST(I_FMOV2);
  1844     check_fpuen();
  1845     load_reg( R_EAX, Rn );
  1846     if( sh4_x86.double_size ) {
  1847         check_walign64( R_EAX );
  1848         MMU_TRANSLATE_WRITE( R_EAX );
  1849         load_dr0( R_EDX, FRm );
  1850         load_dr1( R_ECX, FRm );
  1851         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1852     } else {
  1853         check_walign32( R_EAX );
  1854         MMU_TRANSLATE_WRITE( R_EAX );
  1855         load_fr( R_EDX, FRm );
  1856         MEM_WRITE_LONG( R_EAX, R_EDX );
  1857     }
  1858     sh4_x86.tstate = TSTATE_NONE;
  1859 :}
  1860 FMOV @Rm, FRn {:  
  1861     COUNT_INST(I_FMOV5);
  1862     check_fpuen();
  1863     load_reg( R_EAX, Rm );
  1864     if( sh4_x86.double_size ) {
  1865         check_ralign64( R_EAX );
  1866         MMU_TRANSLATE_READ( R_EAX );
  1867         MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
  1868         store_dr0( R_EDX, FRn );
  1869         store_dr1( R_EAX, FRn );    
  1870     } else {
  1871         check_ralign32( R_EAX );
  1872         MMU_TRANSLATE_READ( R_EAX );
  1873         MEM_READ_LONG( R_EAX, R_EAX );
  1874         store_fr( R_EAX, FRn );
  1875     }
  1876     sh4_x86.tstate = TSTATE_NONE;
  1877 :}
  1878 FMOV FRm, @-Rn {:  
  1879     COUNT_INST(I_FMOV3);
  1880     check_fpuen();
  1881     load_reg( R_EAX, Rn );
  1882     if( sh4_x86.double_size ) {
  1883         check_walign64( R_EAX );
  1884         ADD_imm8s_r32(-8,R_EAX);
  1885         MMU_TRANSLATE_WRITE( R_EAX );
  1886         load_dr0( R_EDX, FRm );
  1887         load_dr1( R_ECX, FRm );
  1888         ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1889         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1890     } else {
  1891         check_walign32( R_EAX );
  1892         ADD_imm8s_r32( -4, R_EAX );
  1893         MMU_TRANSLATE_WRITE( R_EAX );
  1894         load_fr( R_EDX, FRm );
  1895         ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1896         MEM_WRITE_LONG( R_EAX, R_EDX );
  1897     }
  1898     sh4_x86.tstate = TSTATE_NONE;
  1899 :}
  1900 FMOV @Rm+, FRn {:
  1901     COUNT_INST(I_FMOV6);
  1902     check_fpuen();
  1903     load_reg( R_EAX, Rm );
  1904     if( sh4_x86.double_size ) {
  1905         check_ralign64( R_EAX );
  1906         MMU_TRANSLATE_READ( R_EAX );
  1907         ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1908         MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
  1909         store_dr0( R_EDX, FRn );
  1910         store_dr1( R_EAX, FRn );
  1911     } else {
  1912         check_ralign32( R_EAX );
  1913         MMU_TRANSLATE_READ( R_EAX );
  1914         ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1915         MEM_READ_LONG( R_EAX, R_EAX );
  1916         store_fr( R_EAX, FRn );
  1917     }
  1918     sh4_x86.tstate = TSTATE_NONE;
  1919 :}
  1920 FMOV FRm, @(R0, Rn) {:  
  1921     COUNT_INST(I_FMOV4);
  1922     check_fpuen();
  1923     load_reg( R_EAX, Rn );
  1924     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1925     if( sh4_x86.double_size ) {
  1926         check_walign64( R_EAX );
  1927         MMU_TRANSLATE_WRITE( R_EAX );
  1928         load_dr0( R_EDX, FRm );
  1929         load_dr1( R_ECX, FRm );
  1930         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1931     } else {
  1932         check_walign32( R_EAX );
  1933         MMU_TRANSLATE_WRITE( R_EAX );
  1934         load_fr( R_EDX, FRm );
  1935         MEM_WRITE_LONG( R_EAX, R_EDX ); // 12
  1936     }
  1937     sh4_x86.tstate = TSTATE_NONE;
  1938 :}
  1939 FMOV @(R0, Rm), FRn {:  
  1940     COUNT_INST(I_FMOV7);
  1941     check_fpuen();
  1942     load_reg( R_EAX, Rm );
  1943     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1944     if( sh4_x86.double_size ) {
  1945         check_ralign64( R_EAX );
  1946         MMU_TRANSLATE_READ( R_EAX );
  1947         MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1948         store_dr0( R_ECX, FRn );
  1949         store_dr1( R_EAX, FRn );
  1950     } else {
  1951         check_ralign32( R_EAX );
  1952         MMU_TRANSLATE_READ( R_EAX );
  1953         MEM_READ_LONG( R_EAX, R_EAX );
  1954         store_fr( R_EAX, FRn );
  1955     }
  1956     sh4_x86.tstate = TSTATE_NONE;
  1957 :}
  1958 FLDI0 FRn {:  /* IFF PR=0 */
  1959     COUNT_INST(I_FLDI0);
  1960     check_fpuen();
  1961     if( sh4_x86.double_prec == 0 ) {
  1962         XOR_r32_r32( R_EAX, R_EAX );
  1963         store_fr( R_EAX, FRn );
  1964     }
  1965     sh4_x86.tstate = TSTATE_NONE;
  1966 :}
  1967 FLDI1 FRn {:  /* IFF PR=0 */
  1968     COUNT_INST(I_FLDI1);
  1969     check_fpuen();
  1970     if( sh4_x86.double_prec == 0 ) {
  1971         load_imm32(R_EAX, 0x3F800000);
  1972         store_fr( R_EAX, FRn );
  1973     }
  1974 :}
  1976 FLOAT FPUL, FRn {:  
  1977     COUNT_INST(I_FLOAT);
  1978     check_fpuen();
  1979     FILD_sh4r(R_FPUL);
  1980     if( sh4_x86.double_prec ) {
  1981         pop_dr( FRn );
  1982     } else {
  1983         pop_fr( FRn );
  1984     }
  1985 :}
  1986 FTRC FRm, FPUL {:  
  1987     COUNT_INST(I_FTRC);
  1988     check_fpuen();
  1989     if( sh4_x86.double_prec ) {
  1990         push_dr( FRm );
  1991     } else {
  1992         push_fr( FRm );
  1993     }
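           /* Note: this models FTRC's saturating conversion - the operand is
            * compared against max_int/min_int and clamped to the bound on
            * overflow, while the in-range case is converted with the FPU
            * control word temporarily set to truncation (trunc_fcw) and then
            * restored. */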
  1994     load_ptr( R_ECX, &max_int );
  1995     FILD_r32ind( R_ECX );
  1996     FCOMIP_st(1);
  1997     JNA_rel8( sat );
  1998     load_ptr( R_ECX, &min_int );  // 5
  1999     FILD_r32ind( R_ECX );           // 2
  2000     FCOMIP_st(1);                   // 2
  2001     JAE_rel8( sat2 );            // 2
  2002     load_ptr( R_EAX, &save_fcw );
  2003     FNSTCW_r32ind( R_EAX );
  2004     load_ptr( R_EDX, &trunc_fcw );
  2005     FLDCW_r32ind( R_EDX );
  2006     FISTP_sh4r(R_FPUL);             // 3
  2007     FLDCW_r32ind( R_EAX );
  2008     JMP_rel8(end);             // 2
  2010     JMP_TARGET(sat);
  2011     JMP_TARGET(sat2);
  2012     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  2013     store_spreg( R_ECX, R_FPUL );
  2014     FPOP_st();
  2015     JMP_TARGET(end);
  2016     sh4_x86.tstate = TSTATE_NONE;
  2017 :}
  2018 FLDS FRm, FPUL {:  
  2019     COUNT_INST(I_FLDS);
  2020     check_fpuen();
  2021     load_fr( R_EAX, FRm );
  2022     store_spreg( R_EAX, R_FPUL );
  2023 :}
  2024 FSTS FPUL, FRn {:  
  2025     COUNT_INST(I_FSTS);
  2026     check_fpuen();
  2027     load_spreg( R_EAX, R_FPUL );
  2028     store_fr( R_EAX, FRn );
  2029 :}
  2030 FCNVDS FRm, FPUL {:  
  2031     COUNT_INST(I_FCNVDS);
  2032     check_fpuen();
  2033     if( sh4_x86.double_prec ) {
  2034         push_dr( FRm );
  2035         pop_fpul();
  2036     }
  2037 :}
  2038 FCNVSD FPUL, FRn {:  
  2039     COUNT_INST(I_FCNVSD);
  2040     check_fpuen();
  2041     if( sh4_x86.double_prec ) {
  2042         push_fpul();
  2043         pop_dr( FRn );
  2044     }
  2045 :}
  2047 /* Floating point instructions */
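       /* Note: the arithmetic ops below run on the host x87 stack: push_fr/
        * push_dr load an SH4 FP register onto ST(0), the operation is applied,
        * and pop_fr/pop_dr write the result back, in single or double
        * precision as selected by double_prec. */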
  2048 FABS FRn {:  
  2049     COUNT_INST(I_FABS);
  2050     check_fpuen();
  2051     if( sh4_x86.double_prec ) {
  2052         push_dr(FRn);
  2053         FABS_st0();
  2054         pop_dr(FRn);
  2055     } else {
  2056         push_fr(FRn);
  2057         FABS_st0();
  2058         pop_fr(FRn);
  2059     }
  2060 :}
  2061 FADD FRm, FRn {:  
  2062     COUNT_INST(I_FADD);
  2063     check_fpuen();
  2064     if( sh4_x86.double_prec ) {
  2065         push_dr(FRm);
  2066         push_dr(FRn);
  2067         FADDP_st(1);
  2068         pop_dr(FRn);
  2069     } else {
  2070         push_fr(FRm);
  2071         push_fr(FRn);
  2072         FADDP_st(1);
  2073         pop_fr(FRn);
  2074     }
  2075 :}
  2076 FDIV FRm, FRn {:  
  2077     COUNT_INST(I_FDIV);
  2078     check_fpuen();
  2079     if( sh4_x86.double_prec ) {
  2080         push_dr(FRn);
  2081         push_dr(FRm);
  2082         FDIVP_st(1);
  2083         pop_dr(FRn);
  2084     } else {
  2085         push_fr(FRn);
  2086         push_fr(FRm);
  2087         FDIVP_st(1);
  2088         pop_fr(FRn);
  2089     }
  2090 :}
  2091 FMAC FR0, FRm, FRn {:  
  2092     COUNT_INST(I_FMAC);
  2093     check_fpuen();
  2094     if( sh4_x86.double_prec ) {
  2095         push_dr( 0 );
  2096         push_dr( FRm );
  2097         FMULP_st(1);
  2098         push_dr( FRn );
  2099         FADDP_st(1);
  2100         pop_dr( FRn );
  2101     } else {
  2102         push_fr( 0 );
  2103         push_fr( FRm );
  2104         FMULP_st(1);
  2105         push_fr( FRn );
  2106         FADDP_st(1);
  2107         pop_fr( FRn );
  2108     }
  2109 :}
  2111 FMUL FRm, FRn {:  
  2112     COUNT_INST(I_FMUL);
  2113     check_fpuen();
  2114     if( sh4_x86.double_prec ) {
  2115         push_dr(FRm);
  2116         push_dr(FRn);
  2117         FMULP_st(1);
  2118         pop_dr(FRn);
  2119     } else {
  2120         push_fr(FRm);
  2121         push_fr(FRn);
  2122         FMULP_st(1);
  2123         pop_fr(FRn);
  2124     }
  2125 :}
  2126 FNEG FRn {:  
  2127     COUNT_INST(I_FNEG);
  2128     check_fpuen();
  2129     if( sh4_x86.double_prec ) {
  2130         push_dr(FRn);
  2131         FCHS_st0();
  2132         pop_dr(FRn);
  2133     } else {
  2134         push_fr(FRn);
  2135         FCHS_st0();
  2136         pop_fr(FRn);
  2137     }
  2138 :}
  2139 FSRRA FRn {:  
  2140     COUNT_INST(I_FSRRA);
  2141     check_fpuen();
  2142     if( sh4_x86.double_prec == 0 ) {
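           /* Note: FSRRA is only an approximation of 1/sqrt on real hardware;
            * here it is computed exactly as 1.0/sqrt(FRn) via FLD1, FSQRT and
            * FDIVP. */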
  2143         FLD1_st0();
  2144         push_fr(FRn);
  2145         FSQRT_st0();
  2146         FDIVP_st(1);
  2147         pop_fr(FRn);
  2148     }
  2149 :}
  2150 FSQRT FRn {:  
  2151     COUNT_INST(I_FSQRT);
  2152     check_fpuen();
  2153     if( sh4_x86.double_prec ) {
  2154         push_dr(FRn);
  2155         FSQRT_st0();
  2156         pop_dr(FRn);
  2157     } else {
  2158         push_fr(FRn);
  2159         FSQRT_st0();
  2160         pop_fr(FRn);
  2161     }
  2162 :}
  2163 FSUB FRm, FRn {:  
  2164     COUNT_INST(I_FSUB);
  2165     check_fpuen();
  2166     if( sh4_x86.double_prec ) {
  2167         push_dr(FRn);
  2168         push_dr(FRm);
  2169         FSUBP_st(1);
  2170         pop_dr(FRn);
  2171     } else {
  2172         push_fr(FRn);
  2173         push_fr(FRm);
  2174         FSUBP_st(1);
  2175         pop_fr(FRn);
  2176     }
  2177 :}
  2179 FCMP/EQ FRm, FRn {:  
  2180     COUNT_INST(I_FCMPEQ);
  2181     check_fpuen();
  2182     if( sh4_x86.double_prec ) {
  2183         push_dr(FRm);
  2184         push_dr(FRn);
  2185     } else {
  2186         push_fr(FRm);
  2187         push_fr(FRn);
  2188     }
  2189     FCOMIP_st(1);
  2190     SETE_t();
  2191     FPOP_st();
  2192     sh4_x86.tstate = TSTATE_E;
  2193 :}
  2194 FCMP/GT FRm, FRn {:  
  2195     COUNT_INST(I_FCMPGT);
  2196     check_fpuen();
  2197     if( sh4_x86.double_prec ) {
  2198         push_dr(FRm);
  2199         push_dr(FRn);
  2200     } else {
  2201         push_fr(FRm);
  2202         push_fr(FRn);
  2203     }
  2204     FCOMIP_st(1);
  2205     SETA_t();
  2206     FPOP_st();
  2207     sh4_x86.tstate = TSTATE_A;
  2208 :}
  2210 FSCA FPUL, FRn {:  
  2211     COUNT_INST(I_FSCA);
  2212     check_fpuen();
  2213     if( sh4_x86.double_prec == 0 ) {
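           /* Note: FSCA is delegated to the sh4_fsca helper, with the angle
            * taken from FPUL and R_EDX pointing at the destination sine/cosine
            * register pair (FRn&0x0E forces an even base register). */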
  2214         LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
  2215         load_spreg( R_EAX, R_FPUL );
  2216         call_func2( sh4_fsca, R_EAX, R_EDX );
  2217     }
  2218     sh4_x86.tstate = TSTATE_NONE;
  2219 :}
  2220 FIPR FVm, FVn {:  
  2221     COUNT_INST(I_FIPR);
  2222     check_fpuen();
  2223     if( sh4_x86.double_prec == 0 ) {
  2224         if( sh4_x86.sse3_enabled ) {
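               /* Note: SSE3 path - after the element-wise MULPS, two HADDPS
                * passes leave the 4-element dot product replicated across
                * xmm4, and a single lane is stored to the FVn+3 slot (the +2
                * offset appears to match the pair-swapped fr[] layout visible
                * in the FTRV comments below). */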
  2225             MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2226             MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2227             HADDPS_xmm_xmm( 4, 4 ); 
  2228             HADDPS_xmm_xmm( 4, 4 );
  2229             MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2230         } else {
  2231             push_fr( FVm<<2 );
  2232             push_fr( FVn<<2 );
  2233             FMULP_st(1);
  2234             push_fr( (FVm<<2)+1);
  2235             push_fr( (FVn<<2)+1);
  2236             FMULP_st(1);
  2237             FADDP_st(1);
  2238             push_fr( (FVm<<2)+2);
  2239             push_fr( (FVn<<2)+2);
  2240             FMULP_st(1);
  2241             FADDP_st(1);
  2242             push_fr( (FVm<<2)+3);
  2243             push_fr( (FVn<<2)+3);
  2244             FMULP_st(1);
  2245             FADDP_st(1);
  2246             pop_fr( (FVn<<2)+3);
  2247         }
  2248     }
  2249 :}
  2250 FTRV XMTRX, FVn {:  
  2251     COUNT_INST(I_FTRV);
  2252     check_fpuen();
  2253     if( sh4_x86.double_prec == 0 ) {
  2254         if( sh4_x86.sse3_enabled ) {
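               /* Note: SSE3 path - a 4x4 matrix-vector multiply: each element
                * of FVn is broadcast across an xmm register (MOVSLDUP/MOVSHDUP
                * plus the MOVLHPS/MOVHLPS shuffles), multiplied into the
                * corresponding matrix column, and the four partial products
                * are accumulated with ADDPS before being stored back over
                * FVn. */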
  2255             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2256             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2257             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2258             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2260             MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2261             MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2262             MOVAPS_xmm_xmm( 4, 6 );
  2263             MOVAPS_xmm_xmm( 5, 7 );
  2264             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2265             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2266             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2267             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2268             MULPS_xmm_xmm( 0, 4 );
  2269             MULPS_xmm_xmm( 1, 5 );
  2270             MULPS_xmm_xmm( 2, 6 );
  2271             MULPS_xmm_xmm( 3, 7 );
  2272             ADDPS_xmm_xmm( 5, 4 );
  2273             ADDPS_xmm_xmm( 7, 6 );
  2274             ADDPS_xmm_xmm( 6, 4 );
  2275             MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2276         } else {
  2277             LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
  2278             call_func1( sh4_ftrv, R_EAX );
  2279         }
  2280     }
  2281     sh4_x86.tstate = TSTATE_NONE;
  2282 :}
  2284 FRCHG {:  
  2285     COUNT_INST(I_FRCHG);
  2286     check_fpuen();
  2287     load_spreg( R_ECX, R_FPSCR );
  2288     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2289     store_spreg( R_ECX, R_FPSCR );
  2290     call_func0( sh4_switch_fr_banks );
  2291     sh4_x86.tstate = TSTATE_NONE;
  2292 :}
  2293 FSCHG {:  
  2294     COUNT_INST(I_FSCHG);
  2295     check_fpuen();
  2296     load_spreg( R_ECX, R_FPSCR );
  2297     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2298     store_spreg( R_ECX, R_FPSCR );
  2299     sh4_x86.tstate = TSTATE_NONE;
  2300     sh4_x86.double_size = !sh4_x86.double_size;
  2301 :}
  2303 /* Processor control instructions */
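       /* Note: SR writes below always go through the sh4_write_sr helper
        * rather than an inline store, since changing SR can switch register
        * banks; check_priv() guards the privileged registers, raising an
        * illegal-instruction exception in user mode. */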
  2304 LDC Rm, SR {:
  2305     COUNT_INST(I_LDCSR);
  2306     if( sh4_x86.in_delay_slot ) {
  2307 	SLOTILLEGAL();
  2308     } else {
  2309 	check_priv();
  2310 	load_reg( R_EAX, Rm );
  2311 	call_func1( sh4_write_sr, R_EAX );
  2312 	sh4_x86.priv_checked = FALSE;
  2313 	sh4_x86.fpuen_checked = FALSE;
  2314 	sh4_x86.tstate = TSTATE_NONE;
  2315     }
  2316 :}
  2317 LDC Rm, GBR {: 
  2318     COUNT_INST(I_LDC);
  2319     load_reg( R_EAX, Rm );
  2320     store_spreg( R_EAX, R_GBR );
  2321 :}
  2322 LDC Rm, VBR {:  
  2323     COUNT_INST(I_LDC);
  2324     check_priv();
  2325     load_reg( R_EAX, Rm );
  2326     store_spreg( R_EAX, R_VBR );
  2327     sh4_x86.tstate = TSTATE_NONE;
  2328 :}
  2329 LDC Rm, SSR {:  
  2330     COUNT_INST(I_LDC);
  2331     check_priv();
  2332     load_reg( R_EAX, Rm );
  2333     store_spreg( R_EAX, R_SSR );
  2334     sh4_x86.tstate = TSTATE_NONE;
  2335 :}
  2336 LDC Rm, SGR {:  
  2337     COUNT_INST(I_LDC);
  2338     check_priv();
  2339     load_reg( R_EAX, Rm );
  2340     store_spreg( R_EAX, R_SGR );
  2341     sh4_x86.tstate = TSTATE_NONE;
  2342 :}
  2343 LDC Rm, SPC {:  
  2344     COUNT_INST(I_LDC);
  2345     check_priv();
  2346     load_reg( R_EAX, Rm );
  2347     store_spreg( R_EAX, R_SPC );
  2348     sh4_x86.tstate = TSTATE_NONE;
  2349 :}
  2350 LDC Rm, DBR {:  
  2351     COUNT_INST(I_LDC);
  2352     check_priv();
  2353     load_reg( R_EAX, Rm );
  2354     store_spreg( R_EAX, R_DBR );
  2355     sh4_x86.tstate = TSTATE_NONE;
  2356 :}
  2357 LDC Rm, Rn_BANK {:  
  2358     COUNT_INST(I_LDC);
  2359     check_priv();
  2360     load_reg( R_EAX, Rm );
  2361     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2362     sh4_x86.tstate = TSTATE_NONE;
  2363 :}
  2364 LDC.L @Rm+, GBR {:  
  2365     COUNT_INST(I_LDCM);
  2366     load_reg( R_EAX, Rm );
  2367     check_ralign32( R_EAX );
  2368     MMU_TRANSLATE_READ( R_EAX );
  2369     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2370     MEM_READ_LONG( R_EAX, R_EAX );
  2371     store_spreg( R_EAX, R_GBR );
  2372     sh4_x86.tstate = TSTATE_NONE;
  2373 :}
  2374 LDC.L @Rm+, SR {:
  2375     COUNT_INST(I_LDCSRM);
  2376     if( sh4_x86.in_delay_slot ) {
  2377 	SLOTILLEGAL();
  2378     } else {
  2379 	check_priv();
  2380 	load_reg( R_EAX, Rm );
  2381 	check_ralign32( R_EAX );
  2382 	MMU_TRANSLATE_READ( R_EAX );
  2383 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2384 	MEM_READ_LONG( R_EAX, R_EAX );
  2385 	call_func1( sh4_write_sr, R_EAX );
  2386 	sh4_x86.priv_checked = FALSE;
  2387 	sh4_x86.fpuen_checked = FALSE;
  2388 	sh4_x86.tstate = TSTATE_NONE;
  2389     }
  2390 :}
  2391 LDC.L @Rm+, VBR {:  
  2392     COUNT_INST(I_LDCM);
  2393     check_priv();
  2394     load_reg( R_EAX, Rm );
  2395     check_ralign32( R_EAX );
  2396     MMU_TRANSLATE_READ( R_EAX );
  2397     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2398     MEM_READ_LONG( R_EAX, R_EAX );
  2399     store_spreg( R_EAX, R_VBR );
  2400     sh4_x86.tstate = TSTATE_NONE;
  2401 :}
  2402 LDC.L @Rm+, SSR {:
  2403     COUNT_INST(I_LDCM);
  2404     check_priv();
  2405     load_reg( R_EAX, Rm );
  2406     check_ralign32( R_EAX );
  2407     MMU_TRANSLATE_READ( R_EAX );
  2408     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2409     MEM_READ_LONG( R_EAX, R_EAX );
  2410     store_spreg( R_EAX, R_SSR );
  2411     sh4_x86.tstate = TSTATE_NONE;
  2412 :}
  2413 LDC.L @Rm+, SGR {:  
  2414     COUNT_INST(I_LDCM);
  2415     check_priv();
  2416     load_reg( R_EAX, Rm );
  2417     check_ralign32( R_EAX );
  2418     MMU_TRANSLATE_READ( R_EAX );
  2419     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2420     MEM_READ_LONG( R_EAX, R_EAX );
  2421     store_spreg( R_EAX, R_SGR );
  2422     sh4_x86.tstate = TSTATE_NONE;
  2423 :}
  2424 LDC.L @Rm+, SPC {:  
  2425     COUNT_INST(I_LDCM);
  2426     check_priv();
  2427     load_reg( R_EAX, Rm );
  2428     check_ralign32( R_EAX );
  2429     MMU_TRANSLATE_READ( R_EAX );
  2430     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2431     MEM_READ_LONG( R_EAX, R_EAX );
  2432     store_spreg( R_EAX, R_SPC );
  2433     sh4_x86.tstate = TSTATE_NONE;
  2434 :}
  2435 LDC.L @Rm+, DBR {:  
  2436     COUNT_INST(I_LDCM);
  2437     check_priv();
  2438     load_reg( R_EAX, Rm );
  2439     check_ralign32( R_EAX );
  2440     MMU_TRANSLATE_READ( R_EAX );
  2441     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2442     MEM_READ_LONG( R_EAX, R_EAX );
  2443     store_spreg( R_EAX, R_DBR );
  2444     sh4_x86.tstate = TSTATE_NONE;
  2445 :}
  2446 LDC.L @Rm+, Rn_BANK {:  
  2447     COUNT_INST(I_LDCM);
  2448     check_priv();
  2449     load_reg( R_EAX, Rm );
  2450     check_ralign32( R_EAX );
  2451     MMU_TRANSLATE_READ( R_EAX );
  2452     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2453     MEM_READ_LONG( R_EAX, R_EAX );
  2454     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2455     sh4_x86.tstate = TSTATE_NONE;
  2456 :}
  2457 LDS Rm, FPSCR {:
  2458     COUNT_INST(I_LDSFPSCR);
  2459     check_fpuen();
  2460     load_reg( R_EAX, Rm );
  2461     call_func1( sh4_write_fpscr, R_EAX );
  2462     sh4_x86.tstate = TSTATE_NONE;
  2463     return 2;
  2464 :}
  2465 LDS.L @Rm+, FPSCR {:  
  2466     COUNT_INST(I_LDSFPSCRM);
  2467     check_fpuen();
  2468     load_reg( R_EAX, Rm );
  2469     check_ralign32( R_EAX );
  2470     MMU_TRANSLATE_READ( R_EAX );
  2471     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2472     MEM_READ_LONG( R_EAX, R_EAX );
  2473     call_func1( sh4_write_fpscr, R_EAX );
  2474     sh4_x86.tstate = TSTATE_NONE;
  2475     return 2;
  2476 :}
  2477 LDS Rm, FPUL {:  
  2478     COUNT_INST(I_LDS);
  2479     check_fpuen();
  2480     load_reg( R_EAX, Rm );
  2481     store_spreg( R_EAX, R_FPUL );
  2482 :}
  2483 LDS.L @Rm+, FPUL {:  
  2484     COUNT_INST(I_LDSM);
  2485     check_fpuen();
  2486     load_reg( R_EAX, Rm );
  2487     check_ralign32( R_EAX );
  2488     MMU_TRANSLATE_READ( R_EAX );
  2489     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2490     MEM_READ_LONG( R_EAX, R_EAX );
  2491     store_spreg( R_EAX, R_FPUL );
  2492     sh4_x86.tstate = TSTATE_NONE;
  2493 :}
  2494 LDS Rm, MACH {: 
  2495     COUNT_INST(I_LDS);
  2496     load_reg( R_EAX, Rm );
  2497     store_spreg( R_EAX, R_MACH );
  2498 :}
  2499 LDS.L @Rm+, MACH {:  
  2500     COUNT_INST(I_LDSM);
  2501     load_reg( R_EAX, Rm );
  2502     check_ralign32( R_EAX );
  2503     MMU_TRANSLATE_READ( R_EAX );
  2504     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2505     MEM_READ_LONG( R_EAX, R_EAX );
  2506     store_spreg( R_EAX, R_MACH );
  2507     sh4_x86.tstate = TSTATE_NONE;
  2508 :}
  2509 LDS Rm, MACL {:  
  2510     COUNT_INST(I_LDS);
  2511     load_reg( R_EAX, Rm );
  2512     store_spreg( R_EAX, R_MACL );
  2513 :}
  2514 LDS.L @Rm+, MACL {:  
  2515     COUNT_INST(I_LDSM);
  2516     load_reg( R_EAX, Rm );
  2517     check_ralign32( R_EAX );
  2518     MMU_TRANSLATE_READ( R_EAX );
  2519     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2520     MEM_READ_LONG( R_EAX, R_EAX );
  2521     store_spreg( R_EAX, R_MACL );
  2522     sh4_x86.tstate = TSTATE_NONE;
  2523 :}
  2524 LDS Rm, PR {:  
  2525     COUNT_INST(I_LDS);
  2526     load_reg( R_EAX, Rm );
  2527     store_spreg( R_EAX, R_PR );
  2528 :}
  2529 LDS.L @Rm+, PR {:  
  2530     COUNT_INST(I_LDSM);
  2531     load_reg( R_EAX, Rm );
  2532     check_ralign32( R_EAX );
  2533     MMU_TRANSLATE_READ( R_EAX );
  2534     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2535     MEM_READ_LONG( R_EAX, R_EAX );
  2536     store_spreg( R_EAX, R_PR );
  2537     sh4_x86.tstate = TSTATE_NONE;
  2538 :}
  2539 LDTLB {:  
  2540     COUNT_INST(I_LDTLB);
  2541     call_func0( MMU_ldtlb );
  2542     sh4_x86.tstate = TSTATE_NONE;
  2543 :}
  2544 OCBI @Rn {:
  2545     COUNT_INST(I_OCBI);
  2546 :}
  2547 OCBP @Rn {:
  2548     COUNT_INST(I_OCBP);
  2549 :}
  2550 OCBWB @Rn {:
  2551     COUNT_INST(I_OCBWB);
  2552 :}
  2553 PREF @Rn {:
  2554     COUNT_INST(I_PREF);
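           /* Note: only PREFs to the store-queue region have a side effect to
            * model here - the AND/CMP below tests for addresses in
            * 0xE0000000-0xE3FFFFFF and skips the flush otherwise; with the TLB
            * enabled the flush helper may fail, in which case JE_exc exits to
            * the exception handler. */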
  2555     load_reg( R_EAX, Rn );
  2556     MOV_r32_r32( R_EAX, R_ECX );
  2557     AND_imm32_r32( 0xFC000000, R_ECX );
  2558     CMP_imm32_r32( 0xE0000000, R_ECX );
  2559     JNE_rel8(end);
  2560     if( sh4_x86.tlb_on ) {
  2561     	call_func1( sh4_flush_store_queue_mmu, R_EAX );
  2562         TEST_r32_r32( R_EAX, R_EAX );
  2563         JE_exc(-1);
  2564     } else {
  2565     	call_func1( sh4_flush_store_queue, R_EAX );
  2566     }
  2567     JMP_TARGET(end);
  2568     sh4_x86.tstate = TSTATE_NONE;
  2569 :}
  2570 SLEEP {: 
  2571     COUNT_INST(I_SLEEP);
  2572     check_priv();
  2573     call_func0( sh4_sleep );
  2574     sh4_x86.tstate = TSTATE_NONE;
  2575     sh4_x86.in_delay_slot = DELAY_NONE;
  2576     return 2;
  2577 :}
  2578 STC SR, Rn {:
  2579     COUNT_INST(I_STCSR);
  2580     check_priv();
  2581     call_func0(sh4_read_sr);
  2582     store_reg( R_EAX, Rn );
  2583     sh4_x86.tstate = TSTATE_NONE;
  2584 :}
  2585 STC GBR, Rn {:  
  2586     COUNT_INST(I_STC);
  2587     load_spreg( R_EAX, R_GBR );
  2588     store_reg( R_EAX, Rn );
  2589 :}
  2590 STC VBR, Rn {:  
  2591     COUNT_INST(I_STC);
  2592     check_priv();
  2593     load_spreg( R_EAX, R_VBR );
  2594     store_reg( R_EAX, Rn );
  2595     sh4_x86.tstate = TSTATE_NONE;
  2596 :}
  2597 STC SSR, Rn {:  
  2598     COUNT_INST(I_STC);
  2599     check_priv();
  2600     load_spreg( R_EAX, R_SSR );
  2601     store_reg( R_EAX, Rn );
  2602     sh4_x86.tstate = TSTATE_NONE;
  2603 :}
  2604 STC SPC, Rn {:  
  2605     COUNT_INST(I_STC);
  2606     check_priv();
  2607     load_spreg( R_EAX, R_SPC );
  2608     store_reg( R_EAX, Rn );
  2609     sh4_x86.tstate = TSTATE_NONE;
  2610 :}
  2611 STC SGR, Rn {:  
  2612     COUNT_INST(I_STC);
  2613     check_priv();
  2614     load_spreg( R_EAX, R_SGR );
  2615     store_reg( R_EAX, Rn );
  2616     sh4_x86.tstate = TSTATE_NONE;
  2617 :}
  2618 STC DBR, Rn {:  
  2619     COUNT_INST(I_STC);
  2620     check_priv();
  2621     load_spreg( R_EAX, R_DBR );
  2622     store_reg( R_EAX, Rn );
  2623     sh4_x86.tstate = TSTATE_NONE;
  2624 :}
  2625 STC Rm_BANK, Rn {:
  2626     COUNT_INST(I_STC);
  2627     check_priv();
  2628     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2629     store_reg( R_EAX, Rn );
  2630     sh4_x86.tstate = TSTATE_NONE;
  2631 :}
  2632 STC.L SR, @-Rn {:
  2633     COUNT_INST(I_STCSRM);
  2634     check_priv();
  2635     load_reg( R_EAX, Rn );
  2636     check_walign32( R_EAX );
  2637     ADD_imm8s_r32( -4, R_EAX );
  2638     MMU_TRANSLATE_WRITE( R_EAX );
  2639     PUSH_realigned_r32( R_EAX );
  2640     call_func0( sh4_read_sr );
  2641     POP_realigned_r32( R_ECX );
  2642     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2643     MEM_WRITE_LONG( R_ECX, R_EAX );
  2644     sh4_x86.tstate = TSTATE_NONE;
  2645 :}
  2646 STC.L VBR, @-Rn {:  
  2647     COUNT_INST(I_STCM);
  2648     check_priv();
  2649     load_reg( R_EAX, Rn );
  2650     check_walign32( R_EAX );
  2651     ADD_imm8s_r32( -4, R_EAX );
  2652     MMU_TRANSLATE_WRITE( R_EAX );
  2653     load_spreg( R_EDX, R_VBR );
  2654     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2655     MEM_WRITE_LONG( R_EAX, R_EDX );
  2656     sh4_x86.tstate = TSTATE_NONE;
  2657 :}
  2658 STC.L SSR, @-Rn {:  
  2659     COUNT_INST(I_STCM);
  2660     check_priv();
  2661     load_reg( R_EAX, Rn );
  2662     check_walign32( R_EAX );
  2663     ADD_imm8s_r32( -4, R_EAX );
  2664     MMU_TRANSLATE_WRITE( R_EAX );
  2665     load_spreg( R_EDX, R_SSR );
  2666     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2667     MEM_WRITE_LONG( R_EAX, R_EDX );
  2668     sh4_x86.tstate = TSTATE_NONE;
  2669 :}
  2670 STC.L SPC, @-Rn {:
  2671     COUNT_INST(I_STCM);
  2672     check_priv();
  2673     load_reg( R_EAX, Rn );
  2674     check_walign32( R_EAX );
  2675     ADD_imm8s_r32( -4, R_EAX );
  2676     MMU_TRANSLATE_WRITE( R_EAX );
  2677     load_spreg( R_EDX, R_SPC );
  2678     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2679     MEM_WRITE_LONG( R_EAX, R_EDX );
  2680     sh4_x86.tstate = TSTATE_NONE;
  2681 :}
  2682 STC.L SGR, @-Rn {:  
  2683     COUNT_INST(I_STCM);
  2684     check_priv();
  2685     load_reg( R_EAX, Rn );
  2686     check_walign32( R_EAX );
  2687     ADD_imm8s_r32( -4, R_EAX );
  2688     MMU_TRANSLATE_WRITE( R_EAX );
  2689     load_spreg( R_EDX, R_SGR );
  2690     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2691     MEM_WRITE_LONG( R_EAX, R_EDX );
  2692     sh4_x86.tstate = TSTATE_NONE;
  2693 :}
  2694 STC.L DBR, @-Rn {:  
  2695     COUNT_INST(I_STCM);
  2696     check_priv();
  2697     load_reg( R_EAX, Rn );
  2698     check_walign32( R_EAX );
  2699     ADD_imm8s_r32( -4, R_EAX );
  2700     MMU_TRANSLATE_WRITE( R_EAX );
  2701     load_spreg( R_EDX, R_DBR );
  2702     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2703     MEM_WRITE_LONG( R_EAX, R_EDX );
  2704     sh4_x86.tstate = TSTATE_NONE;
  2705 :}
  2706 STC.L Rm_BANK, @-Rn {:  
  2707     COUNT_INST(I_STCM);
  2708     check_priv();
  2709     load_reg( R_EAX, Rn );
  2710     check_walign32( R_EAX );
  2711     ADD_imm8s_r32( -4, R_EAX );
  2712     MMU_TRANSLATE_WRITE( R_EAX );
  2713     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2714     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2715     MEM_WRITE_LONG( R_EAX, R_EDX );
  2716     sh4_x86.tstate = TSTATE_NONE;
  2717 :}
  2718 STC.L GBR, @-Rn {:  
  2719     COUNT_INST(I_STCM);
  2720     load_reg( R_EAX, Rn );
  2721     check_walign32( R_EAX );
  2722     ADD_imm8s_r32( -4, R_EAX );
  2723     MMU_TRANSLATE_WRITE( R_EAX );
  2724     load_spreg( R_EDX, R_GBR );
  2725     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2726     MEM_WRITE_LONG( R_EAX, R_EDX );
  2727     sh4_x86.tstate = TSTATE_NONE;
  2728 :}
  2729 STS FPSCR, Rn {:  
  2730     COUNT_INST(I_STSFPSCR);
  2731     check_fpuen();
  2732     load_spreg( R_EAX, R_FPSCR );
  2733     store_reg( R_EAX, Rn );
  2734 :}
  2735 STS.L FPSCR, @-Rn {:  
  2736     COUNT_INST(I_STSFPSCRM);
  2737     check_fpuen();
  2738     load_reg( R_EAX, Rn );
  2739     check_walign32( R_EAX );
  2740     ADD_imm8s_r32( -4, R_EAX );
  2741     MMU_TRANSLATE_WRITE( R_EAX );
  2742     load_spreg( R_EDX, R_FPSCR );
  2743     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2744     MEM_WRITE_LONG( R_EAX, R_EDX );
  2745     sh4_x86.tstate = TSTATE_NONE;
  2746 :}
  2747 STS FPUL, Rn {:  
  2748     COUNT_INST(I_STS);
  2749     check_fpuen();
  2750     load_spreg( R_EAX, R_FPUL );
  2751     store_reg( R_EAX, Rn );
  2752 :}
  2753 STS.L FPUL, @-Rn {:  
  2754     COUNT_INST(I_STSM);
  2755     check_fpuen();
  2756     load_reg( R_EAX, Rn );
  2757     check_walign32( R_EAX );
  2758     ADD_imm8s_r32( -4, R_EAX );
  2759     MMU_TRANSLATE_WRITE( R_EAX );
  2760     load_spreg( R_EDX, R_FPUL );
  2761     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2762     MEM_WRITE_LONG( R_EAX, R_EDX );
  2763     sh4_x86.tstate = TSTATE_NONE;
  2764 :}
  2765 STS MACH, Rn {:  
  2766     COUNT_INST(I_STS);
  2767     load_spreg( R_EAX, R_MACH );
  2768     store_reg( R_EAX, Rn );
  2769 :}
  2770 STS.L MACH, @-Rn {:  
  2771     COUNT_INST(I_STSM);
  2772     load_reg( R_EAX, Rn );
  2773     check_walign32( R_EAX );
  2774     ADD_imm8s_r32( -4, R_EAX );
  2775     MMU_TRANSLATE_WRITE( R_EAX );
  2776     load_spreg( R_EDX, R_MACH );
  2777     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2778     MEM_WRITE_LONG( R_EAX, R_EDX );
  2779     sh4_x86.tstate = TSTATE_NONE;
  2780 :}
  2781 STS MACL, Rn {:  
  2782     COUNT_INST(I_STS);
  2783     load_spreg( R_EAX, R_MACL );
  2784     store_reg( R_EAX, Rn );
  2785 :}
  2786 STS.L MACL, @-Rn {:  
  2787     COUNT_INST(I_STSM);
  2788     load_reg( R_EAX, Rn );
  2789     check_walign32( R_EAX );
  2790     ADD_imm8s_r32( -4, R_EAX );
  2791     MMU_TRANSLATE_WRITE( R_EAX );
  2792     load_spreg( R_EDX, R_MACL );
  2793     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2794     MEM_WRITE_LONG( R_EAX, R_EDX );
  2795     sh4_x86.tstate = TSTATE_NONE;
  2796 :}
  2797 STS PR, Rn {:  
  2798     COUNT_INST(I_STS);
  2799     load_spreg( R_EAX, R_PR );
  2800     store_reg( R_EAX, Rn );
  2801 :}
  2802 STS.L PR, @-Rn {:  
  2803     COUNT_INST(I_STSM);
  2804     load_reg( R_EAX, Rn );
  2805     check_walign32( R_EAX );
  2806     ADD_imm8s_r32( -4, R_EAX );
  2807     MMU_TRANSLATE_WRITE( R_EAX );
  2808     load_spreg( R_EDX, R_PR );
  2809     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2810     MEM_WRITE_LONG( R_EAX, R_EDX );
  2811     sh4_x86.tstate = TSTATE_NONE;
  2812 :}
  2814 NOP {: 
  2815     COUNT_INST(I_NOP);
  2816     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  2817 :}
  2818 %%
  2819     sh4_x86.in_delay_slot = DELAY_NONE;
  2820     return 0;