lxdream.org :: lxdream/src/sh4/sh4x86.in
filename    src/sh4/sh4x86.in
changeset   937:81b0c79d9788
prev        936:f394309c399a
next        939:6f2302afeb89
author      nkeynes
date        Sat Dec 27 03:14:59 2008 +0000
branch      lxdream-mem
permissions -rw-r--r--
last change Update sh4x86 to take advantage of SR assumptions. nice 2% there :)
/**
 * $Id$
 *
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif

#include "lxdream.h"
#include "sh4/xltcache.h"
#include "sh4/sh4core.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4stat.h"
#include "sh4/sh4mmio.h"
#include "sh4/x86op.h"
#include "clock.h"

#define DEFAULT_BACKPATCH_SIZE 4096
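/* A backpatch record remembers a spot in the generated code where a forward
 * jump to an exception exit still needs its displacement filled in: the byte
 * offset to patch, the instruction that raised it (as an icount from the
 * block start), and the exception code to deliver. The list is replayed when
 * the block is finalised and its exception stubs are emitted (see
 * sh4_translate_end_block_size() below for the corresponding size estimate). */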
struct backpatch_record {
    uint32_t fixup_offset;
    uint32_t fixup_icount;
    int32_t exc_code;
};
#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2

/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc;
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    int tstate;

    /* mode flags */
    gboolean tlb_on; /* True if tlb translation is active */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};
#define TSTATE_NONE -1
#define TSTATE_O    0
#define TSTATE_C    2
#define TSTATE_E    4
#define TSTATE_NE   5
#define TSTATE_G    0xF
#define TSTATE_GE   0xD
#define TSTATE_A    7
#define TSTATE_AE   3
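/* Each TSTATE_* value is the x86 condition-code number that currently mirrors
 * the SH4 T bit in the host EFLAGS (O=0, C/B=2, E/Z=4, NE=5, G=0xF, GE=0xD,
 * A=7, AE=3). A conditional jump on T is then simply opcode 0x70+cc, and
 * XORing the code with 1 negates the condition, since x86 pairs each
 * condition with its complement. TSTATE_NONE means T is only valid in
 * sh4r.t and must be reloaded with a compare first. */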
#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
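/* SSE3 support is reported in CPUID leaf 1 as bit 0 of ECX, hence the
 * (features & 1) test below; EAX/EBX/EDX are listed as clobbers because
 * cpuid overwrites them as well. */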
gboolean is_sse3_supported()
{
    uint32_t features;

    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE;
}
void sh4_translate_init(void)
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
    sh4_x86.sse3_enabled = is_sse3_supported();
}
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                                          sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
        ((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}
/**
 * Emit an instruction to load an SH4 reg into a real register
 */
static inline void load_reg( int x86reg, int sh4reg )
{
    /* mov [bp+n], reg */
    OP(0x8B);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}
static inline void load_reg16s( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xBF);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}

static inline void load_reg16u( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xB7);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}
#define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
#define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )

/**
 * Emit an instruction to load an immediate value into a register
 */
static inline void load_imm32( int x86reg, uint32_t value ) {
    /* mov #value, reg */
    OP(0xB8 + x86reg);
    OP32(value);
}
/**
 * Load an immediate 64-bit quantity (note: x86-64 only)
 */
static inline void load_imm64( int x86reg, uint64_t value ) {
    /* mov #value, reg */
    REXW();
    OP(0xB8 + x86reg);
    OP64(value);
}
/**
 * Emit an instruction to store an SH4 reg (RN)
 */
static inline void store_reg( int x86reg, int sh4reg ) {
    /* mov reg, [bp+n] */
    OP(0x89);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}
/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
#define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
#define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register
 */
#define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
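/* The index^1 in the single-precision accessors reflects how sh4r.fr keeps
 * each even/odd pair of singles word-swapped, so that an FRn:FRn+1 pair sits
 * in memory as one contiguous little-endian double; load_dr0/load_dr1 then
 * address the two 32-bit halves of that double directly. */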
#define push_fpul()  FLDF_sh4r(R_FPUL)
#define pop_fpul()   FSTPF_sh4r(R_FPUL)
#define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( (sh4r.xlat_sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            JMP_exc(EXC_SLOT_ILLEGAL); \
        } else { \
            JMP_exc(EXC_ILLEGAL ); \
        } \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }
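/* Note that check_priv() tests sh4r.xlat_sh4_mode at translation time rather
 * than emitting a runtime test: blocks are specialised on the SR mode bits
 * (the "SR assumptions" this changeset introduces), so a block translated in
 * user mode can simply emit an unconditional jump to the illegal-instruction
 * exception. */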
#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        load_spreg( R_EAX, R_SR );\
        AND_imm32_r32( SR_FD, R_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
        sh4_x86.tstate = TSTATE_NONE; \
    }
#define check_ralign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TEST_imm32_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TEST_imm32_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);
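/* The alignment checks simply TEST the low 1/2/3 address bits and take a data
 * address error exception if any are set - 16/32/64-bit accesses must be
 * 2/4/8-byte aligned respectively. */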
#define UNDEF(ir)
#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
#define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
#define MEM_READ_BYTE( addr_reg, value_reg ) decode_address(addr_reg); call_func1_r32disp8(R_ECX, MEM_REGION_PTR(read_byte), addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_WORD( addr_reg, value_reg ) decode_address(addr_reg); call_func1_r32disp8(R_ECX, MEM_REGION_PTR(read_word), addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_LONG( addr_reg, value_reg ) decode_address(addr_reg); call_func1_r32disp8(R_ECX, MEM_REGION_PTR(read_long), addr_reg ); MEM_RESULT(value_reg)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) decode_address(addr_reg); call_func2_r32disp8(R_ECX, MEM_REGION_PTR(write_byte), addr_reg, value_reg)
#define MEM_WRITE_WORD( addr_reg, value_reg ) decode_address(addr_reg); call_func2_r32disp8(R_ECX, MEM_REGION_PTR(write_word), addr_reg, value_reg)
#define MEM_WRITE_LONG( addr_reg, value_reg ) decode_address(addr_reg); call_func2_r32disp8(R_ECX, MEM_REGION_PTR(write_long), addr_reg, value_reg)
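/* Memory access pattern: decode_address() is expected to leave a pointer to
 * the struct mem_region_fn for the target address in ECX; the access is then
 * an indirect call through the region's read_/write_ slot (hence the
 * offsetof() dispatch), with read results normalised into the requested
 * register by MEM_RESULT. */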
#ifdef HAVE_FRAME_ADDRESS
/**
 * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
#define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) {  call_func1_exc(mmu_vma_to_phys_read, addr_reg, pc); MEM_RESULT(addr_reg); }

/**
 * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
#define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1_exc(mmu_vma_to_phys_write, addr_reg, pc); MEM_RESULT(addr_reg); }
#else
#define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
#define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
#endif
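/* Two variants of the same operation: with HAVE_FRAME_ADDRESS the translation
 * helper can raise the exception itself (call_func1_exc passes the current pc
 * for the backpatch), while the fallback compares the returned address
 * against MMU_VMA_ERROR and branches to a generic exception exit. */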
#define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 2;

/****** Import appropriate calling conventions ******/
#if SIZEOF_VOID_P == 8
#include "sh4/ia64abi.h"
#else /* 32-bit system */
#include "sh4/ia32abi.h"
#endif
void sh4_translate_begin_block( sh4addr_t pc )
{
    enter_block();
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_MMU_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
}
uint32_t sh4_translate_end_block_size()
{
    if( sh4_x86.backpatch_posn <= 3 ) {
        return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
    } else {
        return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
    }
}
/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    load_imm32( R_EAX, pc );
    call_func1( sh4_translate_breakpoint_hit, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
}
#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)

/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
    ADD_r32_sh4r( R_ECX, R_PC );

    load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
    store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );

    call_func0( sh4_execute_instruction );
    load_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_EAX);
    } else {
        call_func1(xlat_get_code,R_EAX);
    }
    exit_block();
}
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both branch and delayed instruction as a single unit (as
 * far as possible).
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or other control transfer).
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    if( !sh4_x86.in_delay_slot ) {
        sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
    }
%%
/* ALU operations */
ADD Rm, Rn {:
    COUNT_INST(I_ADD);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADD #imm, Rn {:
    COUNT_INST(I_ADDI);
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( imm, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    COUNT_INST(I_ADDC);
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADC_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
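/* ADDC shows the tstate optimisation at work: LDC_t() (reloading T into the
 * host carry flag) is skipped entirely when the previous instruction already
 * left T in the carry flag, and after the ADC the new carry is written back
 * with SETC_t() and recorded as TSTATE_C for the next instruction. */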
ADDV Rm, Rn {:
    COUNT_INST(I_ADDV);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    COUNT_INST(I_AND);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    AND_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:
    COUNT_INST(I_ANDI);
    load_reg( R_EAX, 0 );
    AND_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ANDB);
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    MOV_r32_esp8(R_EAX, 0);
    MEM_READ_BYTE( R_EAX, R_EDX );
    MOV_esp8_r32(0, R_EAX);
    AND_imm32_r32(imm, R_EDX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
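/* Read-modify-write pattern used by AND.B/OR.B/XOR.B @(R0,GBR): the address
 * is translated once with write intent, parked in a scratch stack slot
 * (MOV_r32_esp8) across the read call, then restored for the write so the
 * MMU is not consulted twice. */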
CMP/EQ Rm, Rn {:
    COUNT_INST(I_CMPEQ);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:
    COUNT_INST(I_CMPEQI);
    load_reg( R_EAX, 0 );
    CMP_imm8s_r32(imm, R_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:
    COUNT_INST(I_CMPGE);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {:
    COUNT_INST(I_CMPGT);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:
    COUNT_INST(I_CMPHI);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {:
    COUNT_INST(I_CMPHS);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
:}
CMP/PL Rn {:
    COUNT_INST(I_CMPPL);
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:
    COUNT_INST(I_CMPPZ);
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:
    COUNT_INST(I_CMPSTR);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_ECX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    JE_rel8(target1);
    TEST_r8_r8( R_AH, R_AH );
    JE_rel8(target2);
    SHR_imm8_r32( 16, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    JE_rel8(target3);
    TEST_r8_r8( R_AH, R_AH );
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
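/* CMP/STR sets T if any byte of Rm equals the corresponding byte of Rn. The
 * XOR zeroes exactly the matching bytes, and each byte test short-circuits
 * to SETE_t() with ZF set as soon as a zero byte is found; if all four tests
 * fail, the final TEST leaves ZF clear. */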
DIV0S Rm, Rn {:
    COUNT_INST(I_DIV0S);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHR_imm8_r32( 31, R_EAX );
    SHR_imm8_r32( 31, R_ECX );
    store_spreg( R_EAX, R_M );
    store_spreg( R_ECX, R_Q );
    CMP_r32_r32( R_EAX, R_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:
    COUNT_INST(I_DIV0U);
    XOR_r32_r32( R_EAX, R_EAX );
    store_spreg( R_EAX, R_Q );
    store_spreg( R_EAX, R_M );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
DIV1 Rm, Rn {:
    COUNT_INST(I_DIV1);
    load_spreg( R_ECX, R_M );
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    SETC_r8( R_DL ); // Q'
    CMP_sh4r_r32( R_Q, R_ECX );
    JE_rel8(mqequal);
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_rel8(end);
    JMP_TARGET(mqequal);
    SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_TARGET(end);
    store_reg( R_EAX, Rn ); // Done with Rn now
    SETC_r8(R_AL); // tmp1
    XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
    XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
    store_spreg( R_ECX, R_Q );
    XOR_imm8s_r32( 1, R_AL );   // T = !Q'
    MOVZX_r8_r32( R_AL, R_EAX );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
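/* DIV1 is one step of the SH4's non-restoring 1-bit division: the dividend
 * in Rn is rotated left through T, Rm is added or subtracted depending on
 * whether Q and M agree, and the new Q and T are derived from the resulting
 * carry. DIV0S/DIV0U set up M, Q and T before a sequence of DIV1 steps. */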
DMULS.L Rm, Rn {:
    COUNT_INST(I_DMULS);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    IMUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:
    COUNT_INST(I_DMULU);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:
    COUNT_INST(I_DT);
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    store_reg( R_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:
    COUNT_INST(I_EXTSB);
    load_reg( R_EAX, Rm );
    MOVSX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTS.W Rm, Rn {:
    COUNT_INST(I_EXTSW);
    load_reg( R_EAX, Rm );
    MOVSX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.B Rm, Rn {:
    COUNT_INST(I_EXTUB);
    load_reg( R_EAX, Rm );
    MOVZX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.W Rm, Rn {:
    COUNT_INST(I_EXTUW);
    load_reg( R_EAX, Rm );
    MOVZX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
MAC.L @Rm+, @Rn+ {:
    COUNT_INST(I_MACL);
    if( Rm == Rn ) {
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        MOV_r32_esp8(R_EAX, 0);
        load_reg( R_EAX, Rn );
        ADD_imm8s_r32( 4, R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        MOV_r32_esp8( R_EAX, 0 );
        load_reg( R_EAX, Rn );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_LONG( R_EAX, R_EAX );
    MOV_r32_esp8( R_EAX, 4 );
    MOV_esp8_r32( 0, R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    MOV_esp8_r32( 4, R_ECX );

    IMUL_r32( R_ECX );
    ADD_r32_sh4r( R_EAX, R_MACL );
    ADC_r32_sh4r( R_EDX, R_MACH );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32(R_ECX, R_ECX);
    JE_rel8( nosat );
    call_func0( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
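/* MAC.L accumulates the signed 64-bit product into MACH:MACL; when the S
 * flag is set the result is saturated to 48 bits by signsat48(), matching
 * the SH4's MAC.L saturation mode. */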
MAC.W @Rm+, @Rn+ {:
    COUNT_INST(I_MACW);
    if( Rm == Rn ) {
        load_reg( R_EAX, Rm );
        check_ralign16( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        MOV_r32_esp8( R_EAX, 0 );
        load_reg( R_EAX, Rn );
        ADD_imm8s_r32( 2, R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( R_EAX, Rm );
        check_ralign16( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        MOV_r32_esp8( R_EAX, 0 );
        load_reg( R_EAX, Rn );
        check_ralign16( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
        ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_WORD( R_EAX, R_EAX );
    MOV_r32_esp8( R_EAX, 4 );
    MOV_esp8_r32( 0, R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    MOV_esp8_r32( 4, R_ECX );

    IMUL_r32( R_ECX );
    load_spreg( R_ECX, R_S );
    TEST_r32_r32( R_ECX, R_ECX );
    JE_rel8( nosat );

    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    JNO_rel8( end );            // 2
    load_imm32( R_EDX, 1 );         // 5
    store_spreg( R_EDX, R_MACH );   // 6
    JS_rel8( positive );        // 2
    load_imm32( R_EAX, 0x80000000 );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8(end2);           // 2

    JMP_TARGET(positive);
    load_imm32( R_EAX, 0x7FFFFFFF );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8(end3);            // 2

    JMP_TARGET(nosat);
    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    ADC_r32_sh4r( R_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
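/* In MAC.W's saturating mode (S set) only MACL accumulates: on signed
 * overflow the result is clamped to 0x80000000/0x7FFFFFFF and MACH is set
 * to 1 to flag the overflow; with S clear the full 64-bit product is added
 * into MACH:MACL as usual. */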
MOVT Rn {:
    COUNT_INST(I_MOVT);
    load_spreg( R_EAX, R_T );
    store_reg( R_EAX, Rn );
:}
MUL.L Rm, Rn {:
    COUNT_INST(I_MULL);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    COUNT_INST(I_MULSW);
    load_reg16s( R_EAX, Rm );
    load_reg16s( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:
    COUNT_INST(I_MULUW);
    load_reg16u( R_EAX, Rm );
    load_reg16u( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    COUNT_INST(I_NEG);
    load_reg( R_EAX, Rm );
    NEG_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:
    COUNT_INST(I_NEGC);
    load_reg( R_EAX, Rm );
    XOR_r32_r32( R_ECX, R_ECX );
    LDC_t();
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:
    COUNT_INST(I_NOT);
    load_reg( R_EAX, Rm );
    NOT_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:
    COUNT_INST(I_OR);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    COUNT_INST(I_ORI);
    load_reg( R_EAX, 0 );
    OR_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ORB);
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    MOV_r32_esp8( R_EAX, 0 );
    MEM_READ_BYTE( R_EAX, R_EDX );
    MOV_esp8_r32( 0, R_EAX );
    OR_imm32_r32(imm, R_EDX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    COUNT_INST(I_ROTCL);
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:
    COUNT_INST(I_ROTCR);
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:
    COUNT_INST(I_ROTL);
    load_reg( R_EAX, Rn );
    ROL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:
    COUNT_INST(I_ROTR);
    load_reg( R_EAX, Rn );
    ROR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    COUNT_INST(I_SHAD);
    /* Annoyingly enough, not directly convertible */
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(doshl);

    NEG_r32( R_ECX );      // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8(emptysar);     // 2
    SAR_r32_CL( R_EAX );       // 2
    JMP_rel8(end);          // 2

    JMP_TARGET(emptysar);
    SAR_imm8_r32(31, R_EAX );  // 3
    JMP_rel8(end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
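/* SHAD/SHLD can't map directly onto x86 shifts because the semantics differ:
 * a non-negative Rm shifts left by Rm&31, a negative Rm shifts right by
 * (-Rm)&31, and a right-shift count of zero means "shift by 32" - all sign
 * bits for SHAD (hence the SAR by 31) and zero for SHLD. */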
SHLD Rm, Rn {:
    COUNT_INST(I_SHLD);
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(doshl);

    NEG_r32( R_ECX );      // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8(emptyshr );
    SHR_r32_CL( R_EAX );       // 2
    JMP_rel8(end);          // 2

    JMP_TARGET(emptyshr);
    XOR_r32_r32( R_EAX, R_EAX );
    JMP_rel8(end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHAL Rn {:
    COUNT_INST(I_SHAL);
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHAR Rn {:
    COUNT_INST(I_SHAR);
    load_reg( R_EAX, Rn );
    SAR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL Rn {:
    COUNT_INST(I_SHLL);
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL2 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL8 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL16 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR Rn {:
    COUNT_INST(I_SHLR);
    load_reg( R_EAX, Rn );
    SHR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLR2 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR8 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR16 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUB Rm, Rn {:
    COUNT_INST(I_SUB);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUBC Rm, Rn {:
    COUNT_INST(I_SUBC);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SUBV Rm, Rn {:
    COUNT_INST(I_SUBV);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
SWAP.B Rm, Rn {:
    COUNT_INST(I_SWAPB);
    load_reg( R_EAX, Rm );
    XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
    store_reg( R_EAX, Rn );
:}
SWAP.W Rm, Rn {:
    COUNT_INST(I_SWAPW);
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    SHL_imm8_r32( 16, R_ECX );
    SHR_imm8_r32( 16, R_EAX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
TAS.B @Rn {:
    COUNT_INST(I_TASB);
    load_reg( R_EAX, Rn );
    MMU_TRANSLATE_WRITE( R_EAX );
    MOV_r32_esp8( R_EAX, 0 );
    MEM_READ_BYTE( R_EAX, R_EDX );
    TEST_r8_r8( R_DL, R_DL );
    SETE_t();
    OR_imm8_r8( 0x80, R_DL );
    MOV_esp8_r32( 0, R_EAX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
TST Rm, Rn {:
    COUNT_INST(I_TST);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    TEST_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST #imm, R0 {:
    COUNT_INST(I_TSTI);
    load_reg( R_EAX, 0 );
    TEST_imm32_r32( imm, R_EAX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST.B #imm, @(R0, GBR) {:
    COUNT_INST(I_TSTB);
    load_reg( R_EAX, 0);
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    TEST_imm8_r8( imm, R_AL );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
XOR Rm, Rn {:
    COUNT_INST(I_XOR);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR #imm, R0 {:
    COUNT_INST(I_XORI);
    load_reg( R_EAX, 0 );
    XOR_imm32_r32( imm, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR.B #imm, @(R0, GBR) {:
    COUNT_INST(I_XORB);
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    MOV_r32_esp8( R_EAX, 0 );
    MEM_READ_BYTE(R_EAX, R_EDX);
    MOV_esp8_r32( 0, R_EAX );
    XOR_imm32_r32( imm, R_EDX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
XTRCT Rm, Rn {:
    COUNT_INST(I_XTRCT);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    SHR_imm8_r32( 16, R_ECX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Data move instructions */
MOV Rm, Rn {:
    COUNT_INST(I_MOV);
    load_reg( R_EAX, Rm );
    store_reg( R_EAX, Rn );
:}
MOV #imm, Rn {:
    COUNT_INST(I_MOVI);
    load_imm32( R_EAX, imm );
    store_reg( R_EAX, Rn );
:}
MOV.B Rm, @Rn {:
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, Rn );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @-Rn {:
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @(R0, Rn) {:
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, GBR) {:
    COUNT_INST(I_MOVB);
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, Rn) {:
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm, Rn {:
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, Rm );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm+, Rn {:
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, Rm );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(R0, Rm), Rn {:
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, GBR), R0 {:
    COUNT_INST(I_MOVB);
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, Rm), R0 {:
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @Rn {:
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, Rn );
    check_walign32(R_EAX);
    MOV_r32_r32( R_EAX, R_ECX );
    AND_imm32_r32( 0xFC000000, R_ECX );
    CMP_imm32_r32( 0xE0000000, R_ECX );
    JNE_rel8( notsq );
    AND_imm8s_r32( 0x3C, R_EAX );
    load_reg( R_EDX, Rm );
    MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
    JMP_rel8(end);
    JMP_TARGET(notsq);
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
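/* Long stores get a store-queue fast path: addresses in 0xE0000000-0xE3FFFFFF
 * are the SH4 store queues, so the masked compare detects the region and the
 * value is written straight into sh4r.store_queue (indexed by the low address
 * bits) instead of going through MMU translation and the memory callbacks. */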
MOV.L Rm, @-Rn {:
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -4, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(R0, Rn) {:
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L R0, @(disp, GBR) {:
    COUNT_INST(I_MOVL);
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(disp, Rn) {:
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    AND_imm32_r32( 0xFC000000, R_ECX );
    CMP_imm32_r32( 0xE0000000, R_ECX );
    JNE_rel8( notsq );
    AND_imm8s_r32( 0x3C, R_EAX );
    load_reg( R_EDX, Rm );
    MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
    JMP_rel8(end);
    JMP_TARGET(notsq);
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm, Rn {:
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm+, Rn {:
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(R0, Rm), Rn {:
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, GBR), R0 {:
    COUNT_INST(I_MOVL);
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, PC), Rn {:
    COUNT_INST(I_MOVLPC);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
        if( IS_IN_ICACHE(target) ) {
            // If the target address is in the same page as the code, it's
            // pretty safe to just ref it directly and circumvent the whole
            // memory subsystem. (this is a big performance win)

            // FIXME: There's a corner-case that's not handled here when
            // the current code-page is in the ITLB but not in the UTLB.
            // (should generate a TLB miss although need to test SH4
            // behaviour to confirm) Unlikely to be anyone depending on this
            // behaviour though.
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOV_moff32_EAX( ptr );
        } else {
            // Note: we use sh4r.pc for the calc as we could be running at a
            // different virtual address than the translation was done with,
            // but we can safely assume that the low bits are the same.
            load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
            ADD_sh4r_r32( R_PC, R_EAX );
            MMU_TRANSLATE_READ( R_EAX );
            MEM_READ_LONG( R_EAX, R_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( R_EAX, Rn );
    }
:}
MOV.L @(disp, Rm), Rn {:
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, Rm );
    ADD_imm8s_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @Rn {:
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, Rn );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @-Rn {:
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -2, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @(R0, Rn) {:
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, GBR) {:
    COUNT_INST(I_MOVW);
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, Rn) {:
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm, Rn {:
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm+, Rn {:
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(R0, Rm), Rn {:
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, GBR), R0 {:
    COUNT_INST(I_MOVW);
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, PC), Rn {:
    COUNT_INST(I_MOVW);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        // See comments for MOV.L @(disp, PC), Rn
        uint32_t target = pc + disp + 4;
        if( IS_IN_ICACHE(target) ) {
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOV_moff32_EAX( ptr );
            MOVSX_r16_r32( R_EAX, R_EAX );
        } else {
            load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
            ADD_sh4r_r32( R_PC, R_EAX );
            MMU_TRANSLATE_READ( R_EAX );
            MEM_READ_WORD( R_EAX, R_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( R_EAX, Rn );
    }
:}
MOV.W @(disp, Rm), R0 {:
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVA @(disp, PC), R0 {:
    COUNT_INST(I_MOVA);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
        ADD_sh4r_r32( R_PC, R_ECX );
        store_reg( R_ECX, 0 );
        sh4_x86.tstate = TSTATE_NONE;
    }
:}
MOVCA.L R0, @Rn {:
    COUNT_INST(I_MOVCA);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
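/* On hardware MOVCA.L allocates a cache line without fetching it first;
 * since the operand cache isn't modelled here, it is translated as an
 * ordinary 32-bit store. */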
/* Control transfer instructions */
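/* General strategy for branches: the branch itself computes the destination
 * (into NEW_PC for computed branches) and sets in_delay_slot = DELAY_PC, the
 * delay-slot instruction is then translated inline, and the block exits via
 * exit_block_rel/exit_block_newpcset. If the delay slot lies outside the
 * current icache page (UNTRANSLATABLE), the block instead falls back to
 * exit_block_emu, which emulates that single instruction. The conditional
 * branch-with-delay forms translate the delay slot twice - once on the taken
 * path and once on the fall-through - patching a rel32 jump between them. */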
BF disp {:
    COUNT_INST(I_BF);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        JT_rel8( nottaken );
        exit_block_rel(target, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
:}
BF/S disp {:
    COUNT_INST(I_BFS);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        if( UNTRANSLATABLE(pc+2) ) {
            load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
            JT_rel8(nottaken);
            ADD_imm32_r32( disp, R_EAX );
            JMP_TARGET(nottaken);
            ADD_sh4r_r32( R_PC, R_EAX );
            store_spreg( R_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            sh4_x86.branch_taken = TRUE;
            return 2;
        } else {
            if( sh4_x86.tstate == TSTATE_NONE ) {
                CMP_imm8s_sh4r( 1, R_T );
                sh4_x86.tstate = TSTATE_E;
            }
            sh4vma_t target = disp + pc + 4;
            OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
            int save_tstate = sh4_x86.tstate;
            sh4_translate_instruction(pc+2);
            exit_block_rel( target, pc+4 );

            // not taken
            *patch = (xlat_output - ((uint8_t *)patch)) - 4;
            sh4_x86.tstate = save_tstate;
            sh4_translate_instruction(pc+2);
            return 4;
        }
    }
:}
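/* The rel32 fixup above: an x86 jump displacement is relative to the end of
 * its 4-byte immediate, hence the -4 when patching in the distance from the
 * placeholder to the current output position. */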
BRA disp {:
    COUNT_INST(I_BRA);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            load_spreg( R_EAX, R_PC );
            ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
            store_spreg( R_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_rel( disp + pc + 4, pc+4 );
            return 4;
        }
    }
:}
BRAF Rn {:
    COUNT_INST(I_BRAF);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
        store_spreg( R_EAX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
:}
BSR disp {:
    COUNT_INST(I_BSR);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        sh4_x86.tstate = TSTATE_NONE;
        if( UNTRANSLATABLE(pc+2) ) {
            ADD_imm32_r32( disp, R_EAX );
            store_spreg( R_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_rel( disp + pc + 4, pc+4 );
            return 4;
        }
    }
:}
BSRF Rn {:
    COUNT_INST(I_BSRF);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
        store_spreg( R_EAX, R_NEW_PC );

        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
:}
  1653 BT disp {:
  1654     COUNT_INST(I_BT);
  1655     if( sh4_x86.in_delay_slot ) {
  1656 	SLOTILLEGAL();
  1657     } else {
  1658 	sh4vma_t target = disp + pc + 4;
  1659 	JF_rel8( nottaken );
  1660 	exit_block_rel(target, pc+2 );
  1661 	JMP_TARGET(nottaken);
  1662 	return 2;
  1663     }
  1664 :}
  1665 BT/S disp {:
  1666     COUNT_INST(I_BTS);
  1667     if( sh4_x86.in_delay_slot ) {
  1668 	SLOTILLEGAL();
  1669     } else {
  1670 	sh4_x86.in_delay_slot = DELAY_PC;
  1671 	if( UNTRANSLATABLE(pc+2) ) {
  1672 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1673 	    JF_rel8(nottaken);
  1674 	    ADD_imm32_r32( disp, R_EAX );
  1675 	    JMP_TARGET(nottaken);
  1676 	    ADD_sh4r_r32( R_PC, R_EAX );
  1677 	    store_spreg( R_EAX, R_NEW_PC );
  1678 	    exit_block_emu(pc+2);
  1679 	    sh4_x86.branch_taken = TRUE;
  1680 	    return 2;
  1681 	} else {
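       	    // Same emit-and-backpatch scheme as BF/S above, with the test sense
       	    // inverted (tstate^1) so the taken path is skipped when T is clear.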
  1682 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1683 		CMP_imm8s_sh4r( 1, R_T );
  1684 		sh4_x86.tstate = TSTATE_E;
  1685 	    }
  1686 	    OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
  1687 	    int save_tstate = sh4_x86.tstate;
  1688 	    sh4_translate_instruction(pc+2);
  1689 	    exit_block_rel( disp + pc + 4, pc+4 );
  1690 	    // not taken
  1691 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1692 	    sh4_x86.tstate = save_tstate;
  1693 	    sh4_translate_instruction(pc+2);
  1694 	    return 4;
  1695 	}
  1696     }
  1697 :}
  1698 JMP @Rn {:  
  1699     COUNT_INST(I_JMP);
  1700     if( sh4_x86.in_delay_slot ) {
  1701 	SLOTILLEGAL();
  1702     } else {
  1703 	load_reg( R_ECX, Rn );
  1704 	store_spreg( R_ECX, R_NEW_PC );
  1705 	sh4_x86.in_delay_slot = DELAY_PC;
  1706 	sh4_x86.branch_taken = TRUE;
  1707 	if( UNTRANSLATABLE(pc+2) ) {
  1708 	    exit_block_emu(pc+2);
  1709 	    return 2;
  1710 	} else {
  1711 	    sh4_translate_instruction(pc+2);
  1712 	    exit_block_newpcset(pc+2);
  1713 	    return 4;
  1714 	}
  1715     }
  1716 :}
  1717 JSR @Rn {:  
  1718     COUNT_INST(I_JSR);
  1719     if( sh4_x86.in_delay_slot ) {
  1720 	SLOTILLEGAL();
  1721     } else {
  1722 	load_spreg( R_EAX, R_PC );
  1723 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1724 	store_spreg( R_EAX, R_PR );
  1725 	load_reg( R_ECX, Rn );
  1726 	store_spreg( R_ECX, R_NEW_PC );
  1727 	sh4_x86.in_delay_slot = DELAY_PC;
  1728 	sh4_x86.branch_taken = TRUE;
  1729 	sh4_x86.tstate = TSTATE_NONE;
  1730 	if( UNTRANSLATABLE(pc+2) ) {
  1731 	    exit_block_emu(pc+2);
  1732 	    return 2;
  1733 	} else {
  1734 	    sh4_translate_instruction(pc+2);
  1735 	    exit_block_newpcset(pc+2);
  1736 	    return 4;
  1737 	}
  1738     }
  1739 :}
  1740 RTE {:  
  1741     COUNT_INST(I_RTE);
  1742     if( sh4_x86.in_delay_slot ) {
  1743 	SLOTILLEGAL();
  1744     } else {
  1745 	check_priv();
  1746 	load_spreg( R_ECX, R_SPC );
  1747 	store_spreg( R_ECX, R_NEW_PC );
  1748 	load_spreg( R_EAX, R_SSR );
  1749 	call_func1( sh4_write_sr, R_EAX );
  1750 	sh4_x86.in_delay_slot = DELAY_PC;
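       	// sh4_write_sr can flip the register banks and the FPU-enable bit, so the
       	// cached fpuen check and condition-code state are invalidated below.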
  1751 	sh4_x86.fpuen_checked = FALSE;
  1752 	sh4_x86.tstate = TSTATE_NONE;
  1753 	sh4_x86.branch_taken = TRUE;
  1754 	if( UNTRANSLATABLE(pc+2) ) {
  1755 	    exit_block_emu(pc+2);
  1756 	    return 2;
  1757 	} else {
  1758 	    sh4_translate_instruction(pc+2);
  1759 	    exit_block_newpcset(pc+2);
  1760 	    return 4;
  1761 	}
  1762     }
  1763 :}
  1764 RTS {:  
  1765     COUNT_INST(I_RTS);
  1766     if( sh4_x86.in_delay_slot ) {
  1767 	SLOTILLEGAL();
  1768     } else {
  1769 	load_spreg( R_ECX, R_PR );
  1770 	store_spreg( R_ECX, R_NEW_PC );
  1771 	sh4_x86.in_delay_slot = DELAY_PC;
  1772 	sh4_x86.branch_taken = TRUE;
  1773 	if( UNTRANSLATABLE(pc+2) ) {
  1774 	    exit_block_emu(pc+2);
  1775 	    return 2;
  1776 	} else {
  1777 	    sh4_translate_instruction(pc+2);
  1778 	    exit_block_newpcset(pc+2);
  1779 	    return 4;
  1780 	}
  1781     }
  1782 :}
  1783 TRAPA #imm {:  
  1784     COUNT_INST(I_TRAPA);
  1785     if( sh4_x86.in_delay_slot ) {
  1786 	SLOTILLEGAL();
  1787     } else {
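       	// Commit the PC back to sh4r before raising the trap (sh4_raise_trap
       	// presumably reads it); imm is passed straight through to sh4_raise_trap.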
  1788 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1789 	ADD_r32_sh4r( R_ECX, R_PC );
  1790 	load_imm32( R_EAX, imm );
  1791 	call_func1( sh4_raise_trap, R_EAX );
  1792 	sh4_x86.tstate = TSTATE_NONE;
  1793 	exit_block_pcset(pc);
  1794 	sh4_x86.branch_taken = TRUE;
  1795 	return 2;
  1796     }
  1797 :}
  1798 UNDEF {:  
  1799     COUNT_INST(I_UNDEF);
  1800     if( sh4_x86.in_delay_slot ) {
  1801 	SLOTILLEGAL();
  1802     } else {
  1803 	JMP_exc(EXC_ILLEGAL);
  1804 	return 2;
  1805     }
  1806 :}
  1808 CLRMAC {:  
  1809     COUNT_INST(I_CLRMAC);
  1810     XOR_r32_r32(R_EAX, R_EAX);
  1811     store_spreg( R_EAX, R_MACL );
  1812     store_spreg( R_EAX, R_MACH );
  1813     sh4_x86.tstate = TSTATE_NONE;
  1814 :}
  1815 CLRS {:
  1816     COUNT_INST(I_CLRS);
  1817     CLC();
  1818     SETC_sh4r(R_S);
  1819     sh4_x86.tstate = TSTATE_NONE;
  1820 :}
  1821 CLRT {:  
  1822     COUNT_INST(I_CLRT);
  1823     CLC();
  1824     SETC_t();
  1825     sh4_x86.tstate = TSTATE_C;
  1826 :}
  1827 SETS {:  
  1828     COUNT_INST(I_SETS);
  1829     STC();
  1830     SETC_sh4r(R_S);
  1831     sh4_x86.tstate = TSTATE_NONE;
  1832 :}
  1833 SETT {:  
  1834     COUNT_INST(I_SETT);
  1835     STC();
  1836     SETC_t();
  1837     sh4_x86.tstate = TSTATE_C;
  1838 :}
  1840 /* Floating point moves */
  1841 FMOV FRm, FRn {:  
  1842     COUNT_INST(I_FMOV1);
  1843     check_fpuen();
  1844     if( sh4_x86.double_size ) {
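               // SZ=1: FMOV moves a 64-bit register pair, handled as two 32-bit halves.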
  1845         load_dr0( R_EAX, FRm );
  1846         load_dr1( R_ECX, FRm );
  1847         store_dr0( R_EAX, FRn );
  1848         store_dr1( R_ECX, FRn );
  1849     } else {
  1850         load_fr( R_EAX, FRm ); // SZ=0 branch
  1851         store_fr( R_EAX, FRn );
  1852     }
  1853 :}
  1854 FMOV FRm, @Rn {: 
  1855     COUNT_INST(I_FMOV2);
  1856     check_fpuen();
  1857     load_reg( R_EAX, Rn );
  1858     if( sh4_x86.double_size ) {
  1859         check_walign64( R_EAX );
  1860         MMU_TRANSLATE_WRITE( R_EAX );
  1861         load_dr0( R_EDX, FRm );
  1862         load_dr1( R_ECX, FRm );
  1863         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1864     } else {
  1865         check_walign32( R_EAX );
  1866         MMU_TRANSLATE_WRITE( R_EAX );
  1867         load_fr( R_EDX, FRm );
  1868         MEM_WRITE_LONG( R_EAX, R_EDX );
  1869     }
  1870     sh4_x86.tstate = TSTATE_NONE;
  1871 :}
  1872 FMOV @Rm, FRn {:  
  1873     COUNT_INST(I_FMOV5);
  1874     check_fpuen();
  1875     load_reg( R_EAX, Rm );
  1876     if( sh4_x86.double_size ) {
  1877         check_ralign64( R_EAX );
  1878         MMU_TRANSLATE_READ( R_EAX );
  1879         MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
  1880         store_dr0( R_EDX, FRn );
  1881         store_dr1( R_EAX, FRn );    
  1882     } else {
  1883         check_ralign32( R_EAX );
  1884         MMU_TRANSLATE_READ( R_EAX );
  1885         MEM_READ_LONG( R_EAX, R_EAX );
  1886         store_fr( R_EAX, FRn );
  1887     }
  1888     sh4_x86.tstate = TSTATE_NONE;
  1889 :}
  1890 FMOV FRm, @-Rn {:  
  1891     COUNT_INST(I_FMOV3);
  1892     check_fpuen();
  1893     load_reg( R_EAX, Rn );
  1894     if( sh4_x86.double_size ) {
  1895         check_walign64( R_EAX );
  1896         ADD_imm8s_r32(-8,R_EAX);
  1897         MMU_TRANSLATE_WRITE( R_EAX );
  1898         load_dr0( R_EDX, FRm );
  1899         load_dr1( R_ECX, FRm );
  1900         ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1901         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1902     } else {
  1903         check_walign32( R_EAX );
  1904         ADD_imm8s_r32( -4, R_EAX );
  1905         MMU_TRANSLATE_WRITE( R_EAX );
  1906         load_fr( R_EDX, FRm );
  1907         ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1908         MEM_WRITE_LONG( R_EAX, R_EDX );
  1909     }
  1910     sh4_x86.tstate = TSTATE_NONE;
  1911 :}
  1912 FMOV @Rm+, FRn {:
  1913     COUNT_INST(I_FMOV6);
  1914     check_fpuen();
  1915     load_reg( R_EAX, Rm );
  1916     if( sh4_x86.double_size ) {
  1917         check_ralign64( R_EAX );
  1918         MMU_TRANSLATE_READ( R_EAX );
  1919         ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1920         MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
  1921         store_dr0( R_EDX, FRn );
  1922         store_dr1( R_EAX, FRn );
  1923     } else {
  1924         check_ralign32( R_EAX );
  1925         MMU_TRANSLATE_READ( R_EAX );
  1926         ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1927         MEM_READ_LONG( R_EAX, R_EAX );
  1928         store_fr( R_EAX, FRn );
  1929     }
  1930     sh4_x86.tstate = TSTATE_NONE;
  1931 :}
  1932 FMOV FRm, @(R0, Rn) {:  
  1933     COUNT_INST(I_FMOV4);
  1934     check_fpuen();
  1935     load_reg( R_EAX, Rn );
  1936     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1937     if( sh4_x86.double_size ) {
  1938         check_walign64( R_EAX );
  1939         MMU_TRANSLATE_WRITE( R_EAX );
  1940         load_dr0( R_EDX, FRm );
  1941         load_dr1( R_ECX, FRm );
  1942         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1943     } else {
  1944         check_walign32( R_EAX );
  1945         MMU_TRANSLATE_WRITE( R_EAX );
  1946         load_fr( R_EDX, FRm );
  1947         MEM_WRITE_LONG( R_EAX, R_EDX ); // 12
  1948     }
  1949     sh4_x86.tstate = TSTATE_NONE;
  1950 :}
  1951 FMOV @(R0, Rm), FRn {:  
  1952     COUNT_INST(I_FMOV7);
  1953     check_fpuen();
  1954     load_reg( R_EAX, Rm );
  1955     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1956     if( sh4_x86.double_size ) {
  1957         check_ralign64( R_EAX );
  1958         MMU_TRANSLATE_READ( R_EAX );
  1959         MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1960         store_dr0( R_ECX, FRn );
  1961         store_dr1( R_EAX, FRn );
  1962     } else {
  1963         check_ralign32( R_EAX );
  1964         MMU_TRANSLATE_READ( R_EAX );
  1965         MEM_READ_LONG( R_EAX, R_EAX );
  1966         store_fr( R_EAX, FRn );
  1967     }
  1968     sh4_x86.tstate = TSTATE_NONE;
  1969 :}
  1970 FLDI0 FRn {:  /* IFF PR=0 */
  1971     COUNT_INST(I_FLDI0);
  1972     check_fpuen();
  1973     if( sh4_x86.double_prec == 0 ) {
  1974         XOR_r32_r32( R_EAX, R_EAX );
  1975         store_fr( R_EAX, FRn );
  1976     }
  1977     sh4_x86.tstate = TSTATE_NONE;
  1978 :}
  1979 FLDI1 FRn {:  /* IFF PR=0 */
  1980     COUNT_INST(I_FLDI1);
  1981     check_fpuen();
  1982     if( sh4_x86.double_prec == 0 ) {
  1983         load_imm32(R_EAX, 0x3F800000);
  1984         store_fr( R_EAX, FRn );
  1985     }
  1986 :}
  1988 FLOAT FPUL, FRn {:  
  1989     COUNT_INST(I_FLOAT);
  1990     check_fpuen();
  1991     FILD_sh4r(R_FPUL);
  1992     if( sh4_x86.double_prec ) {
  1993         pop_dr( FRn );
  1994     } else {
  1995         pop_fr( FRn );
  1996     }
  1997 :}
  1998 FTRC FRm, FPUL {:  
  1999     COUNT_INST(I_FTRC);
  2000     check_fpuen();
  2001     if( sh4_x86.double_prec ) {
  2002         push_dr( FRm );
  2003     } else {
  2004         push_fr( FRm );
  2005     }
  2006     load_ptr( R_ECX, &max_int );
  2007     FILD_r32ind( R_ECX );
  2008     FCOMIP_st(1);
  2009     JNA_rel8( sat );
  2010     load_ptr( R_ECX, &min_int );  // 5
  2011     FILD_r32ind( R_ECX );           // 2
  2012     FCOMIP_st(1);                   // 2
  2013     JAE_rel8( sat2 );            // 2
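           // In range: temporarily switch the x87 control word to round-toward-zero
           // so FISTP truncates as FTRC requires, then restore the saved mode.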
  2014     load_ptr( R_EAX, &save_fcw );
  2015     FNSTCW_r32ind( R_EAX );
  2016     load_ptr( R_EDX, &trunc_fcw );
  2017     FLDCW_r32ind( R_EDX );
  2018     FISTP_sh4r(R_FPUL);             // 3
  2019     FLDCW_r32ind( R_EAX );
  2020     JMP_rel8(end);             // 2
  2022     JMP_TARGET(sat);
  2023     JMP_TARGET(sat2);
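           // Saturated: R_ECX still points at max_int or min_int, so store that
           // bound directly to FPUL and discard the x87 operand.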
  2024     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  2025     store_spreg( R_ECX, R_FPUL );
  2026     FPOP_st();
  2027     JMP_TARGET(end);
  2028     sh4_x86.tstate = TSTATE_NONE;
  2029 :}
  2030 FLDS FRm, FPUL {:  
  2031     COUNT_INST(I_FLDS);
  2032     check_fpuen();
  2033     load_fr( R_EAX, FRm );
  2034     store_spreg( R_EAX, R_FPUL );
  2035 :}
  2036 FSTS FPUL, FRn {:  
  2037     COUNT_INST(I_FSTS);
  2038     check_fpuen();
  2039     load_spreg( R_EAX, R_FPUL );
  2040     store_fr( R_EAX, FRn );
  2041 :}
  2042 FCNVDS FRm, FPUL {:  
  2043     COUNT_INST(I_FCNVDS);
  2044     check_fpuen();
  2045     if( sh4_x86.double_prec ) {
  2046         push_dr( FRm );
  2047         pop_fpul();
  2048     }
  2049 :}
  2050 FCNVSD FPUL, FRn {:  
  2051     COUNT_INST(I_FCNVSD);
  2052     check_fpuen();
  2053     if( sh4_x86.double_prec ) {
  2054         push_fpul();
  2055         pop_dr( FRn );
  2056     }
  2057 :}
  2059 /* Floating point instructions */
  2060 FABS FRn {:  
  2061     COUNT_INST(I_FABS);
  2062     check_fpuen();
  2063     if( sh4_x86.double_prec ) {
  2064         push_dr(FRn);
  2065         FABS_st0();
  2066         pop_dr(FRn);
  2067     } else {
  2068         push_fr(FRn);
  2069         FABS_st0();
  2070         pop_fr(FRn);
  2071     }
  2072 :}
  2073 FADD FRm, FRn {:  
  2074     COUNT_INST(I_FADD);
  2075     check_fpuen();
  2076     if( sh4_x86.double_prec ) {
  2077         push_dr(FRm);
  2078         push_dr(FRn);
  2079         FADDP_st(1);
  2080         pop_dr(FRn);
  2081     } else {
  2082         push_fr(FRm);
  2083         push_fr(FRn);
  2084         FADDP_st(1);
  2085         pop_fr(FRn);
  2086     }
  2087 :}
  2088 FDIV FRm, FRn {:  
  2089     COUNT_INST(I_FDIV);
  2090     check_fpuen();
  2091     if( sh4_x86.double_prec ) {
  2092         push_dr(FRn);
  2093         push_dr(FRm);
  2094         FDIVP_st(1);
  2095         pop_dr(FRn);
  2096     } else {
  2097         push_fr(FRn);
  2098         push_fr(FRm);
  2099         FDIVP_st(1);
  2100         pop_fr(FRn);
  2101     }
  2102 :}
  2103 FMAC FR0, FRm, FRn {:  
  2104     COUNT_INST(I_FMAC);
  2105     check_fpuen();
  2106     if( sh4_x86.double_prec ) {
  2107         push_dr( 0 );
  2108         push_dr( FRm );
  2109         FMULP_st(1);
  2110         push_dr( FRn );
  2111         FADDP_st(1);
  2112         pop_dr( FRn );
  2113     } else {
  2114         push_fr( 0 );
  2115         push_fr( FRm );
  2116         FMULP_st(1);
  2117         push_fr( FRn );
  2118         FADDP_st(1);
  2119         pop_fr( FRn );
  2120     }
  2121 :}
  2123 FMUL FRm, FRn {:  
  2124     COUNT_INST(I_FMUL);
  2125     check_fpuen();
  2126     if( sh4_x86.double_prec ) {
  2127         push_dr(FRm);
  2128         push_dr(FRn);
  2129         FMULP_st(1);
  2130         pop_dr(FRn);
  2131     } else {
  2132         push_fr(FRm);
  2133         push_fr(FRn);
  2134         FMULP_st(1);
  2135         pop_fr(FRn);
  2136     }
  2137 :}
  2138 FNEG FRn {:  
  2139     COUNT_INST(I_FNEG);
  2140     check_fpuen();
  2141     if( sh4_x86.double_prec ) {
  2142         push_dr(FRn);
  2143         FCHS_st0();
  2144         pop_dr(FRn);
  2145     } else {
  2146         push_fr(FRn);
  2147         FCHS_st0();
  2148         pop_fr(FRn);
  2149     }
  2150 :}
  2151 FSRRA FRn {:  
  2152     COUNT_INST(I_FSRRA);
  2153     check_fpuen();
  2154     if( sh4_x86.double_prec == 0 ) {
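               // Computes 1/sqrt(FRn) in full x87 precision; the hardware FSRRA is
               // only an approximation, so this errs on the accurate side.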
  2155         FLD1_st0();
  2156         push_fr(FRn);
  2157         FSQRT_st0();
  2158         FDIVP_st(1);
  2159         pop_fr(FRn);
  2160     }
  2161 :}
  2162 FSQRT FRn {:  
  2163     COUNT_INST(I_FSQRT);
  2164     check_fpuen();
  2165     if( sh4_x86.double_prec ) {
  2166         push_dr(FRn);
  2167         FSQRT_st0();
  2168         pop_dr(FRn);
  2169     } else {
  2170         push_fr(FRn);
  2171         FSQRT_st0();
  2172         pop_fr(FRn);
  2173     }
  2174 :}
  2175 FSUB FRm, FRn {:  
  2176     COUNT_INST(I_FSUB);
  2177     check_fpuen();
  2178     if( sh4_x86.double_prec ) {
  2179         push_dr(FRn);
  2180         push_dr(FRm);
  2181         FSUBP_st(1);
  2182         pop_dr(FRn);
  2183     } else {
  2184         push_fr(FRn);
  2185         push_fr(FRm);
  2186         FSUBP_st(1);
  2187         pop_fr(FRn);
  2188     }
  2189 :}
  2191 FCMP/EQ FRm, FRn {:  
  2192     COUNT_INST(I_FCMPEQ);
  2193     check_fpuen();
  2194     if( sh4_x86.double_prec ) {
  2195         push_dr(FRm);
  2196         push_dr(FRn);
  2197     } else {
  2198         push_fr(FRm);
  2199         push_fr(FRn);
  2200     }
  2201     FCOMIP_st(1);
  2202     SETE_t();
  2203     FPOP_st();
  2204     sh4_x86.tstate = TSTATE_E;
  2205 :}
  2206 FCMP/GT FRm, FRn {:  
  2207     COUNT_INST(I_FCMPGT);
  2208     check_fpuen();
  2209     if( sh4_x86.double_prec ) {
  2210         push_dr(FRm);
  2211         push_dr(FRn);
  2212     } else {
  2213         push_fr(FRm);
  2214         push_fr(FRn);
  2215     }
  2216     FCOMIP_st(1);
  2217     SETA_t();
  2218     FPOP_st();
  2219     sh4_x86.tstate = TSTATE_A;
  2220 :}
  2222 FSCA FPUL, FRn {:  
  2223     COUNT_INST(I_FSCA);
  2224     check_fpuen();
  2225     if( sh4_x86.double_prec == 0 ) {
  2226         LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
  2227         load_spreg( R_EAX, R_FPUL );
  2228         call_func2( sh4_fsca, R_EAX, R_EDX );
  2229     }
  2230     sh4_x86.tstate = TSTATE_NONE;
  2231 :}
  2232 FIPR FVm, FVn {:  
  2233     COUNT_INST(I_FIPR);
  2234     check_fpuen();
  2235     if( sh4_x86.double_prec == 0 ) {
  2236         if( sh4_x86.sse3_enabled ) {
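                   // Dot product in xmm4: component-wise multiply, then two HADDPS
                   // leave the sum in every lane. FR pairs are stored swapped (see
                   // the FTRV comments), so FR[(FVn<<2)+3] lives at offset +2.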
  2237             MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2238             MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2239             HADDPS_xmm_xmm( 4, 4 ); 
  2240             HADDPS_xmm_xmm( 4, 4 );
  2241             MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2242         } else {
  2243             push_fr( FVm<<2 );
  2244             push_fr( FVn<<2 );
  2245             FMULP_st(1);
  2246             push_fr( (FVm<<2)+1);
  2247             push_fr( (FVn<<2)+1);
  2248             FMULP_st(1);
  2249             FADDP_st(1);
  2250             push_fr( (FVm<<2)+2);
  2251             push_fr( (FVn<<2)+2);
  2252             FMULP_st(1);
  2253             FADDP_st(1);
  2254             push_fr( (FVm<<2)+3);
  2255             push_fr( (FVn<<2)+3);
  2256             FMULP_st(1);
  2257             FADDP_st(1);
  2258             pop_fr( (FVn<<2)+3);
  2259         }
  2260     }
  2261 :}
  2262 FTRV XMTRX, FVn {:  
  2263     COUNT_INST(I_FTRV);
  2264     check_fpuen();
  2265     if( sh4_x86.double_prec == 0 ) {
  2266         if( sh4_x86.sse3_enabled ) {
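                   // 4x4 matrix * vector: splat each vector element across an xmm
                   // register, multiply by the matching matrix column, and sum.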
  2267             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2268             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2269             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2270             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2272             MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2273             MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2274             MOVAPS_xmm_xmm( 4, 6 );
  2275             MOVAPS_xmm_xmm( 5, 7 );
  2276             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2277             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2278             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2279             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2280             MULPS_xmm_xmm( 0, 4 );
  2281             MULPS_xmm_xmm( 1, 5 );
  2282             MULPS_xmm_xmm( 2, 6 );
  2283             MULPS_xmm_xmm( 3, 7 );
  2284             ADDPS_xmm_xmm( 5, 4 );
  2285             ADDPS_xmm_xmm( 7, 6 );
  2286             ADDPS_xmm_xmm( 6, 4 );
  2287             MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2288         } else {
  2289             LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
  2290             call_func1( sh4_ftrv, R_EAX );
  2291         }
  2292     }
  2293     sh4_x86.tstate = TSTATE_NONE;
  2294 :}
  2296 FRCHG {:  
  2297     COUNT_INST(I_FRCHG);
  2298     check_fpuen();
  2299     XOR_imm32_sh4r( FPSCR_FR, R_FPSCR );
  2300     call_func0( sh4_switch_fr_banks );
  2301     sh4_x86.tstate = TSTATE_NONE;
  2302 :}
  2303 FSCHG {:  
  2304     COUNT_INST(I_FSCHG);
  2305     check_fpuen();
  2306     XOR_imm32_sh4r( FPSCR_SZ, R_FPSCR);
  2307     XOR_imm32_sh4r( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
  2308     sh4_x86.tstate = TSTATE_NONE;
  2309     sh4_x86.double_size = !sh4_x86.double_size;
  2310 :}
  2312 /* Processor control instructions */
  2313 LDC Rm, SR {:
  2314     COUNT_INST(I_LDCSR);
  2315     if( sh4_x86.in_delay_slot ) {
  2316 	SLOTILLEGAL();
  2317     } else {
  2318 	check_priv();
  2319 	load_reg( R_EAX, Rm );
  2320 	call_func1( sh4_write_sr, R_EAX );
  2321 	sh4_x86.fpuen_checked = FALSE;
  2322 	sh4_x86.tstate = TSTATE_NONE;
  2323 	return 2;
  2324     }
  2325 :}
  2326 LDC Rm, GBR {: 
  2327     COUNT_INST(I_LDC);
  2328     load_reg( R_EAX, Rm );
  2329     store_spreg( R_EAX, R_GBR );
  2330 :}
  2331 LDC Rm, VBR {:  
  2332     COUNT_INST(I_LDC);
  2333     check_priv();
  2334     load_reg( R_EAX, Rm );
  2335     store_spreg( R_EAX, R_VBR );
  2336     sh4_x86.tstate = TSTATE_NONE;
  2337 :}
  2338 LDC Rm, SSR {:  
  2339     COUNT_INST(I_LDC);
  2340     check_priv();
  2341     load_reg( R_EAX, Rm );
  2342     store_spreg( R_EAX, R_SSR );
  2343     sh4_x86.tstate = TSTATE_NONE;
  2344 :}
  2345 LDC Rm, SGR {:  
  2346     COUNT_INST(I_LDC);
  2347     check_priv();
  2348     load_reg( R_EAX, Rm );
  2349     store_spreg( R_EAX, R_SGR );
  2350     sh4_x86.tstate = TSTATE_NONE;
  2351 :}
  2352 LDC Rm, SPC {:  
  2353     COUNT_INST(I_LDC);
  2354     check_priv();
  2355     load_reg( R_EAX, Rm );
  2356     store_spreg( R_EAX, R_SPC );
  2357     sh4_x86.tstate = TSTATE_NONE;
  2358 :}
  2359 LDC Rm, DBR {:  
  2360     COUNT_INST(I_LDC);
  2361     check_priv();
  2362     load_reg( R_EAX, Rm );
  2363     store_spreg( R_EAX, R_DBR );
  2364     sh4_x86.tstate = TSTATE_NONE;
  2365 :}
  2366 LDC Rm, Rn_BANK {:  
  2367     COUNT_INST(I_LDC);
  2368     check_priv();
  2369     load_reg( R_EAX, Rm );
  2370     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2371     sh4_x86.tstate = TSTATE_NONE;
  2372 :}
  2373 LDC.L @Rm+, GBR {:  
  2374     COUNT_INST(I_LDCM);
  2375     load_reg( R_EAX, Rm );
  2376     check_ralign32( R_EAX );
  2377     MMU_TRANSLATE_READ( R_EAX );
  2378     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2379     MEM_READ_LONG( R_EAX, R_EAX );
  2380     store_spreg( R_EAX, R_GBR );
  2381     sh4_x86.tstate = TSTATE_NONE;
  2382 :}
  2383 LDC.L @Rm+, SR {:
  2384     COUNT_INST(I_LDCSRM);
  2385     if( sh4_x86.in_delay_slot ) {
  2386 	SLOTILLEGAL();
  2387     } else {
  2388 	check_priv();
  2389 	load_reg( R_EAX, Rm );
  2390 	check_ralign32( R_EAX );
  2391 	MMU_TRANSLATE_READ( R_EAX );
  2392 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2393 	MEM_READ_LONG( R_EAX, R_EAX );
  2394 	call_func1( sh4_write_sr, R_EAX );
  2395 	sh4_x86.fpuen_checked = FALSE;
  2396 	sh4_x86.tstate = TSTATE_NONE;
  2397 	return 2;
  2398     }
  2399 :}
  2400 LDC.L @Rm+, VBR {:  
  2401     COUNT_INST(I_LDCM);
  2402     check_priv();
  2403     load_reg( R_EAX, Rm );
  2404     check_ralign32( R_EAX );
  2405     MMU_TRANSLATE_READ( R_EAX );
  2406     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2407     MEM_READ_LONG( R_EAX, R_EAX );
  2408     store_spreg( R_EAX, R_VBR );
  2409     sh4_x86.tstate = TSTATE_NONE;
  2410 :}
  2411 LDC.L @Rm+, SSR {:
  2412     COUNT_INST(I_LDCM);
  2413     check_priv();
  2414     load_reg( R_EAX, Rm );
  2415     check_ralign32( R_EAX );
  2416     MMU_TRANSLATE_READ( R_EAX );
  2417     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2418     MEM_READ_LONG( R_EAX, R_EAX );
  2419     store_spreg( R_EAX, R_SSR );
  2420     sh4_x86.tstate = TSTATE_NONE;
  2421 :}
  2422 LDC.L @Rm+, SGR {:  
  2423     COUNT_INST(I_LDCM);
  2424     check_priv();
  2425     load_reg( R_EAX, Rm );
  2426     check_ralign32( R_EAX );
  2427     MMU_TRANSLATE_READ( R_EAX );
  2428     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2429     MEM_READ_LONG( R_EAX, R_EAX );
  2430     store_spreg( R_EAX, R_SGR );
  2431     sh4_x86.tstate = TSTATE_NONE;
  2432 :}
  2433 LDC.L @Rm+, SPC {:  
  2434     COUNT_INST(I_LDCM);
  2435     check_priv();
  2436     load_reg( R_EAX, Rm );
  2437     check_ralign32( R_EAX );
  2438     MMU_TRANSLATE_READ( R_EAX );
  2439     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2440     MEM_READ_LONG( R_EAX, R_EAX );
  2441     store_spreg( R_EAX, R_SPC );
  2442     sh4_x86.tstate = TSTATE_NONE;
  2443 :}
  2444 LDC.L @Rm+, DBR {:  
  2445     COUNT_INST(I_LDCM);
  2446     check_priv();
  2447     load_reg( R_EAX, Rm );
  2448     check_ralign32( R_EAX );
  2449     MMU_TRANSLATE_READ( R_EAX );
  2450     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2451     MEM_READ_LONG( R_EAX, R_EAX );
  2452     store_spreg( R_EAX, R_DBR );
  2453     sh4_x86.tstate = TSTATE_NONE;
  2454 :}
  2455 LDC.L @Rm+, Rn_BANK {:  
  2456     COUNT_INST(I_LDCM);
  2457     check_priv();
  2458     load_reg( R_EAX, Rm );
  2459     check_ralign32( R_EAX );
  2460     MMU_TRANSLATE_READ( R_EAX );
  2461     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2462     MEM_READ_LONG( R_EAX, R_EAX );
  2463     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2464     sh4_x86.tstate = TSTATE_NONE;
  2465 :}
  2466 LDS Rm, FPSCR {:
  2467     COUNT_INST(I_LDSFPSCR);
  2468     check_fpuen();
  2469     load_reg( R_EAX, Rm );
  2470     call_func1( sh4_write_fpscr, R_EAX );
  2471     sh4_x86.tstate = TSTATE_NONE;
  2472     return 2;
  2473 :}
  2474 LDS.L @Rm+, FPSCR {:  
  2475     COUNT_INST(I_LDSFPSCRM);
  2476     check_fpuen();
  2477     load_reg( R_EAX, Rm );
  2478     check_ralign32( R_EAX );
  2479     MMU_TRANSLATE_READ( R_EAX );
  2480     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2481     MEM_READ_LONG( R_EAX, R_EAX );
  2482     call_func1( sh4_write_fpscr, R_EAX );
  2483     sh4_x86.tstate = TSTATE_NONE;
  2484     return 2;
  2485 :}
  2486 LDS Rm, FPUL {:  
  2487     COUNT_INST(I_LDS);
  2488     check_fpuen();
  2489     load_reg( R_EAX, Rm );
  2490     store_spreg( R_EAX, R_FPUL );
  2491 :}
  2492 LDS.L @Rm+, FPUL {:  
  2493     COUNT_INST(I_LDSM);
  2494     check_fpuen();
  2495     load_reg( R_EAX, Rm );
  2496     check_ralign32( R_EAX );
  2497     MMU_TRANSLATE_READ( R_EAX );
  2498     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2499     MEM_READ_LONG( R_EAX, R_EAX );
  2500     store_spreg( R_EAX, R_FPUL );
  2501     sh4_x86.tstate = TSTATE_NONE;
  2502 :}
  2503 LDS Rm, MACH {: 
  2504     COUNT_INST(I_LDS);
  2505     load_reg( R_EAX, Rm );
  2506     store_spreg( R_EAX, R_MACH );
  2507 :}
  2508 LDS.L @Rm+, MACH {:  
  2509     COUNT_INST(I_LDSM);
  2510     load_reg( R_EAX, Rm );
  2511     check_ralign32( R_EAX );
  2512     MMU_TRANSLATE_READ( R_EAX );
  2513     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2514     MEM_READ_LONG( R_EAX, R_EAX );
  2515     store_spreg( R_EAX, R_MACH );
  2516     sh4_x86.tstate = TSTATE_NONE;
  2517 :}
  2518 LDS Rm, MACL {:  
  2519     COUNT_INST(I_LDS);
  2520     load_reg( R_EAX, Rm );
  2521     store_spreg( R_EAX, R_MACL );
  2522 :}
  2523 LDS.L @Rm+, MACL {:  
  2524     COUNT_INST(I_LDSM);
  2525     load_reg( R_EAX, Rm );
  2526     check_ralign32( R_EAX );
  2527     MMU_TRANSLATE_READ( R_EAX );
  2528     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2529     MEM_READ_LONG( R_EAX, R_EAX );
  2530     store_spreg( R_EAX, R_MACL );
  2531     sh4_x86.tstate = TSTATE_NONE;
  2532 :}
  2533 LDS Rm, PR {:  
  2534     COUNT_INST(I_LDS);
  2535     load_reg( R_EAX, Rm );
  2536     store_spreg( R_EAX, R_PR );
  2537 :}
  2538 LDS.L @Rm+, PR {:  
  2539     COUNT_INST(I_LDSM);
  2540     load_reg( R_EAX, Rm );
  2541     check_ralign32( R_EAX );
  2542     MMU_TRANSLATE_READ( R_EAX );
  2543     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2544     MEM_READ_LONG( R_EAX, R_EAX );
  2545     store_spreg( R_EAX, R_PR );
  2546     sh4_x86.tstate = TSTATE_NONE;
  2547 :}
  2548 LDTLB {:  
  2549     COUNT_INST(I_LDTLB);
  2550     call_func0( MMU_ldtlb );
  2551     sh4_x86.tstate = TSTATE_NONE;
  2552 :}
  2553 OCBI @Rn {:
  2554     COUNT_INST(I_OCBI);
  2555 :}
  2556 OCBP @Rn {:
  2557     COUNT_INST(I_OCBP);
  2558 :}
  2559 OCBWB @Rn {:
  2560     COUNT_INST(I_OCBWB);
  2561 :}
  2562 PREF @Rn {:
  2563     COUNT_INST(I_PREF);
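           // PREF only does real work for store-queue addresses (0xE0000000 region);
           // the masked compare below skips the flush for ordinary prefetches.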
  2564     load_reg( R_EAX, Rn );
  2565     MOV_r32_r32( R_EAX, R_ECX );
  2566     AND_imm32_r32( 0xFC000000, R_ECX );
  2567     CMP_imm32_r32( 0xE0000000, R_ECX );
  2568     JNE_rel8(end);
  2569     if( sh4_x86.tlb_on ) {
  2570     	call_func1( sh4_flush_store_queue_mmu, R_EAX );
  2571         TEST_r32_r32( R_EAX, R_EAX );
  2572         JE_exc(-1);
  2573     } else {
  2574     	call_func1( sh4_flush_store_queue, R_EAX );
  2575     }
  2576     JMP_TARGET(end);
  2577     sh4_x86.tstate = TSTATE_NONE;
  2578 :}
  2579 SLEEP {: 
  2580     COUNT_INST(I_SLEEP);
  2581     check_priv();
  2582     call_func0( sh4_sleep );
  2583     sh4_x86.tstate = TSTATE_NONE;
  2584     sh4_x86.in_delay_slot = DELAY_NONE;
  2585     return 2;
  2586 :}
  2587 STC SR, Rn {:
  2588     COUNT_INST(I_STCSR);
  2589     check_priv();
  2590     call_func0(sh4_read_sr);
  2591     store_reg( R_EAX, Rn );
  2592     sh4_x86.tstate = TSTATE_NONE;
  2593 :}
  2594 STC GBR, Rn {:  
  2595     COUNT_INST(I_STC);
  2596     load_spreg( R_EAX, R_GBR );
  2597     store_reg( R_EAX, Rn );
  2598 :}
  2599 STC VBR, Rn {:  
  2600     COUNT_INST(I_STC);
  2601     check_priv();
  2602     load_spreg( R_EAX, R_VBR );
  2603     store_reg( R_EAX, Rn );
  2604     sh4_x86.tstate = TSTATE_NONE;
  2605 :}
  2606 STC SSR, Rn {:  
  2607     COUNT_INST(I_STC);
  2608     check_priv();
  2609     load_spreg( R_EAX, R_SSR );
  2610     store_reg( R_EAX, Rn );
  2611     sh4_x86.tstate = TSTATE_NONE;
  2612 :}
  2613 STC SPC, Rn {:  
  2614     COUNT_INST(I_STC);
  2615     check_priv();
  2616     load_spreg( R_EAX, R_SPC );
  2617     store_reg( R_EAX, Rn );
  2618     sh4_x86.tstate = TSTATE_NONE;
  2619 :}
  2620 STC SGR, Rn {:  
  2621     COUNT_INST(I_STC);
  2622     check_priv();
  2623     load_spreg( R_EAX, R_SGR );
  2624     store_reg( R_EAX, Rn );
  2625     sh4_x86.tstate = TSTATE_NONE;
  2626 :}
  2627 STC DBR, Rn {:  
  2628     COUNT_INST(I_STC);
  2629     check_priv();
  2630     load_spreg( R_EAX, R_DBR );
  2631     store_reg( R_EAX, Rn );
  2632     sh4_x86.tstate = TSTATE_NONE;
  2633 :}
  2634 STC Rm_BANK, Rn {:
  2635     COUNT_INST(I_STC);
  2636     check_priv();
  2637     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2638     store_reg( R_EAX, Rn );
  2639     sh4_x86.tstate = TSTATE_NONE;
  2640 :}
  2641 STC.L SR, @-Rn {:
  2642     COUNT_INST(I_STCSRM);
  2643     check_priv();
  2644     load_reg( R_EAX, Rn );
  2645     check_walign32( R_EAX );
  2646     ADD_imm8s_r32( -4, R_EAX );
  2647     MMU_TRANSLATE_WRITE( R_EAX );
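           // sh4_read_sr returns SR in EAX, so the translated store address is
           // spilled to a stack temp around the call and reloaded afterwards.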
  2648     MOV_r32_esp8( R_EAX, 0 );
  2649     call_func0( sh4_read_sr );
  2650     MOV_r32_r32( R_EAX, R_EDX );
  2651     MOV_esp8_r32( 0, R_EAX );
  2652     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2653     MEM_WRITE_LONG( R_EAX, R_EDX );
  2654     sh4_x86.tstate = TSTATE_NONE;
  2655 :}
  2656 STC.L VBR, @-Rn {:  
  2657     COUNT_INST(I_STCM);
  2658     check_priv();
  2659     load_reg( R_EAX, Rn );
  2660     check_walign32( R_EAX );
  2661     ADD_imm8s_r32( -4, R_EAX );
  2662     MMU_TRANSLATE_WRITE( R_EAX );
  2663     load_spreg( R_EDX, R_VBR );
  2664     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2665     MEM_WRITE_LONG( R_EAX, R_EDX );
  2666     sh4_x86.tstate = TSTATE_NONE;
  2667 :}
  2668 STC.L SSR, @-Rn {:  
  2669     COUNT_INST(I_STCM);
  2670     check_priv();
  2671     load_reg( R_EAX, Rn );
  2672     check_walign32( R_EAX );
  2673     ADD_imm8s_r32( -4, R_EAX );
  2674     MMU_TRANSLATE_WRITE( R_EAX );
  2675     load_spreg( R_EDX, R_SSR );
  2676     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2677     MEM_WRITE_LONG( R_EAX, R_EDX );
  2678     sh4_x86.tstate = TSTATE_NONE;
  2679 :}
  2680 STC.L SPC, @-Rn {:
  2681     COUNT_INST(I_STCM);
  2682     check_priv();
  2683     load_reg( R_EAX, Rn );
  2684     check_walign32( R_EAX );
  2685     ADD_imm8s_r32( -4, R_EAX );
  2686     MMU_TRANSLATE_WRITE( R_EAX );
  2687     load_spreg( R_EDX, R_SPC );
  2688     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2689     MEM_WRITE_LONG( R_EAX, R_EDX );
  2690     sh4_x86.tstate = TSTATE_NONE;
  2691 :}
  2692 STC.L SGR, @-Rn {:  
  2693     COUNT_INST(I_STCM);
  2694     check_priv();
  2695     load_reg( R_EAX, Rn );
  2696     check_walign32( R_EAX );
  2697     ADD_imm8s_r32( -4, R_EAX );
  2698     MMU_TRANSLATE_WRITE( R_EAX );
  2699     load_spreg( R_EDX, R_SGR );
  2700     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2701     MEM_WRITE_LONG( R_EAX, R_EDX );
  2702     sh4_x86.tstate = TSTATE_NONE;
  2703 :}
  2704 STC.L DBR, @-Rn {:  
  2705     COUNT_INST(I_STCM);
  2706     check_priv();
  2707     load_reg( R_EAX, Rn );
  2708     check_walign32( R_EAX );
  2709     ADD_imm8s_r32( -4, R_EAX );
  2710     MMU_TRANSLATE_WRITE( R_EAX );
  2711     load_spreg( R_EDX, R_DBR );
  2712     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2713     MEM_WRITE_LONG( R_EAX, R_EDX );
  2714     sh4_x86.tstate = TSTATE_NONE;
  2715 :}
  2716 STC.L Rm_BANK, @-Rn {:  
  2717     COUNT_INST(I_STCM);
  2718     check_priv();
  2719     load_reg( R_EAX, Rn );
  2720     check_walign32( R_EAX );
  2721     ADD_imm8s_r32( -4, R_EAX );
  2722     MMU_TRANSLATE_WRITE( R_EAX );
  2723     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2724     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2725     MEM_WRITE_LONG( R_EAX, R_EDX );
  2726     sh4_x86.tstate = TSTATE_NONE;
  2727 :}
  2728 STC.L GBR, @-Rn {:  
  2729     COUNT_INST(I_STCM);
  2730     load_reg( R_EAX, Rn );
  2731     check_walign32( R_EAX );
  2732     ADD_imm8s_r32( -4, R_EAX );
  2733     MMU_TRANSLATE_WRITE( R_EAX );
  2734     load_spreg( R_EDX, R_GBR );
  2735     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2736     MEM_WRITE_LONG( R_EAX, R_EDX );
  2737     sh4_x86.tstate = TSTATE_NONE;
  2738 :}
  2739 STS FPSCR, Rn {:  
  2740     COUNT_INST(I_STSFPSCR);
  2741     check_fpuen();
  2742     load_spreg( R_EAX, R_FPSCR );
  2743     store_reg( R_EAX, Rn );
  2744 :}
  2745 STS.L FPSCR, @-Rn {:  
  2746     COUNT_INST(I_STSFPSCRM);
  2747     check_fpuen();
  2748     load_reg( R_EAX, Rn );
  2749     check_walign32( R_EAX );
  2750     ADD_imm8s_r32( -4, R_EAX );
  2751     MMU_TRANSLATE_WRITE( R_EAX );
  2752     load_spreg( R_EDX, R_FPSCR );
  2753     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2754     MEM_WRITE_LONG( R_EAX, R_EDX );
  2755     sh4_x86.tstate = TSTATE_NONE;
  2756 :}
  2757 STS FPUL, Rn {:  
  2758     COUNT_INST(I_STS);
  2759     check_fpuen();
  2760     load_spreg( R_EAX, R_FPUL );
  2761     store_reg( R_EAX, Rn );
  2762 :}
  2763 STS.L FPUL, @-Rn {:  
  2764     COUNT_INST(I_STSM);
  2765     check_fpuen();
  2766     load_reg( R_EAX, Rn );
  2767     check_walign32( R_EAX );
  2768     ADD_imm8s_r32( -4, R_EAX );
  2769     MMU_TRANSLATE_WRITE( R_EAX );
  2770     load_spreg( R_EDX, R_FPUL );
  2771     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2772     MEM_WRITE_LONG( R_EAX, R_EDX );
  2773     sh4_x86.tstate = TSTATE_NONE;
  2774 :}
  2775 STS MACH, Rn {:  
  2776     COUNT_INST(I_STS);
  2777     load_spreg( R_EAX, R_MACH );
  2778     store_reg( R_EAX, Rn );
  2779 :}
  2780 STS.L MACH, @-Rn {:  
  2781     COUNT_INST(I_STSM);
  2782     load_reg( R_EAX, Rn );
  2783     check_walign32( R_EAX );
  2784     ADD_imm8s_r32( -4, R_EAX );
  2785     MMU_TRANSLATE_WRITE( R_EAX );
  2786     load_spreg( R_EDX, R_MACH );
  2787     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2788     MEM_WRITE_LONG( R_EAX, R_EDX );
  2789     sh4_x86.tstate = TSTATE_NONE;
  2790 :}
  2791 STS MACL, Rn {:  
  2792     COUNT_INST(I_STS);
  2793     load_spreg( R_EAX, R_MACL );
  2794     store_reg( R_EAX, Rn );
  2795 :}
  2796 STS.L MACL, @-Rn {:  
  2797     COUNT_INST(I_STSM);
  2798     load_reg( R_EAX, Rn );
  2799     check_walign32( R_EAX );
  2800     ADD_imm8s_r32( -4, R_EAX );
  2801     MMU_TRANSLATE_WRITE( R_EAX );
  2802     load_spreg( R_EDX, R_MACL );
  2803     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2804     MEM_WRITE_LONG( R_EAX, R_EDX );
  2805     sh4_x86.tstate = TSTATE_NONE;
  2806 :}
  2807 STS PR, Rn {:  
  2808     COUNT_INST(I_STS);
  2809     load_spreg( R_EAX, R_PR );
  2810     store_reg( R_EAX, Rn );
  2811 :}
  2812 STS.L PR, @-Rn {:  
  2813     COUNT_INST(I_STSM);
  2814     load_reg( R_EAX, Rn );
  2815     check_walign32( R_EAX );
  2816     ADD_imm8s_r32( -4, R_EAX );
  2817     MMU_TRANSLATE_WRITE( R_EAX );
  2818     load_spreg( R_EDX, R_PR );
  2819     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2820     MEM_WRITE_LONG( R_EAX, R_EDX );
  2821     sh4_x86.tstate = TSTATE_NONE;
  2822 :}
  2824 NOP {: 
  2825     COUNT_INST(I_NOP);
  2826     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  2827 :}
  2828 %%
  2829     sh4_x86.in_delay_slot = DELAY_NONE;
  2830     return 0;