lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 929:fd8cb0c82f5f
prev 927:17b6b9e245d8
next 930:07e5b11419db
author nkeynes
date Sat Dec 20 03:01:40 2008 +0000
branch lxdream-mem
permissions -rw-r--r--
last change First pass experiment using cached decoding.
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    23 #include <stddef.h>
    25 #ifndef NDEBUG
    26 #define DEBUG_JUMPS 1
    27 #endif
    29 #include "lxdream.h"
    30 #include "sh4/xltcache.h"
    31 #include "sh4/sh4core.h"
    32 #include "sh4/sh4trans.h"
    33 #include "sh4/sh4stat.h"
    34 #include "sh4/sh4mmio.h"
    35 #include "sh4/x86op.h"
    36 #include "clock.h"
    38 #define DEFAULT_BACKPATCH_SIZE 4096
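/* A pending fixup for exception-raising code. Field meanings, as used by
 * sh4_x86_add_backpatch() below: fixup_offset is the byte offset of the
 * patch site within the generated block, fixup_icount is the SH4
 * instruction count from the start of the block (used for recovery), and
 * exc_code is the exception to raise — the MMU translation macros pass -1,
 * apparently meaning the callee has already set up the exception state. */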
    40 struct backpatch_record {
    41     uint32_t fixup_offset;
    42     uint32_t fixup_icount;
    43     int32_t exc_code;
    44 };
    46 #define DELAY_NONE 0
    47 #define DELAY_PC 1
    48 #define DELAY_PC_PR 2
    50 /** 
    51  * Struct to manage internal translation state. This state is not saved -
    52  * it is only valid between calls to sh4_translate_begin_block() and
    53  * sh4_translate_end_block()
    54  */
    55 struct sh4_x86_state {
    56     int in_delay_slot;
    57     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    58     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    59     gboolean branch_taken; /* true if we branched unconditionally */
    60     gboolean double_prec; /* true if FPU is in double-precision mode */
    61     gboolean double_size; /* true if FPU is in double-size mode */
    62     gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    63     uint32_t block_start_pc;
    64     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    65     int tstate;
    67     /* mode flags */
    68     gboolean tlb_on; /* True if tlb translation is active */
    70     /* Allocated memory for the (block-wide) back-patch list */
    71     struct backpatch_record *backpatch_list;
    72     uint32_t backpatch_posn;
    73     uint32_t backpatch_size;
    74 };
    76 #define TSTATE_NONE -1
    77 #define TSTATE_O    0
    78 #define TSTATE_C    2
    79 #define TSTATE_E    4
    80 #define TSTATE_NE   5
    81 #define TSTATE_G    0xF
    82 #define TSTATE_GE   0xD
    83 #define TSTATE_A    7
    84 #define TSTATE_AE   3
    86 #ifdef ENABLE_SH4STATS
    87 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
    88 #else
    89 #define COUNT_INST(id)
    90 #endif
    92 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    93 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    94 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    95     OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
    97 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    98 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    99 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
   100     OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
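/* The TSTATE_* values are chosen so that 0x70+tstate is the x86 short-form
 * Jcc opcode that branches when the SH4 condition holds, and
 * 0x70+(tstate^1) is its negation. For example, with tstate == TSTATE_E,
 * JT_rel8 emits 0x74 (JE rel8) and JF_rel8 emits 0x75 (JNE rel8);
 * TSTATE_GE yields 0x7D (JGE). The trailing OP(-1) reserves the 8-bit
 * displacement byte, which is patched when JMP_TARGET resolves the label. */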
   102 static struct sh4_x86_state sh4_x86;
   104 static uint32_t max_int = 0x7FFFFFFF;
   105 static uint32_t min_int = 0x80000000;
   106 static uint32_t save_fcw; /* save value for fpu control word */
   107 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
   109 gboolean is_sse3_supported()
   110 {
   111     uint32_t features;
   113     __asm__ __volatile__(
   114         "mov $0x01, %%eax\n\t"
   115         "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
   116     return (features & 1) ? TRUE : FALSE;
   117 }
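/* Note: the asm above executes CPUID with EAX=1; bit 0 of the ECX result
 * is the SSE3 (PNI) feature flag, which is all this test checks. */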
   119 void sh4_translate_init(void)
   120 {
   121     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   122     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   123     sh4_x86.sse3_enabled = is_sse3_supported();
   124 }
   127 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   128 {
   129     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   130 	sh4_x86.backpatch_size <<= 1;
   131 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   132 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   133 	assert( sh4_x86.backpatch_list != NULL );
   134     }
   135     if( sh4_x86.in_delay_slot ) {
   136 	fixup_pc -= 2;
   137     }
   138     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   139 	((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
   140     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   141     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   142     sh4_x86.backpatch_posn++;
   143 }
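/* Worked example: for a block starting at 0x8C000010, a fixup emitted for
 * the instruction at 0x8C000016, 0x40 bytes into the generated code, gets
 * fixup_offset == 0x40 and fixup_icount == (0x16 - 0x10) >> 1 == 3. */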
   145 /**
   146  * Emit an instruction to load an SH4 reg into a real register
   147  */
   148 static inline void load_reg( int x86reg, int sh4reg ) 
   149 {
   150     /* mov [bp+n], reg */
   151     OP(0x8B);
   152     OP(0x45 + (x86reg<<3));
   153     OP(REG_OFFSET(r[sh4reg]));
   154 }
   156 static inline void load_reg16s( int x86reg, int sh4reg )
   157 {
   158     OP(0x0F);
   159     OP(0xBF);
   160     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   161 }
   163 static inline void load_reg16u( int x86reg, int sh4reg )
   164 {
   165     OP(0x0F);
   166     OP(0xB7);
   167     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   169 }
   171 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   172 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   173 /**
   174  * Emit an instruction to load an immediate value into a register
   175  */
   176 static inline void load_imm32( int x86reg, uint32_t value ) {
   177     /* mov #value, reg */
   178     OP(0xB8 + x86reg);
   179     OP32(value);
   180 }
   182 /**
   183  * Load an immediate 64-bit quantity (note: x86-64 only)
   184  */
   185 static inline void load_imm64( int x86reg, uint64_t value ) {
   186     /* mov #value, reg */
   187     REXW();
   188     OP(0xB8 + x86reg);
   189     OP64(value);
   190 }
   192 /**
   193  * Emit an instruction to store an SH4 reg (RN)
   194  */
    195 static inline void store_reg( int x86reg, int sh4reg ) {
   196     /* mov reg, [bp+n] */
   197     OP(0x89);
   198     OP(0x45 + (x86reg<<3));
   199     OP(REG_OFFSET(r[sh4reg]));
   200 }
   202 /**
   203  * Load an FR register (single-precision floating point) into an integer x86
   204  * register (eg for register-to-register moves)
   205  */
   206 #define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
   207 #define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
   209 /**
   210  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   211  */
   212 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   213 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   215 /**
    216  * Store an FR register (single-precision floating point) from an integer x86
   217  * register (eg for register-to-register moves)
   218  */
   219 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
   220 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
   222 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   223 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   226 #define push_fpul()  FLDF_sh4r(R_FPUL)
   227 #define pop_fpul()   FSTPF_sh4r(R_FPUL)
   228 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   229 #define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   230 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   231 #define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   232 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   233 #define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   234 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   235 #define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
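/* The (frm)^1 index in the single-precision accessors reflects how sh4r.fr
 * appears to be laid out: each pair of 32-bit floats is stored word-swapped,
 * so that a double-precision pair (the _dr/_xdr forms, which mask with 0x0E
 * or force the low bit) reads as a single native 64-bit value. */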
   239 /* Exception checks - Note that all exception checks will clobber EAX */
   241 #define check_priv( ) \
   242     if( !sh4_x86.priv_checked ) { \
   243 	sh4_x86.priv_checked = TRUE;\
   244 	load_spreg( R_EAX, R_SR );\
   245 	AND_imm32_r32( SR_MD, R_EAX );\
   246 	if( sh4_x86.in_delay_slot ) {\
   247 	    JE_exc( EXC_SLOT_ILLEGAL );\
   248 	} else {\
   249 	    JE_exc( EXC_ILLEGAL );\
   250 	}\
   251 	sh4_x86.tstate = TSTATE_NONE; \
   252     }\
   254 #define check_fpuen( ) \
   255     if( !sh4_x86.fpuen_checked ) {\
   256 	sh4_x86.fpuen_checked = TRUE;\
   257 	load_spreg( R_EAX, R_SR );\
   258 	AND_imm32_r32( SR_FD, R_EAX );\
   259 	if( sh4_x86.in_delay_slot ) {\
   260 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   261 	} else {\
   262 	    JNE_exc(EXC_FPU_DISABLED);\
   263 	}\
   264 	sh4_x86.tstate = TSTATE_NONE; \
   265     }
   267 #define check_ralign16( x86reg ) \
   268     TEST_imm32_r32( 0x00000001, x86reg ); \
   269     JNE_exc(EXC_DATA_ADDR_READ)
   271 #define check_walign16( x86reg ) \
   272     TEST_imm32_r32( 0x00000001, x86reg ); \
   273     JNE_exc(EXC_DATA_ADDR_WRITE);
   275 #define check_ralign32( x86reg ) \
   276     TEST_imm32_r32( 0x00000003, x86reg ); \
   277     JNE_exc(EXC_DATA_ADDR_READ)
   279 #define check_walign32( x86reg ) \
   280     TEST_imm32_r32( 0x00000003, x86reg ); \
   281     JNE_exc(EXC_DATA_ADDR_WRITE);
   283 #define check_ralign64( x86reg ) \
   284     TEST_imm32_r32( 0x00000007, x86reg ); \
   285     JNE_exc(EXC_DATA_ADDR_READ)
   287 #define check_walign64( x86reg ) \
   288     TEST_imm32_r32( 0x00000007, x86reg ); \
   289     JNE_exc(EXC_DATA_ADDR_WRITE);
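/* Example: a longword read from 0x8C000002 fails check_ralign32, since the
 * address has bits set under the 0x3 mask, so the emitted TEST/JNE pair
 * raises EXC_DATA_ADDR_READ instead of performing the access. */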
   291 #define UNDEF(ir)
   292 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   293 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   294 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   295 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   296 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   297 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   298 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   300 #ifdef HAVE_FRAME_ADDRESS
   301 /**
   302  * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned 
   303  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   304  */
   305 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) {  call_func1_exc(mmu_vma_to_phys_read, addr_reg, pc); MEM_RESULT(addr_reg); }
   307 /**
   308  * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned 
   309  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   310  */
   311 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1_exc(mmu_vma_to_phys_write, addr_reg, pc); MEM_RESULT(addr_reg); }
   312 #else
   313 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   314 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   315 #endif
   317 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
   319 /****** Import appropriate calling conventions ******/
   320 #if SIZEOF_VOID_P == 8
   321 #include "sh4/ia64abi.h"
   322 #else /* 32-bit system */
   323 #include "sh4/ia32abi.h"
   324 #endif
   326 #define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
   328 /**
    329  * Given an address in addr_reg and a pointer-cache entry, check whether the
    330  * cached decode is still valid, and re-decode the address if it is not.
   331  * At conclusion of this:
   332  *    R_EBX will contain the address
   333  *    R_ECX will contain the memory region vtable
   334  *    R_EAX, R_EDX (and any other volatiles) are clobbered
   335  */
   336 static inline void MEM_DECODE_ADDRESS( int addr_reg, int rm )
   337 {
   338     MOV_r32_r32( addr_reg, R_EBX );
   339     AND_sh4r_r32( REG_OFFSET(pointer_cache[rm].page_mask), addr_reg );
   340     CMP_sh4r_r32( REG_OFFSET(pointer_cache[rm].page_vma), addr_reg );
   341     EXPJE_rel8(uptodate);
   342     store_spreg( addr_reg, REG_OFFSET(pointer_cache[rm].page_vma) ); 
   343     call_func1( sh7750_decode_address, addr_reg );
   344     store_spreg( R_EAX, REG_OFFSET(pointer_cache[rm].page_fn) );
   345     JMP_TARGET(uptodate);
   346     load_spreg( R_ECX, REG_OFFSET(pointer_cache[rm].page_fn) );
   347 }
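/* Roughly, the emitted code behaves like the following pseudo-C sketch
 * (cache == &sh4r.pointer_cache[rm]):
 *
 *     ebx = addr;
 *     eax = addr & cache->page_mask;
 *     if( eax != cache->page_vma ) {
 *         cache->page_vma = eax;
 *         cache->page_fn = sh7750_decode_address( eax );
 *     }
 *     ecx = cache->page_fn;
 *
 * i.e. the region decode is only redone when an access leaves the
 * previously decoded page. */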
   349 static inline void MEM_READ_LONG_CACHED( int addr_reg, int value_reg, int rm )
   350 {
   351     MEM_DECODE_ADDRESS( addr_reg, rm );
   352     call_func1_r32ind( R_ECX, MEM_REGION_PTR(read_long), R_EBX );
   353     MEM_RESULT(value_reg);
   354 }
   356 static inline void MEM_READ_WORD_CACHED( int addr_reg, int value_reg, int rm )
   357 {
   358     MEM_DECODE_ADDRESS( addr_reg, rm );
   359     call_func1_r32ind( R_ECX, MEM_REGION_PTR(read_word), R_EBX );
   360     MEM_RESULT(value_reg);
   361 }
   363 static inline void MEM_READ_BYTE_CACHED( int addr_reg, int value_reg, int rm )
   364 {
   365     MEM_DECODE_ADDRESS( addr_reg, rm );
   366     call_func1_r32ind( R_ECX, MEM_REGION_PTR(read_byte), R_EBX );
   367     MEM_RESULT(value_reg);    
   368 }
   370 static inline void MEM_WRITE_LONG_CACHED_SP( int addr_reg, int ebpdisp, int rn )
   371 {
   372     MEM_DECODE_ADDRESS( addr_reg, rn );
   373     MOV_sh4r_r32( ebpdisp, R_EDX );
   374     call_func2_r32ind( R_ECX, MEM_REGION_PTR(write_long), R_EBX, R_EDX );
   375 } 
   377 #define MEM_WRITE_LONG_CACHED( addr_reg, value_rm, rn ) MEM_WRITE_LONG_CACHED_SP( addr_reg, REG_OFFSET(r[value_rm]), rn )
   379 static inline void MEM_WRITE_WORD_CACHED( int addr_reg, int value_rm, int rn )
   380 {
   381     MEM_DECODE_ADDRESS( addr_reg, rn );
   382     MOVZX_sh4r16_r32( REG_OFFSET(r[value_rm]), R_EDX );
   383     call_func2_r32ind( R_ECX, MEM_REGION_PTR(write_word), R_EBX, R_EDX );
   384 }
   386 static inline void MEM_WRITE_BYTE_CACHED( int addr_reg, int value_rm, int rn )
   387 {
   388     MEM_DECODE_ADDRESS( addr_reg, rn );
   389     MOVZX_sh4r8_r32( REG_OFFSET(r[value_rm]), R_EDX );
   390     call_func2_r32ind( R_ECX, MEM_REGION_PTR(write_byte), R_EBX, R_EDX );
   391 }
   393 static inline void MEM_WRITE_BYTE_UNCHECKED( int addr_reg, int value_reg, int rn )
   394 {
   395     load_spreg( R_ECX, REG_OFFSET(pointer_cache[rn].page_fn) );
   396     call_func2_r32ind( R_ECX, MEM_REGION_PTR(write_byte), addr_reg, R_EDX );
   397 }    
   399 static inline void MEM_WRITE_FLOAT_CACHED( int addr_reg, int value_frm, int rn )
   400 {
   401     MEM_DECODE_ADDRESS( addr_reg, rn );
   402     load_fr( R_EDX, value_frm );
   403     call_func2_r32ind( R_ECX, MEM_REGION_PTR(write_long), R_EBX, R_EDX );
   404 } 
   406 static inline void MEM_READ_DOUBLE_CACHED( int addr_reg, int value_reg1, int value_reg2, int rm )
   407 {
   408     MEM_DECODE_ADDRESS( addr_reg, rm );
   409     call_func1_r32ind( R_ECX, MEM_REGION_PTR(read_long), R_EBX );
   410     MOV_r32_esp8( R_EAX, 0 );
   411     load_spreg( R_ECX, REG_OFFSET(pointer_cache[rm].page_fn) );
   412     LEA_r32disp8_r32( R_EBX, 4, R_EBX );
   413     call_func1_r32ind( R_ECX, MEM_REGION_PTR(read_long), R_EBX );
   414     MEM_RESULT(value_reg2);
   415     MOV_esp8_r32( 0, value_reg1 );
   416 }
   418 static inline void MEM_WRITE_DOUBLE_CACHED( int addr_reg, int value_frm, int rn )
   419 {
   420     MEM_DECODE_ADDRESS( addr_reg, rn );
   421     load_dr0( R_EDX, value_frm );
   422     call_func2_r32ind( R_ECX, MEM_REGION_PTR(write_long), R_EBX, R_EDX );
   423     LEA_r32disp8_r32( R_EBX, 4, R_EBX );
   424     load_spreg( R_ECX, REG_OFFSET(pointer_cache[rn].page_fn) );
   425     load_dr1( R_EDX, value_frm );
   426     call_func2_r32ind( R_ECX, MEM_REGION_PTR(write_long), R_EBX, R_EDX );
   427 }
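/* Note: page_fn is reloaded into R_ECX before the second write_long call
 * because ECX is clobbered by the first call (see the clobber notes on the
 * MMU macros above); the cached entry itself is still valid, as both halves
 * of an aligned double lie in the same page. */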
   431 void sh4_translate_begin_block( sh4addr_t pc ) 
   432 {
   433     enter_block();
    434     sh4_x86.in_delay_slot = DELAY_NONE;
   435     sh4_x86.priv_checked = FALSE;
   436     sh4_x86.fpuen_checked = FALSE;
   437     sh4_x86.branch_taken = FALSE;
   438     sh4_x86.backpatch_posn = 0;
   439     sh4_x86.block_start_pc = pc;
   440     sh4_x86.tlb_on = IS_MMU_ENABLED();
   441     sh4_x86.tstate = TSTATE_NONE;
   442     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   443     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
   444 }
   447 uint32_t sh4_translate_end_block_size()
   448 {
   449     if( sh4_x86.backpatch_posn <= 3 ) {
   450         return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
   451     } else {
   452         return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
   453     }
   454 }
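/* Worked example: with 2 backpatch records the epilogue needs
 * EPILOGUE_SIZE + 24 bytes; with 5 it needs EPILOGUE_SIZE + 48 + 30 bytes
 * (up to 3 records cost 12 bytes each, after which the base is 48 bytes
 * plus 15 per additional record). */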
   457 /**
   458  * Embed a breakpoint into the generated code
   459  */
   460 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   461 {
   462     load_imm32( R_EAX, pc );
   463     call_func1( sh4_translate_breakpoint_hit, R_EAX );
   464     sh4_x86.tstate = TSTATE_NONE;
   465 }
    468 #define UNTRANSLATABLE(pc) (!IS_IN_ICACHE(pc))
   470 /**
   471  * Embed a call to sh4_execute_instruction for situations that we
   472  * can't translate (just page-crossing delay slots at the moment).
   473  * Caller is responsible for setting new_pc before calling this function.
   474  *
   475  * Performs:
   476  *   Set PC = endpc
   477  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   478  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   479  *   Call sh4_execute_instruction
   480  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   481  */
   482 void exit_block_emu( sh4vma_t endpc )
   483 {
   484     load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
   485     ADD_r32_sh4r( R_ECX, R_PC );
   487     load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
   488     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   489     load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
   490     store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
   492     call_func0( sh4_execute_instruction );    
   493     load_spreg( R_EAX, R_PC );
   494     if( sh4_x86.tlb_on ) {
   495 	call_func1(xlat_get_code_by_vma,R_EAX);
   496     } else {
   497 	call_func1(xlat_get_code,R_EAX);
   498     }
   499     exit_block();
   500 } 
   502 /**
   503  * Translate a single instruction. Delayed branches are handled specially
    504  * by translating both branch and delayed instruction as a single unit (as
    505  * the delay-slot instruction cannot be entered independently of its branch).
   506  * The instruction MUST be in the icache (assert check)
   507  *
   508  * @return true if the instruction marks the end of a basic block
    509  * (eg a branch, or an illegal instruction in a delay slot).
   510  */
   511 uint32_t sh4_translate_instruction( sh4vma_t pc )
   512 {
   513     uint32_t ir;
   514     /* Read instruction from icache */
   515     assert( IS_IN_ICACHE(pc) );
   516     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   518     if( !sh4_x86.in_delay_slot ) {
   519 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   520     }
   521 %%
   522 /* ALU operations */
   523 ADD Rm, Rn {:
   524     COUNT_INST(I_ADD);
   525     load_reg( R_EAX, Rm );
   526     load_reg( R_ECX, Rn );
   527     ADD_r32_r32( R_EAX, R_ECX );
   528     store_reg( R_ECX, Rn );
   529     sh4_x86.tstate = TSTATE_NONE;
   530 :}
   531 ADD #imm, Rn {:  
   532     COUNT_INST(I_ADDI);
   533     load_reg( R_EAX, Rn );
   534     ADD_imm8s_r32( imm, R_EAX );
   535     store_reg( R_EAX, Rn );
   536     sh4_x86.tstate = TSTATE_NONE;
   537 :}
   538 ADDC Rm, Rn {:
   539     COUNT_INST(I_ADDC);
   540     if( sh4_x86.tstate != TSTATE_C ) {
   541         LDC_t();
   542     }
   543     load_reg( R_EAX, Rm );
   544     load_reg( R_ECX, Rn );
   545     ADC_r32_r32( R_EAX, R_ECX );
   546     store_reg( R_ECX, Rn );
   547     SETC_t();
   548     sh4_x86.tstate = TSTATE_C;
   549 :}
   550 ADDV Rm, Rn {:
   551     COUNT_INST(I_ADDV);
   552     load_reg( R_EAX, Rm );
   553     load_reg( R_ECX, Rn );
   554     ADD_r32_r32( R_EAX, R_ECX );
   555     store_reg( R_ECX, Rn );
   556     SETO_t();
   557     sh4_x86.tstate = TSTATE_O;
   558 :}
   559 AND Rm, Rn {:
   560     COUNT_INST(I_AND);
   561     load_reg( R_EAX, Rm );
   562     load_reg( R_ECX, Rn );
   563     AND_r32_r32( R_EAX, R_ECX );
   564     store_reg( R_ECX, Rn );
   565     sh4_x86.tstate = TSTATE_NONE;
   566 :}
   567 AND #imm, R0 {:  
   568     COUNT_INST(I_ANDI);
   569     load_reg( R_EAX, 0 );
   570     AND_imm32_r32(imm, R_EAX); 
   571     store_reg( R_EAX, 0 );
   572     sh4_x86.tstate = TSTATE_NONE;
   573 :}
   574 AND.B #imm, @(R0, GBR) {: 
   575     COUNT_INST(I_ANDB);
   576     load_reg( R_EAX, 0 );
   577     load_spreg( R_ECX, R_GBR );
   578     ADD_r32_r32( R_ECX, R_EAX );
   579     MMU_TRANSLATE_WRITE( R_EAX );
   580     MEM_READ_BYTE_CACHED( R_EAX, R_EDX, 16 );
   581     AND_imm32_r32(imm, R_EDX );
   582     MEM_WRITE_BYTE_UNCHECKED( R_EBX, R_EDX, 16 );
   583     sh4_x86.tstate = TSTATE_NONE;
   584 :}
   585 CMP/EQ Rm, Rn {:  
   586     COUNT_INST(I_CMPEQ);
   587     load_reg( R_EAX, Rm );
   588     load_reg( R_ECX, Rn );
   589     CMP_r32_r32( R_EAX, R_ECX );
   590     SETE_t();
   591     sh4_x86.tstate = TSTATE_E;
   592 :}
   593 CMP/EQ #imm, R0 {:  
   594     COUNT_INST(I_CMPEQI);
   595     load_reg( R_EAX, 0 );
   596     CMP_imm8s_r32(imm, R_EAX);
   597     SETE_t();
   598     sh4_x86.tstate = TSTATE_E;
   599 :}
   600 CMP/GE Rm, Rn {:  
   601     COUNT_INST(I_CMPGE);
   602     load_reg( R_EAX, Rm );
   603     load_reg( R_ECX, Rn );
   604     CMP_r32_r32( R_EAX, R_ECX );
   605     SETGE_t();
   606     sh4_x86.tstate = TSTATE_GE;
   607 :}
   608 CMP/GT Rm, Rn {: 
   609     COUNT_INST(I_CMPGT);
   610     load_reg( R_EAX, Rm );
   611     load_reg( R_ECX, Rn );
   612     CMP_r32_r32( R_EAX, R_ECX );
   613     SETG_t();
   614     sh4_x86.tstate = TSTATE_G;
   615 :}
   616 CMP/HI Rm, Rn {:  
   617     COUNT_INST(I_CMPHI);
   618     load_reg( R_EAX, Rm );
   619     load_reg( R_ECX, Rn );
   620     CMP_r32_r32( R_EAX, R_ECX );
   621     SETA_t();
   622     sh4_x86.tstate = TSTATE_A;
   623 :}
   624 CMP/HS Rm, Rn {: 
   625     COUNT_INST(I_CMPHS);
   626     load_reg( R_EAX, Rm );
   627     load_reg( R_ECX, Rn );
   628     CMP_r32_r32( R_EAX, R_ECX );
   629     SETAE_t();
   630     sh4_x86.tstate = TSTATE_AE;
   631  :}
   632 CMP/PL Rn {: 
   633     COUNT_INST(I_CMPPL);
   634     load_reg( R_EAX, Rn );
   635     CMP_imm8s_r32( 0, R_EAX );
   636     SETG_t();
   637     sh4_x86.tstate = TSTATE_G;
   638 :}
   639 CMP/PZ Rn {:  
   640     COUNT_INST(I_CMPPZ);
   641     load_reg( R_EAX, Rn );
   642     CMP_imm8s_r32( 0, R_EAX );
   643     SETGE_t();
   644     sh4_x86.tstate = TSTATE_GE;
   645 :}
    646 CMP/STR Rm, Rn {: /* Set T iff any of the four corresponding bytes of Rm and Rn are equal */
   647     COUNT_INST(I_CMPSTR);
   648     load_reg( R_EAX, Rm );
   649     load_reg( R_ECX, Rn );
   650     XOR_r32_r32( R_ECX, R_EAX );
   651     TEST_r8_r8( R_AL, R_AL );
   652     JE_rel8(target1);
   653     TEST_r8_r8( R_AH, R_AH );
   654     JE_rel8(target2);
   655     SHR_imm8_r32( 16, R_EAX );
   656     TEST_r8_r8( R_AL, R_AL );
   657     JE_rel8(target3);
   658     TEST_r8_r8( R_AH, R_AH );
   659     JMP_TARGET(target1);
   660     JMP_TARGET(target2);
   661     JMP_TARGET(target3);
   662     SETE_t();
   663     sh4_x86.tstate = TSTATE_E;
   664 :}
   665 DIV0S Rm, Rn {:
   666     COUNT_INST(I_DIV0S);
   667     load_reg( R_EAX, Rm );
   668     load_reg( R_ECX, Rn );
   669     SHR_imm8_r32( 31, R_EAX );
   670     SHR_imm8_r32( 31, R_ECX );
   671     store_spreg( R_EAX, R_M );
   672     store_spreg( R_ECX, R_Q );
   673     CMP_r32_r32( R_EAX, R_ECX );
   674     SETNE_t();
   675     sh4_x86.tstate = TSTATE_NE;
   676 :}
   677 DIV0U {:  
   678     COUNT_INST(I_DIV0U);
   679     XOR_r32_r32( R_EAX, R_EAX );
   680     store_spreg( R_EAX, R_Q );
   681     store_spreg( R_EAX, R_M );
   682     store_spreg( R_EAX, R_T );
   683     sh4_x86.tstate = TSTATE_C; // works for DIV1
   684 :}
   685 DIV1 Rm, Rn {:
   686     COUNT_INST(I_DIV1);
   687     load_spreg( R_ECX, R_M );
   688     load_reg( R_EAX, Rn );
   689     if( sh4_x86.tstate != TSTATE_C ) {
   690 	LDC_t();
   691     }
   692     RCL1_r32( R_EAX );
   693     SETC_r8( R_DL ); // Q'
   694     CMP_sh4r_r32( R_Q, R_ECX );
   695     JE_rel8(mqequal);
   696     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   697     JMP_rel8(end);
   698     JMP_TARGET(mqequal);
   699     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   700     JMP_TARGET(end);
   701     store_reg( R_EAX, Rn ); // Done with Rn now
   702     SETC_r8(R_AL); // tmp1
   703     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   704     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   705     store_spreg( R_ECX, R_Q );
   706     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   707     MOVZX_r8_r32( R_AL, R_EAX );
   708     store_spreg( R_EAX, R_T );
   709     sh4_x86.tstate = TSTATE_NONE;
   710 :}
   711 DMULS.L Rm, Rn {:  
   712     COUNT_INST(I_DMULS);
   713     load_reg( R_EAX, Rm );
   714     load_reg( R_ECX, Rn );
   715     IMUL_r32(R_ECX);
   716     store_spreg( R_EDX, R_MACH );
   717     store_spreg( R_EAX, R_MACL );
   718     sh4_x86.tstate = TSTATE_NONE;
   719 :}
   720 DMULU.L Rm, Rn {:  
   721     COUNT_INST(I_DMULU);
   722     load_reg( R_EAX, Rm );
   723     load_reg( R_ECX, Rn );
   724     MUL_r32(R_ECX);
   725     store_spreg( R_EDX, R_MACH );
   726     store_spreg( R_EAX, R_MACL );    
   727     sh4_x86.tstate = TSTATE_NONE;
   728 :}
   729 DT Rn {:  
   730     COUNT_INST(I_DT);
   731     load_reg( R_EAX, Rn );
   732     ADD_imm8s_r32( -1, R_EAX );
   733     store_reg( R_EAX, Rn );
   734     SETE_t();
   735     sh4_x86.tstate = TSTATE_E;
   736 :}
   737 EXTS.B Rm, Rn {:  
   738     COUNT_INST(I_EXTSB);
   739     load_reg( R_EAX, Rm );
   740     MOVSX_r8_r32( R_EAX, R_EAX );
   741     store_reg( R_EAX, Rn );
   742 :}
   743 EXTS.W Rm, Rn {:  
   744     COUNT_INST(I_EXTSW);
   745     load_reg( R_EAX, Rm );
   746     MOVSX_r16_r32( R_EAX, R_EAX );
   747     store_reg( R_EAX, Rn );
   748 :}
   749 EXTU.B Rm, Rn {:  
   750     COUNT_INST(I_EXTUB);
   751     load_reg( R_EAX, Rm );
   752     MOVZX_r8_r32( R_EAX, R_EAX );
   753     store_reg( R_EAX, Rn );
   754 :}
   755 EXTU.W Rm, Rn {:  
   756     COUNT_INST(I_EXTUW);
   757     load_reg( R_EAX, Rm );
   758     MOVZX_r16_r32( R_EAX, R_EAX );
   759     store_reg( R_EAX, Rn );
   760 :}
   761 MAC.L @Rm+, @Rn+ {:
   762     COUNT_INST(I_MACL);
   763     if( Rm == Rn ) {
   764 	load_reg( R_EAX, Rm );
   765 	check_ralign32( R_EAX );
   766 	MMU_TRANSLATE_READ( R_EAX );
   767 	MOV_r32_esp8(R_EAX, 0);
   768 	load_reg( R_EAX, Rn );
   769 	ADD_imm8s_r32( 4, R_EAX );
   770 	MMU_TRANSLATE_READ( R_EAX );
   771 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
   772 	// Note translate twice in case of page boundaries. Maybe worth
   773 	// adding a page-boundary check to skip the second translation
   774     } else {
   775 	load_reg( R_EAX, Rm );
   776 	check_ralign32( R_EAX );
   777 	MMU_TRANSLATE_READ( R_EAX );
   778 	MOV_r32_esp8( R_EAX, 0 );
   779 	load_reg( R_EAX, Rn );
   780 	check_ralign32( R_EAX );
   781 	MMU_TRANSLATE_READ( R_EAX );
   782 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   783 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   784     }
   785     MEM_READ_LONG( R_EAX, R_EAX );
   786     MOV_r32_r32( R_EAX, R_EBX );
   787     MOV_esp8_r32( 0, R_EAX );
   788     MEM_READ_LONG( R_EAX, R_EAX );
   789     MOV_r32_r32( R_EBX, R_ECX );
   791     IMUL_r32( R_ECX );
   792     ADD_r32_sh4r( R_EAX, R_MACL );
   793     ADC_r32_sh4r( R_EDX, R_MACH );
   795     load_spreg( R_ECX, R_S );
   796     TEST_r32_r32(R_ECX, R_ECX);
   797     JE_rel8( nosat );
   798     call_func0( signsat48 );
   799     JMP_TARGET( nosat );
   800     sh4_x86.tstate = TSTATE_NONE;
   801 :}
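/* Per the SH4 MAC.L semantics: with the S flag set the accumulator
 * saturates to 48 bits (hence the signsat48 helper); with S clear the full
 * 64-bit MACH:MACL accumulation above is kept. */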
   802 MAC.W @Rm+, @Rn+ {:  
   803     COUNT_INST(I_MACW);
   804     if( Rm == Rn ) {
   805 	load_reg( R_EAX, Rm );
   806 	check_ralign16( R_EAX );
   807 	MMU_TRANSLATE_READ( R_EAX );
   808         MOV_r32_esp8( R_EAX, 0 );
   809 	load_reg( R_EAX, Rn );
   810 	ADD_imm8s_r32( 2, R_EAX );
   811 	MMU_TRANSLATE_READ( R_EAX );
   812 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   813 	// Note translate twice in case of page boundaries. Maybe worth
   814 	// adding a page-boundary check to skip the second translation
   815     } else {
   816 	load_reg( R_EAX, Rm );
   817 	check_ralign16( R_EAX );
   818 	MMU_TRANSLATE_READ( R_EAX );
   819         MOV_r32_esp8( R_EAX, 0 );
   820 	load_reg( R_EAX, Rn );
   821 	check_ralign16( R_EAX );
   822 	MMU_TRANSLATE_READ( R_EAX );
   823 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   824 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   825     }
   826     MEM_READ_WORD( R_EAX, R_EAX );
   827     MOV_r32_r32( R_EAX, R_EBX );
   828     MOV_esp8_r32( 0, R_EAX );
   829     MEM_READ_WORD( R_EAX, R_EAX );
   830     MOV_r32_r32( R_EBX, R_ECX );
   832     IMUL_r32( R_ECX );
   833     load_spreg( R_ECX, R_S );
   834     TEST_r32_r32( R_ECX, R_ECX );
   835     JE_rel8( nosat );
   837     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   838     JNO_rel8( end );            // 2
   839     load_imm32( R_EDX, 1 );         // 5
   840     store_spreg( R_EDX, R_MACH );   // 6
   841     JS_rel8( positive );        // 2
   842     load_imm32( R_EAX, 0x80000000 );// 5
   843     store_spreg( R_EAX, R_MACL );   // 6
   844     JMP_rel8(end2);           // 2
   846     JMP_TARGET(positive);
   847     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   848     store_spreg( R_EAX, R_MACL );   // 6
   849     JMP_rel8(end3);            // 2
   851     JMP_TARGET(nosat);
   852     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   853     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   854     JMP_TARGET(end);
   855     JMP_TARGET(end2);
   856     JMP_TARGET(end3);
   857     sh4_x86.tstate = TSTATE_NONE;
   858 :}
   859 MOVT Rn {:  
   860     COUNT_INST(I_MOVT);
   861     load_spreg( R_EAX, R_T );
   862     store_reg( R_EAX, Rn );
   863 :}
   864 MUL.L Rm, Rn {:  
   865     COUNT_INST(I_MULL);
   866     load_reg( R_EAX, Rm );
   867     load_reg( R_ECX, Rn );
   868     MUL_r32( R_ECX );
   869     store_spreg( R_EAX, R_MACL );
   870     sh4_x86.tstate = TSTATE_NONE;
   871 :}
   872 MULS.W Rm, Rn {:
   873     COUNT_INST(I_MULSW);
   874     load_reg16s( R_EAX, Rm );
   875     load_reg16s( R_ECX, Rn );
   876     MUL_r32( R_ECX );
   877     store_spreg( R_EAX, R_MACL );
   878     sh4_x86.tstate = TSTATE_NONE;
   879 :}
   880 MULU.W Rm, Rn {:  
   881     COUNT_INST(I_MULUW);
   882     load_reg16u( R_EAX, Rm );
   883     load_reg16u( R_ECX, Rn );
   884     MUL_r32( R_ECX );
   885     store_spreg( R_EAX, R_MACL );
   886     sh4_x86.tstate = TSTATE_NONE;
   887 :}
   888 NEG Rm, Rn {:
   889     COUNT_INST(I_NEG);
   890     load_reg( R_EAX, Rm );
   891     NEG_r32( R_EAX );
   892     store_reg( R_EAX, Rn );
   893     sh4_x86.tstate = TSTATE_NONE;
   894 :}
   895 NEGC Rm, Rn {:  
   896     COUNT_INST(I_NEGC);
   897     load_reg( R_EAX, Rm );
   898     XOR_r32_r32( R_ECX, R_ECX );
   899     LDC_t();
   900     SBB_r32_r32( R_EAX, R_ECX );
   901     store_reg( R_ECX, Rn );
   902     SETC_t();
   903     sh4_x86.tstate = TSTATE_C;
   904 :}
   905 NOT Rm, Rn {:  
   906     COUNT_INST(I_NOT);
   907     load_reg( R_EAX, Rm );
   908     NOT_r32( R_EAX );
   909     store_reg( R_EAX, Rn );
   910     sh4_x86.tstate = TSTATE_NONE;
   911 :}
   912 OR Rm, Rn {:  
   913     COUNT_INST(I_OR);
   914     load_reg( R_EAX, Rm );
   915     load_reg( R_ECX, Rn );
   916     OR_r32_r32( R_EAX, R_ECX );
   917     store_reg( R_ECX, Rn );
   918     sh4_x86.tstate = TSTATE_NONE;
   919 :}
   920 OR #imm, R0 {:
   921     COUNT_INST(I_ORI);
   922     load_reg( R_EAX, 0 );
   923     OR_imm32_r32(imm, R_EAX);
   924     store_reg( R_EAX, 0 );
   925     sh4_x86.tstate = TSTATE_NONE;
   926 :}
   927 OR.B #imm, @(R0, GBR) {:  
   928     COUNT_INST(I_ORB);
   929     load_reg( R_EAX, 0 );
   930     load_spreg( R_ECX, R_GBR );
   931     ADD_r32_r32( R_ECX, R_EAX );
   932     MMU_TRANSLATE_WRITE( R_EAX );
   933     MEM_READ_BYTE_CACHED( R_EAX, R_EDX, 16 );
   934     OR_imm32_r32(imm, R_EDX );
   935     MEM_WRITE_BYTE_UNCHECKED( R_EBX, R_EDX, 16 );
   936     sh4_x86.tstate = TSTATE_NONE;
   937 :}
   938 ROTCL Rn {:
   939     COUNT_INST(I_ROTCL);
   940     load_reg( R_EAX, Rn );
   941     if( sh4_x86.tstate != TSTATE_C ) {
   942 	LDC_t();
   943     }
   944     RCL1_r32( R_EAX );
   945     store_reg( R_EAX, Rn );
   946     SETC_t();
   947     sh4_x86.tstate = TSTATE_C;
   948 :}
   949 ROTCR Rn {:  
   950     COUNT_INST(I_ROTCR);
   951     load_reg( R_EAX, Rn );
   952     if( sh4_x86.tstate != TSTATE_C ) {
   953 	LDC_t();
   954     }
   955     RCR1_r32( R_EAX );
   956     store_reg( R_EAX, Rn );
   957     SETC_t();
   958     sh4_x86.tstate = TSTATE_C;
   959 :}
   960 ROTL Rn {:  
   961     COUNT_INST(I_ROTL);
   962     load_reg( R_EAX, Rn );
   963     ROL1_r32( R_EAX );
   964     store_reg( R_EAX, Rn );
   965     SETC_t();
   966     sh4_x86.tstate = TSTATE_C;
   967 :}
   968 ROTR Rn {:  
   969     COUNT_INST(I_ROTR);
   970     load_reg( R_EAX, Rn );
   971     ROR1_r32( R_EAX );
   972     store_reg( R_EAX, Rn );
   973     SETC_t();
   974     sh4_x86.tstate = TSTATE_C;
   975 :}
   976 SHAD Rm, Rn {:
   977     COUNT_INST(I_SHAD);
   978     /* Annoyingly enough, not directly convertible */
   979     load_reg( R_EAX, Rn );
   980     load_reg( R_ECX, Rm );
   981     CMP_imm32_r32( 0, R_ECX );
   982     JGE_rel8(doshl);
   984     NEG_r32( R_ECX );      // 2
   985     AND_imm8_r8( 0x1F, R_CL ); // 3
   986     JE_rel8(emptysar);     // 2
   987     SAR_r32_CL( R_EAX );       // 2
   988     JMP_rel8(end);          // 2
   990     JMP_TARGET(emptysar);
   991     SAR_imm8_r32(31, R_EAX );  // 3
   992     JMP_rel8(end2);
   994     JMP_TARGET(doshl);
   995     AND_imm8_r8( 0x1F, R_CL ); // 3
   996     SHL_r32_CL( R_EAX );       // 2
   997     JMP_TARGET(end);
   998     JMP_TARGET(end2);
   999     store_reg( R_EAX, Rn );
  1000     sh4_x86.tstate = TSTATE_NONE;
  1001 :}
  1002 SHLD Rm, Rn {:  
  1003     COUNT_INST(I_SHLD);
  1004     load_reg( R_EAX, Rn );
  1005     load_reg( R_ECX, Rm );
  1006     CMP_imm32_r32( 0, R_ECX );
  1007     JGE_rel8(doshl);
  1009     NEG_r32( R_ECX );      // 2
  1010     AND_imm8_r8( 0x1F, R_CL ); // 3
  1011     JE_rel8(emptyshr );
  1012     SHR_r32_CL( R_EAX );       // 2
  1013     JMP_rel8(end);          // 2
  1015     JMP_TARGET(emptyshr);
  1016     XOR_r32_r32( R_EAX, R_EAX );
  1017     JMP_rel8(end2);
  1019     JMP_TARGET(doshl);
  1020     AND_imm8_r8( 0x1F, R_CL ); // 3
  1021     SHL_r32_CL( R_EAX );       // 2
  1022     JMP_TARGET(end);
  1023     JMP_TARGET(end2);
  1024     store_reg( R_EAX, Rn );
  1025     sh4_x86.tstate = TSTATE_NONE;
  1026 :}
  1027 SHAL Rn {: 
  1028     COUNT_INST(I_SHAL);
  1029     load_reg( R_EAX, Rn );
  1030     SHL1_r32( R_EAX );
  1031     SETC_t();
  1032     store_reg( R_EAX, Rn );
  1033     sh4_x86.tstate = TSTATE_C;
  1034 :}
  1035 SHAR Rn {:  
  1036     COUNT_INST(I_SHAR);
  1037     load_reg( R_EAX, Rn );
  1038     SAR1_r32( R_EAX );
  1039     SETC_t();
  1040     store_reg( R_EAX, Rn );
  1041     sh4_x86.tstate = TSTATE_C;
  1042 :}
  1043 SHLL Rn {:  
  1044     COUNT_INST(I_SHLL);
  1045     load_reg( R_EAX, Rn );
  1046     SHL1_r32( R_EAX );
  1047     SETC_t();
  1048     store_reg( R_EAX, Rn );
  1049     sh4_x86.tstate = TSTATE_C;
  1050 :}
  1051 SHLL2 Rn {:
  1052     COUNT_INST(I_SHLL);
  1053     load_reg( R_EAX, Rn );
  1054     SHL_imm8_r32( 2, R_EAX );
  1055     store_reg( R_EAX, Rn );
  1056     sh4_x86.tstate = TSTATE_NONE;
  1057 :}
  1058 SHLL8 Rn {:  
  1059     COUNT_INST(I_SHLL);
  1060     load_reg( R_EAX, Rn );
  1061     SHL_imm8_r32( 8, R_EAX );
  1062     store_reg( R_EAX, Rn );
  1063     sh4_x86.tstate = TSTATE_NONE;
  1064 :}
  1065 SHLL16 Rn {:  
  1066     COUNT_INST(I_SHLL);
  1067     load_reg( R_EAX, Rn );
  1068     SHL_imm8_r32( 16, R_EAX );
  1069     store_reg( R_EAX, Rn );
  1070     sh4_x86.tstate = TSTATE_NONE;
  1071 :}
  1072 SHLR Rn {:  
  1073     COUNT_INST(I_SHLR);
  1074     load_reg( R_EAX, Rn );
  1075     SHR1_r32( R_EAX );
  1076     SETC_t();
  1077     store_reg( R_EAX, Rn );
  1078     sh4_x86.tstate = TSTATE_C;
  1079 :}
  1080 SHLR2 Rn {:  
  1081     COUNT_INST(I_SHLR);
  1082     load_reg( R_EAX, Rn );
  1083     SHR_imm8_r32( 2, R_EAX );
  1084     store_reg( R_EAX, Rn );
  1085     sh4_x86.tstate = TSTATE_NONE;
  1086 :}
  1087 SHLR8 Rn {:  
  1088     COUNT_INST(I_SHLR);
  1089     load_reg( R_EAX, Rn );
  1090     SHR_imm8_r32( 8, R_EAX );
  1091     store_reg( R_EAX, Rn );
  1092     sh4_x86.tstate = TSTATE_NONE;
  1093 :}
  1094 SHLR16 Rn {:  
  1095     COUNT_INST(I_SHLR);
  1096     load_reg( R_EAX, Rn );
  1097     SHR_imm8_r32( 16, R_EAX );
  1098     store_reg( R_EAX, Rn );
  1099     sh4_x86.tstate = TSTATE_NONE;
  1100 :}
  1101 SUB Rm, Rn {:  
  1102     COUNT_INST(I_SUB);
  1103     load_reg( R_EAX, Rm );
  1104     load_reg( R_ECX, Rn );
  1105     SUB_r32_r32( R_EAX, R_ECX );
  1106     store_reg( R_ECX, Rn );
  1107     sh4_x86.tstate = TSTATE_NONE;
  1108 :}
  1109 SUBC Rm, Rn {:  
  1110     COUNT_INST(I_SUBC);
  1111     load_reg( R_EAX, Rm );
  1112     load_reg( R_ECX, Rn );
  1113     if( sh4_x86.tstate != TSTATE_C ) {
   1114 	LDC_t();
   1115     }
  1116     SBB_r32_r32( R_EAX, R_ECX );
  1117     store_reg( R_ECX, Rn );
  1118     SETC_t();
  1119     sh4_x86.tstate = TSTATE_C;
  1120 :}
  1121 SUBV Rm, Rn {:  
  1122     COUNT_INST(I_SUBV);
  1123     load_reg( R_EAX, Rm );
  1124     load_reg( R_ECX, Rn );
  1125     SUB_r32_r32( R_EAX, R_ECX );
  1126     store_reg( R_ECX, Rn );
  1127     SETO_t();
  1128     sh4_x86.tstate = TSTATE_O;
  1129 :}
  1130 SWAP.B Rm, Rn {:  
  1131     COUNT_INST(I_SWAPB);
  1132     load_reg( R_EAX, Rm );
  1133     XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
  1134     store_reg( R_EAX, Rn );
  1135 :}
  1136 SWAP.W Rm, Rn {:  
   1137     COUNT_INST(I_SWAPW);
  1138     load_reg( R_EAX, Rm );
  1139     MOV_r32_r32( R_EAX, R_ECX );
  1140     SHL_imm8_r32( 16, R_ECX );
  1141     SHR_imm8_r32( 16, R_EAX );
  1142     OR_r32_r32( R_EAX, R_ECX );
  1143     store_reg( R_ECX, Rn );
  1144     sh4_x86.tstate = TSTATE_NONE;
  1145 :}
  1146 TAS.B @Rn {:  
  1147     COUNT_INST(I_TASB);
  1148     load_reg( R_EAX, Rn );
  1149     MMU_TRANSLATE_WRITE( R_EAX );
  1150     MEM_READ_BYTE_CACHED( R_EAX, R_EDX, 16 );
  1151     TEST_r8_r8( R_DL, R_DL );
  1152     SETE_t();
  1153     OR_imm8_r8( 0x80, R_DL );
  1154     MEM_WRITE_BYTE_UNCHECKED( R_EBX, R_EDX, 16 );
  1155     sh4_x86.tstate = TSTATE_NONE;
  1156 :}
  1157 TST Rm, Rn {:  
  1158     COUNT_INST(I_TST);
  1159     load_reg( R_EAX, Rm );
  1160     load_reg( R_ECX, Rn );
  1161     TEST_r32_r32( R_EAX, R_ECX );
  1162     SETE_t();
  1163     sh4_x86.tstate = TSTATE_E;
  1164 :}
  1165 TST #imm, R0 {:  
  1166     COUNT_INST(I_TSTI);
  1167     load_reg( R_EAX, 0 );
  1168     TEST_imm32_r32( imm, R_EAX );
  1169     SETE_t();
  1170     sh4_x86.tstate = TSTATE_E;
  1171 :}
  1172 TST.B #imm, @(R0, GBR) {:  
  1173     COUNT_INST(I_TSTB);
  1174     load_reg( R_EAX, 0);
   1175     load_spreg( R_ECX, R_GBR );
  1176     ADD_r32_r32( R_ECX, R_EAX );
  1177     MMU_TRANSLATE_READ( R_EAX );
  1178     MEM_READ_BYTE_CACHED( R_EAX, R_EAX, 16 );
  1179     TEST_imm8_r8( imm, R_AL );
  1180     SETE_t();
  1181     sh4_x86.tstate = TSTATE_E;
  1182 :}
  1183 XOR Rm, Rn {:  
  1184     COUNT_INST(I_XOR);
  1185     load_reg( R_EAX, Rm );
  1186     load_reg( R_ECX, Rn );
  1187     XOR_r32_r32( R_EAX, R_ECX );
  1188     store_reg( R_ECX, Rn );
  1189     sh4_x86.tstate = TSTATE_NONE;
  1190 :}
  1191 XOR #imm, R0 {:  
  1192     COUNT_INST(I_XORI);
  1193     load_reg( R_EAX, 0 );
  1194     XOR_imm32_r32( imm, R_EAX );
  1195     store_reg( R_EAX, 0 );
  1196     sh4_x86.tstate = TSTATE_NONE;
  1197 :}
  1198 XOR.B #imm, @(R0, GBR) {:  
  1199     COUNT_INST(I_XORB);
  1200     load_reg( R_EAX, 0 );
  1201     load_spreg( R_ECX, R_GBR );
  1202     ADD_r32_r32( R_ECX, R_EAX );
  1203     MMU_TRANSLATE_WRITE( R_EAX );
  1204     MEM_READ_BYTE_CACHED(R_EAX, R_EDX, 16);
  1205     XOR_imm32_r32( imm, R_EDX );
  1206     MEM_WRITE_BYTE_UNCHECKED( R_EBX, R_EDX, 16 );
  1207     sh4_x86.tstate = TSTATE_NONE;
  1208 :}
  1209 XTRCT Rm, Rn {:
  1210     COUNT_INST(I_XTRCT);
  1211     load_reg( R_EAX, Rm );
  1212     load_reg( R_ECX, Rn );
  1213     SHL_imm8_r32( 16, R_EAX );
  1214     SHR_imm8_r32( 16, R_ECX );
  1215     OR_r32_r32( R_EAX, R_ECX );
  1216     store_reg( R_ECX, Rn );
  1217     sh4_x86.tstate = TSTATE_NONE;
  1218 :}
  1220 /* Data move instructions */
  1221 MOV Rm, Rn {:  
  1222     COUNT_INST(I_MOV);
  1223     load_reg( R_EAX, Rm );
  1224     store_reg( R_EAX, Rn );
  1225 :}
  1226 MOV #imm, Rn {:  
  1227     COUNT_INST(I_MOVI);
  1228     load_imm32( R_EAX, imm );
  1229     store_reg( R_EAX, Rn );
  1230 :}
  1231 MOV.B Rm, @Rn {:  
  1232     COUNT_INST(I_MOVB);
  1233     load_reg( R_EAX, Rn );
  1234     MMU_TRANSLATE_WRITE( R_EAX );
  1235     MEM_WRITE_BYTE_CACHED( R_EAX, Rm, Rn );
  1236     sh4_x86.tstate = TSTATE_NONE;
  1237 :}
  1238 MOV.B Rm, @-Rn {:  
  1239     COUNT_INST(I_MOVB);
  1240     load_reg( R_EAX, Rn );
  1241     ADD_imm8s_r32( -1, R_EAX );
  1242     MMU_TRANSLATE_WRITE( R_EAX );
  1243     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
  1244     MEM_WRITE_BYTE_CACHED( R_EAX, Rm, Rn );
  1245     sh4_x86.tstate = TSTATE_NONE;
  1246 :}
  1247 MOV.B Rm, @(R0, Rn) {:  
  1248     COUNT_INST(I_MOVB);
  1249     load_reg( R_EAX, 0 );
  1250     load_reg( R_ECX, Rn );
  1251     ADD_r32_r32( R_ECX, R_EAX );
  1252     MMU_TRANSLATE_WRITE( R_EAX );
  1253     MEM_WRITE_BYTE_CACHED( R_EAX, Rm, 0 );
  1254     sh4_x86.tstate = TSTATE_NONE;
  1255 :}
  1256 MOV.B R0, @(disp, GBR) {:  
  1257     COUNT_INST(I_MOVB);
  1258     load_spreg( R_EAX, R_GBR );
  1259     ADD_imm32_r32( disp, R_EAX );
  1260     MMU_TRANSLATE_WRITE( R_EAX );
  1261     MEM_WRITE_BYTE_CACHED( R_EAX, 0, 16 );
  1262     sh4_x86.tstate = TSTATE_NONE;
  1263 :}
  1264 MOV.B R0, @(disp, Rn) {:  
  1265     COUNT_INST(I_MOVB);
  1266     load_reg( R_EAX, Rn );
  1267     ADD_imm32_r32( disp, R_EAX );
  1268     MMU_TRANSLATE_WRITE( R_EAX );
  1269     MEM_WRITE_BYTE_CACHED( R_EAX, 0, Rn );
  1270     sh4_x86.tstate = TSTATE_NONE;
  1271 :}
  1272 MOV.B @Rm, Rn {:  
  1273     COUNT_INST(I_MOVB);
  1274     load_reg( R_EAX, Rm );
  1275     MMU_TRANSLATE_READ( R_EAX );
  1276     MEM_READ_BYTE_CACHED( R_EAX, R_EAX, Rm );
  1277     store_reg( R_EAX, Rn );
  1278     sh4_x86.tstate = TSTATE_NONE;
  1279 :}
  1280 MOV.B @Rm+, Rn {:  
  1281     COUNT_INST(I_MOVB);
  1282     load_reg( R_EAX, Rm );
  1283     MMU_TRANSLATE_READ( R_EAX );
  1284     ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
  1285     MEM_READ_BYTE_CACHED( R_EAX, R_EAX, Rm );
  1286     store_reg( R_EAX, Rn );
  1287     sh4_x86.tstate = TSTATE_NONE;
  1288 :}
  1289 MOV.B @(R0, Rm), Rn {:  
  1290     COUNT_INST(I_MOVB);
  1291     load_reg( R_EAX, 0 );
  1292     load_reg( R_ECX, Rm );
  1293     ADD_r32_r32( R_ECX, R_EAX );
   1294     MMU_TRANSLATE_READ( R_EAX );
  1295     MEM_READ_BYTE_CACHED( R_EAX, R_EAX, 0 );
  1296     store_reg( R_EAX, Rn );
  1297     sh4_x86.tstate = TSTATE_NONE;
  1298 :}
  1299 MOV.B @(disp, GBR), R0 {:  
  1300     COUNT_INST(I_MOVB);
  1301     load_spreg( R_EAX, R_GBR );
  1302     ADD_imm32_r32( disp, R_EAX );
  1303     MMU_TRANSLATE_READ( R_EAX );
  1304     MEM_READ_BYTE_CACHED( R_EAX, R_EAX, 16 );
  1305     store_reg( R_EAX, 0 );
  1306     sh4_x86.tstate = TSTATE_NONE;
  1307 :}
  1308 MOV.B @(disp, Rm), R0 {:  
  1309     COUNT_INST(I_MOVB);
  1310     load_reg( R_EAX, Rm );
  1311     ADD_imm32_r32( disp, R_EAX );
  1312     MMU_TRANSLATE_READ( R_EAX );
  1313     MEM_READ_BYTE_CACHED( R_EAX, R_EAX, Rm );
  1314     store_reg( R_EAX, 0 );
  1315     sh4_x86.tstate = TSTATE_NONE;
  1316 :}
  1317 MOV.L Rm, @Rn {:
  1318     COUNT_INST(I_MOVL);
  1319     load_reg( R_EAX, Rn );
  1320     check_walign32(R_EAX);
  1321     MMU_TRANSLATE_WRITE( R_EAX );
  1322     MEM_WRITE_LONG_CACHED( R_EAX, Rm, Rn );
  1323     sh4_x86.tstate = TSTATE_NONE;
  1324 :}
  1325 MOV.L Rm, @-Rn {:  
  1326     COUNT_INST(I_MOVL);
  1327     load_reg( R_EAX, Rn );
  1328     ADD_imm8s_r32( -4, R_EAX );
  1329     check_walign32( R_EAX );
  1330     MMU_TRANSLATE_WRITE( R_EAX );
  1331     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  1332     MEM_WRITE_LONG_CACHED( R_EAX, Rm, Rn );
  1333     sh4_x86.tstate = TSTATE_NONE;
  1334 :}
  1335 MOV.L Rm, @(R0, Rn) {:  
  1336     COUNT_INST(I_MOVL);
  1337     load_reg( R_EAX, 0 );
  1338     load_reg( R_ECX, Rn );
  1339     ADD_r32_r32( R_ECX, R_EAX );
  1340     check_walign32( R_EAX );
  1341     MMU_TRANSLATE_WRITE( R_EAX );
  1342     MEM_WRITE_LONG_CACHED( R_EAX, Rm, 0 );
  1343     sh4_x86.tstate = TSTATE_NONE;
  1344 :}
  1345 MOV.L R0, @(disp, GBR) {:  
  1346     COUNT_INST(I_MOVL);
  1347     load_spreg( R_EAX, R_GBR );
  1348     ADD_imm32_r32( disp, R_EAX );
  1349     check_walign32( R_EAX );
  1350     MMU_TRANSLATE_WRITE( R_EAX );
  1351     MEM_WRITE_LONG_CACHED( R_EAX, 0, 16 );
  1352     sh4_x86.tstate = TSTATE_NONE;
  1353 :}
  1354 MOV.L Rm, @(disp, Rn) {:  
  1355     COUNT_INST(I_MOVL);
  1356     load_reg( R_EAX, Rn );
  1357     ADD_imm32_r32( disp, R_EAX );
  1358     check_walign32( R_EAX );
  1359     MMU_TRANSLATE_WRITE( R_EAX );
  1360     MEM_WRITE_LONG_CACHED( R_EAX, Rm, Rn );
  1361     sh4_x86.tstate = TSTATE_NONE;
  1362 :}
  1363 MOV.L @Rm, Rn {:  
  1364     COUNT_INST(I_MOVL);
  1365     load_reg( R_EAX, Rm );
  1366     check_ralign32( R_EAX );
  1367     MMU_TRANSLATE_READ( R_EAX );
  1368     MEM_READ_LONG_CACHED( R_EAX, R_EAX, Rm );
  1369     store_reg( R_EAX, Rn );
  1370     sh4_x86.tstate = TSTATE_NONE;
  1371 :}
  1372 MOV.L @Rm+, Rn {:  
  1373     COUNT_INST(I_MOVL);
  1374     load_reg( R_EAX, Rm );
  1375     check_ralign32( R_EAX );
  1376     MMU_TRANSLATE_READ( R_EAX );
  1377     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1378     MEM_READ_LONG_CACHED( R_EAX, R_EAX, Rm );
  1379     store_reg( R_EAX, Rn );
  1380     sh4_x86.tstate = TSTATE_NONE;
  1381 :}
  1382 MOV.L @(R0, Rm), Rn {:  
  1383     COUNT_INST(I_MOVL);
  1384     load_reg( R_EAX, 0 );
  1385     load_reg( R_ECX, Rm );
  1386     ADD_r32_r32( R_ECX, R_EAX );
  1387     check_ralign32( R_EAX );
  1388     MMU_TRANSLATE_READ( R_EAX );
  1389     MEM_READ_LONG_CACHED( R_EAX, R_EAX, 0 );
  1390     store_reg( R_EAX, Rn );
  1391     sh4_x86.tstate = TSTATE_NONE;
  1392 :}
  1393 MOV.L @(disp, GBR), R0 {:
  1394     COUNT_INST(I_MOVL);
  1395     load_spreg( R_EAX, R_GBR );
  1396     ADD_imm32_r32( disp, R_EAX );
  1397     check_ralign32( R_EAX );
  1398     MMU_TRANSLATE_READ( R_EAX );
  1399     MEM_READ_LONG_CACHED( R_EAX, R_EAX, 16 );
  1400     store_reg( R_EAX, 0 );
  1401     sh4_x86.tstate = TSTATE_NONE;
  1402 :}
  1403 MOV.L @(disp, PC), Rn {:  
  1404     COUNT_INST(I_MOVLPC);
  1405     if( sh4_x86.in_delay_slot ) {
  1406 	SLOTILLEGAL();
  1407     } else {
  1408 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1409 	if( IS_IN_ICACHE(target) ) {
  1410 	    // If the target address is in the same page as the code, it's
  1411 	    // pretty safe to just ref it directly and circumvent the whole
  1412 	    // memory subsystem. (this is a big performance win)
  1414 	    // FIXME: There's a corner-case that's not handled here when
  1415 	    // the current code-page is in the ITLB but not in the UTLB.
  1416 	    // (should generate a TLB miss although need to test SH4 
  1417 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1418 	    // behaviour though.
  1419 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1420 	    MOV_moff32_EAX( ptr );
  1421 	} else {
  1422 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1423 	    // different virtual address than the translation was done with,
  1424 	    // but we can safely assume that the low bits are the same.
  1425 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1426 	    ADD_sh4r_r32( R_PC, R_EAX );
  1427 	    MMU_TRANSLATE_READ( R_EAX );
  1428 	    MEM_READ_LONG_CACHED( R_EAX, R_EAX, 16 );
   1429 	    sh4_x86.tstate = TSTATE_NONE;
   1430 	}
   1431 	store_reg( R_EAX, Rn );
   1432     }
  1433 :}
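/* MOV.L @(disp, PC) is the SH4 idiom for loading a 32-bit constant from a
 * literal pool placed near the code, which is why the in-icache fast path
 * above (a direct host-pointer load) is such a common win. */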
  1434 MOV.L @(disp, Rm), Rn {:  
  1435     COUNT_INST(I_MOVL);
  1436     load_reg( R_EAX, Rm );
  1437     ADD_imm8s_r32( disp, R_EAX );
  1438     check_ralign32( R_EAX );
  1439     MMU_TRANSLATE_READ( R_EAX );
  1440     MEM_READ_LONG_CACHED( R_EAX, R_EAX, Rm );
  1441     store_reg( R_EAX, Rn );
  1442     sh4_x86.tstate = TSTATE_NONE;
  1443 :}
  1444 MOV.W Rm, @Rn {:  
  1445     COUNT_INST(I_MOVW);
  1446     load_reg( R_EAX, Rn );
  1447     check_walign16( R_EAX );
   1448     MMU_TRANSLATE_WRITE( R_EAX );
  1449     MEM_WRITE_WORD_CACHED( R_EAX, Rm, Rn );
  1450     sh4_x86.tstate = TSTATE_NONE;
  1451 :}
  1452 MOV.W Rm, @-Rn {:  
  1453     COUNT_INST(I_MOVW);
  1454     load_reg( R_EAX, Rn );
  1455     ADD_imm8s_r32( -2, R_EAX );
  1456     check_walign16( R_EAX );
  1457     MMU_TRANSLATE_WRITE( R_EAX );
  1458     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
  1459     MEM_WRITE_WORD_CACHED( R_EAX, Rm, Rn );
  1460     sh4_x86.tstate = TSTATE_NONE;
  1461 :}
  1462 MOV.W Rm, @(R0, Rn) {:  
  1463     COUNT_INST(I_MOVW);
  1464     load_reg( R_EAX, 0 );
  1465     load_reg( R_ECX, Rn );
  1466     ADD_r32_r32( R_ECX, R_EAX );
  1467     check_walign16( R_EAX );
  1468     MMU_TRANSLATE_WRITE( R_EAX );
  1469     MEM_WRITE_WORD_CACHED( R_EAX, Rm, 0 );
  1470     sh4_x86.tstate = TSTATE_NONE;
  1471 :}
  1472 MOV.W R0, @(disp, GBR) {:  
  1473     COUNT_INST(I_MOVW);
  1474     load_spreg( R_EAX, R_GBR );
  1475     ADD_imm32_r32( disp, R_EAX );
  1476     check_walign16( R_EAX );
  1477     MMU_TRANSLATE_WRITE( R_EAX );
  1478     MEM_WRITE_WORD_CACHED( R_EAX, 0, 16 );
  1479     sh4_x86.tstate = TSTATE_NONE;
  1480 :}
  1481 MOV.W R0, @(disp, Rn) {:  
  1482     COUNT_INST(I_MOVW);
  1483     load_reg( R_EAX, Rn );
  1484     ADD_imm32_r32( disp, R_EAX );
  1485     check_walign16( R_EAX );
  1486     MMU_TRANSLATE_WRITE( R_EAX );
  1487     MEM_WRITE_WORD_CACHED( R_EAX, 0, Rn );
  1488     sh4_x86.tstate = TSTATE_NONE;
  1489 :}
  1490 MOV.W @Rm, Rn {:  
  1491     COUNT_INST(I_MOVW);
  1492     load_reg( R_EAX, Rm );
  1493     check_ralign16( R_EAX );
  1494     MMU_TRANSLATE_READ( R_EAX );
  1495     MEM_READ_WORD_CACHED( R_EAX, R_EAX, Rm );
  1496     store_reg( R_EAX, Rn );
  1497     sh4_x86.tstate = TSTATE_NONE;
  1498 :}
  1499 MOV.W @Rm+, Rn {:  
  1500     COUNT_INST(I_MOVW);
  1501     load_reg( R_EAX, Rm );
  1502     check_ralign16( R_EAX );
  1503     MMU_TRANSLATE_READ( R_EAX );
  1504     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
  1505     MEM_READ_WORD_CACHED( R_EAX, R_EAX, Rm );
  1506     store_reg( R_EAX, Rn );
  1507     sh4_x86.tstate = TSTATE_NONE;
  1508 :}
  1509 MOV.W @(R0, Rm), Rn {:  
  1510     COUNT_INST(I_MOVW);
  1511     load_reg( R_EAX, 0 );
  1512     load_reg( R_ECX, Rm );
  1513     ADD_r32_r32( R_ECX, R_EAX );
  1514     check_ralign16( R_EAX );
  1515     MMU_TRANSLATE_READ( R_EAX );
  1516     MEM_READ_WORD_CACHED( R_EAX, R_EAX, 0 );
  1517     store_reg( R_EAX, Rn );
  1518     sh4_x86.tstate = TSTATE_NONE;
  1519 :}
  1520 MOV.W @(disp, GBR), R0 {:  
  1521     COUNT_INST(I_MOVW);
  1522     load_spreg( R_EAX, R_GBR );
  1523     ADD_imm32_r32( disp, R_EAX );
  1524     check_ralign16( R_EAX );
  1525     MMU_TRANSLATE_READ( R_EAX );
  1526     MEM_READ_WORD_CACHED( R_EAX, R_EAX, 16 );
  1527     store_reg( R_EAX, 0 );
  1528     sh4_x86.tstate = TSTATE_NONE;
  1529 :}
  1530 MOV.W @(disp, PC), Rn {:  
  1531     COUNT_INST(I_MOVW);
  1532     if( sh4_x86.in_delay_slot ) {
  1533 	SLOTILLEGAL();
  1534     } else {
  1535 	// See comments for MOV.L @(disp, PC), Rn
  1536 	uint32_t target = pc + disp + 4;
  1537 	if( IS_IN_ICACHE(target) ) {
  1538 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1539 	    MOV_moff32_EAX( ptr );
  1540 	    MOVSX_r16_r32( R_EAX, R_EAX );
  1541 	} else {
  1542 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1543 	    ADD_sh4r_r32( R_PC, R_EAX );
  1544 	    MMU_TRANSLATE_READ( R_EAX );
  1545 	    MEM_READ_WORD( R_EAX, R_EAX );
   1546 	    sh4_x86.tstate = TSTATE_NONE;
   1547 	}
   1548 	store_reg( R_EAX, Rn );
   1549     }
  1550 :}
  1551 MOV.W @(disp, Rm), R0 {:  
  1552     COUNT_INST(I_MOVW);
  1553     load_reg( R_EAX, Rm );
  1554     ADD_imm32_r32( disp, R_EAX );
  1555     check_ralign16( R_EAX );
  1556     MMU_TRANSLATE_READ( R_EAX );
  1557     MEM_READ_WORD_CACHED( R_EAX, R_EAX, Rm );
  1558     store_reg( R_EAX, 0 );
  1559     sh4_x86.tstate = TSTATE_NONE;
  1560 :}
  1561 MOVA @(disp, PC), R0 {:  
  1562     COUNT_INST(I_MOVA);
  1563     if( sh4_x86.in_delay_slot ) {
  1564 	SLOTILLEGAL();
  1565     } else {
  1566 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1567 	ADD_sh4r_r32( R_PC, R_ECX );
  1568 	store_reg( R_ECX, 0 );
   1569 	sh4_x86.tstate = TSTATE_NONE;
   1570     }
  1571 :}
  1572 MOVCA.L R0, @Rn {:  
  1573     COUNT_INST(I_MOVCA);
  1574     load_reg( R_EAX, Rn );
  1575     check_walign32( R_EAX );
  1576     MMU_TRANSLATE_WRITE( R_EAX );
  1577     MEM_WRITE_LONG_CACHED( R_EAX, 0, Rn );
  1578     sh4_x86.tstate = TSTATE_NONE;
  1579 :}
  1581 /* Control transfer instructions */
  1582 BF disp {:
  1583     COUNT_INST(I_BF);
  1584     if( sh4_x86.in_delay_slot ) {
  1585 	SLOTILLEGAL();
  1586     } else {
  1587 	sh4vma_t target = disp + pc + 4;
  1588 	JT_rel8( nottaken );
  1589 	exit_block_rel(target, pc+2 );
  1590 	JMP_TARGET(nottaken);
   1591 	return 2;
   1592     }
  1593 :}
  1594 BF/S disp {:
  1595     COUNT_INST(I_BFS);
  1596     if( sh4_x86.in_delay_slot ) {
  1597 	SLOTILLEGAL();
  1598     } else {
  1599 	sh4_x86.in_delay_slot = DELAY_PC;
  1600 	if( UNTRANSLATABLE(pc+2) ) {
  1601 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1602 	    JT_rel8(nottaken);
  1603 	    ADD_imm32_r32( disp, R_EAX );
  1604 	    JMP_TARGET(nottaken);
  1605 	    ADD_sh4r_r32( R_PC, R_EAX );
  1606 	    store_spreg( R_EAX, R_NEW_PC );
  1607 	    exit_block_emu(pc+2);
  1608 	    sh4_x86.branch_taken = TRUE;
  1609 	    return 2;
  1610 	} else {
  1611 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1612 		CMP_imm8s_sh4r( 1, R_T );
   1613 		sh4_x86.tstate = TSTATE_E;
   1614 	    }
  1615 	    sh4vma_t target = disp + pc + 4;
  1616 	    OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
  1617 	    int save_tstate = sh4_x86.tstate;
  1618 	    sh4_translate_instruction(pc+2);
  1619 	    exit_block_rel( target, pc+4 );
  1621 	    // not taken
  1622 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1623 	    sh4_x86.tstate = save_tstate;
  1624 	    sh4_translate_instruction(pc+2);
  1625 	    return 4;
  1626 	}
  1627     }
  1628 :}
  1629 BRA disp {:  
  1630     COUNT_INST(I_BRA);
  1631     if( sh4_x86.in_delay_slot ) {
  1632 	SLOTILLEGAL();
  1633     } else {
  1634 	sh4_x86.in_delay_slot = DELAY_PC;
  1635 	sh4_x86.branch_taken = TRUE;
  1636 	if( UNTRANSLATABLE(pc+2) ) {
  1637 	    load_spreg( R_EAX, R_PC );
  1638 	    ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
  1639 	    store_spreg( R_EAX, R_NEW_PC );
  1640 	    exit_block_emu(pc+2);
  1641 	    return 2;
  1642 	} else {
  1643 	    sh4_translate_instruction( pc + 2 );
  1644 	    exit_block_rel( disp + pc + 4, pc+4 );
  1645 	    return 4;
  1646 	}
  1647     }
  1648 :}
  1649 BRAF Rn {:  
  1650     COUNT_INST(I_BRAF);
  1651     if( sh4_x86.in_delay_slot ) {
  1652 	SLOTILLEGAL();
  1653     } else {
  1654 	load_spreg( R_EAX, R_PC );
  1655 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1656 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1657 	store_spreg( R_EAX, R_NEW_PC );
  1658 	sh4_x86.in_delay_slot = DELAY_PC;
  1659 	sh4_x86.tstate = TSTATE_NONE;
  1660 	sh4_x86.branch_taken = TRUE;
  1661 	if( UNTRANSLATABLE(pc+2) ) {
  1662 	    exit_block_emu(pc+2);
  1663 	    return 2;
  1664 	} else {
  1665 	    sh4_translate_instruction( pc + 2 );
  1666 	    exit_block_newpcset(pc+2);
  1667 	    return 4;
  1668 	}
  1669     }
  1670 :}
  1671 BSR disp {:  
  1672     COUNT_INST(I_BSR);
  1673     if( sh4_x86.in_delay_slot ) {
  1674 	SLOTILLEGAL();
  1675     } else {
  1676 	load_spreg( R_EAX, R_PC );
  1677 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1678 	store_spreg( R_EAX, R_PR );
  1679 	sh4_x86.in_delay_slot = DELAY_PC;
  1680 	sh4_x86.branch_taken = TRUE;
  1681 	sh4_x86.tstate = TSTATE_NONE;
  1682 	if( UNTRANSLATABLE(pc+2) ) {
  1683 	    ADD_imm32_r32( disp, R_EAX );
  1684 	    store_spreg( R_EAX, R_NEW_PC );
  1685 	    exit_block_emu(pc+2);
  1686 	    return 2;
  1687 	} else {
  1688 	    sh4_translate_instruction( pc + 2 );
  1689 	    exit_block_rel( disp + pc + 4, pc+4 );
  1690 	    return 4;
  1691 	}
  1692     }
  1693 :}
  1694 BSRF Rn {:  
  1695     COUNT_INST(I_BSRF);
  1696     if( sh4_x86.in_delay_slot ) {
  1697 	SLOTILLEGAL();
  1698     } else {
  1699 	load_spreg( R_EAX, R_PC );
  1700 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1701 	store_spreg( R_EAX, R_PR );
  1702 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1703 	store_spreg( R_EAX, R_NEW_PC );
  1705 	sh4_x86.in_delay_slot = DELAY_PC;
  1706 	sh4_x86.tstate = TSTATE_NONE;
  1707 	sh4_x86.branch_taken = TRUE;
  1708 	if( UNTRANSLATABLE(pc+2) ) {
  1709 	    exit_block_emu(pc+2);
  1710 	    return 2;
  1711 	} else {
  1712 	    sh4_translate_instruction( pc + 2 );
  1713 	    exit_block_newpcset(pc+2);
  1714 	    return 4;
  1715 	}
  1716     }
  1717 :}
  1718 BT disp {:
  1719     COUNT_INST(I_BT);
  1720     if( sh4_x86.in_delay_slot ) {
  1721 	SLOTILLEGAL();
  1722     } else {
  1723 	sh4vma_t target = disp + pc + 4;
  1724 	JF_rel8( nottaken );
  1725 	exit_block_rel(target, pc+2 );
  1726 	JMP_TARGET(nottaken);
  1727 	return 2;
  1728     }
  1729 :}
  1730 BT/S disp {:
  1731     COUNT_INST(I_BTS);
  1732     if( sh4_x86.in_delay_slot ) {
  1733 	SLOTILLEGAL();
  1734     } else {
  1735 	sh4_x86.in_delay_slot = DELAY_PC;
  1736 	if( UNTRANSLATABLE(pc+2) ) {
  1737 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1738 	    JF_rel8(nottaken);
  1739 	    ADD_imm32_r32( disp, R_EAX );
  1740 	    JMP_TARGET(nottaken);
  1741 	    ADD_sh4r_r32( R_PC, R_EAX );
  1742 	    store_spreg( R_EAX, R_NEW_PC );
  1743 	    exit_block_emu(pc+2);
  1744 	    sh4_x86.branch_taken = TRUE;
  1745 	    return 2;
  1746 	} else {
  1747 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1748 		CMP_imm8s_sh4r( 1, R_T );
  1749 		sh4_x86.tstate = TSTATE_E;
  1750 	    }
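       	    // Same hand-patched conditional scheme as BF/S above, with the
       	    // sense inverted: 0x80+(tstate^1) selects the opposite condition.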
  1751 	    OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
  1752 	    int save_tstate = sh4_x86.tstate;
  1753 	    sh4_translate_instruction(pc+2);
  1754 	    exit_block_rel( disp + pc + 4, pc+4 );
  1755 	    // not taken
  1756 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1757 	    sh4_x86.tstate = save_tstate;
  1758 	    sh4_translate_instruction(pc+2);
  1759 	    return 4;
  1760 	}
  1761     }
  1762 :}
  1763 JMP @Rn {:  
  1764     COUNT_INST(I_JMP);
  1765     if( sh4_x86.in_delay_slot ) {
  1766 	SLOTILLEGAL();
  1767     } else {
  1768 	load_reg( R_ECX, Rn );
  1769 	store_spreg( R_ECX, R_NEW_PC );
  1770 	sh4_x86.in_delay_slot = DELAY_PC;
  1771 	sh4_x86.branch_taken = TRUE;
  1772 	if( UNTRANSLATABLE(pc+2) ) {
  1773 	    exit_block_emu(pc+2);
  1774 	    return 2;
  1775 	} else {
  1776 	    sh4_translate_instruction(pc+2);
  1777 	    exit_block_newpcset(pc+2);
  1778 	    return 4;
  1779 	}
  1780     }
  1781 :}
  1782 JSR @Rn {:  
  1783     COUNT_INST(I_JSR);
  1784     if( sh4_x86.in_delay_slot ) {
  1785 	SLOTILLEGAL();
  1786     } else {
  1787 	load_spreg( R_EAX, R_PC );
  1788 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1789 	store_spreg( R_EAX, R_PR );
  1790 	load_reg( R_ECX, Rn );
  1791 	store_spreg( R_ECX, R_NEW_PC );
  1792 	sh4_x86.in_delay_slot = DELAY_PC;
  1793 	sh4_x86.branch_taken = TRUE;
  1794 	sh4_x86.tstate = TSTATE_NONE;
  1795 	if( UNTRANSLATABLE(pc+2) ) {
  1796 	    exit_block_emu(pc+2);
  1797 	    return 2;
  1798 	} else {
  1799 	    sh4_translate_instruction(pc+2);
  1800 	    exit_block_newpcset(pc+2);
  1801 	    return 4;
  1802 	}
  1803     }
  1804 :}
  1805 RTE {:  
  1806     COUNT_INST(I_RTE);
  1807     if( sh4_x86.in_delay_slot ) {
  1808 	SLOTILLEGAL();
  1809     } else {
  1810 	check_priv();
  1811 	load_spreg( R_ECX, R_SPC );
  1812 	store_spreg( R_ECX, R_NEW_PC );
  1813 	load_spreg( R_EAX, R_SSR );
  1814 	call_func1( sh4_write_sr, R_EAX );
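       	// sh4_write_sr() can change processor mode and FPU availability,
       	// so the cached priv/fpuen checks are invalidated below.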
  1815 	sh4_x86.in_delay_slot = DELAY_PC;
  1816 	sh4_x86.priv_checked = FALSE;
  1817 	sh4_x86.fpuen_checked = FALSE;
  1818 	sh4_x86.tstate = TSTATE_NONE;
  1819 	sh4_x86.branch_taken = TRUE;
  1820 	if( UNTRANSLATABLE(pc+2) ) {
  1821 	    exit_block_emu(pc+2);
  1822 	    return 2;
  1823 	} else {
  1824 	    sh4_translate_instruction(pc+2);
  1825 	    exit_block_newpcset(pc+2);
  1826 	    return 4;
  1827 	}
  1828     }
  1829 :}
  1830 RTS {:  
  1831     COUNT_INST(I_RTS);
  1832     if( sh4_x86.in_delay_slot ) {
  1833 	SLOTILLEGAL();
  1834     } else {
  1835 	load_spreg( R_ECX, R_PR );
  1836 	store_spreg( R_ECX, R_NEW_PC );
  1837 	sh4_x86.in_delay_slot = DELAY_PC;
  1838 	sh4_x86.branch_taken = TRUE;
  1839 	if( UNTRANSLATABLE(pc+2) ) {
  1840 	    exit_block_emu(pc+2);
  1841 	    return 2;
  1842 	} else {
  1843 	    sh4_translate_instruction(pc+2);
  1844 	    exit_block_newpcset(pc+2);
  1845 	    return 4;
  1846 	}
  1847     }
  1848 :}
  1849 TRAPA #imm {:  
  1850     COUNT_INST(I_TRAPA);
  1851     if( sh4_x86.in_delay_slot ) {
  1852 	SLOTILLEGAL();
  1853     } else {
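       	// sh4r.pc is advanced to the following instruction before the
       	// call, presumably because sh4_raise_trap() derives SPC from it.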
  1854 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1855 	ADD_r32_sh4r( R_ECX, R_PC );
  1856 	load_imm32( R_EAX, imm );
  1857 	call_func1( sh4_raise_trap, R_EAX );
  1858 	sh4_x86.tstate = TSTATE_NONE;
  1859 	exit_block_pcset(pc);
  1860 	sh4_x86.branch_taken = TRUE;
  1861 	return 2;
  1862     }
  1863 :}
  1864 UNDEF {:  
  1865     COUNT_INST(I_UNDEF);
  1866     if( sh4_x86.in_delay_slot ) {
  1867 	SLOTILLEGAL();
  1868     } else {
  1869 	JMP_exc(EXC_ILLEGAL);
  1870 	return 2;
  1871     }
  1872 :}
  1874 CLRMAC {:  
  1875     COUNT_INST(I_CLRMAC);
  1876     XOR_r32_r32(R_EAX, R_EAX);
  1877     store_spreg( R_EAX, R_MACL );
  1878     store_spreg( R_EAX, R_MACH );
  1879     sh4_x86.tstate = TSTATE_NONE;
  1880 :}
  1881 CLRS {:
  1882     COUNT_INST(I_CLRS);
  1883     CLC();
  1884     SETC_sh4r(R_S);
  1885     sh4_x86.tstate = TSTATE_NONE;
  1886 :}
  1887 CLRT {:  
  1888     COUNT_INST(I_CLRT);
  1889     CLC();
  1890     SETC_t();
  1891     sh4_x86.tstate = TSTATE_C;
  1892 :}
  1893 SETS {:  
  1894     COUNT_INST(I_SETS);
  1895     STC();
  1896     SETC_sh4r(R_S);
  1897     sh4_x86.tstate = TSTATE_NONE;
  1898 :}
  1899 SETT {:  
  1900     COUNT_INST(I_SETT);
  1901     STC();
  1902     SETC_t();
  1903     sh4_x86.tstate = TSTATE_C;
  1904 :}
  1906 /* Floating point moves */
  1907 FMOV FRm, FRn {:  
  1908     COUNT_INST(I_FMOV1);
  1909     check_fpuen();
  1910     if( sh4_x86.double_size ) {
  1911         load_dr0( R_EAX, FRm );
  1912         load_dr1( R_ECX, FRm );
  1913         store_dr0( R_EAX, FRn );
  1914         store_dr1( R_ECX, FRn );
  1915     } else {
  1916         load_fr( R_EAX, FRm ); // SZ=0 branch
  1917         store_fr( R_EAX, FRn );
  1918     }
  1919 :}
  1920 FMOV FRm, @Rn {: 
  1921     COUNT_INST(I_FMOV2);
  1922     check_fpuen();
  1923     load_reg( R_EAX, Rn );
  1924     if( sh4_x86.double_size ) {
  1925         check_walign64( R_EAX );
  1926         MMU_TRANSLATE_WRITE( R_EAX );
  1927         MEM_WRITE_DOUBLE_CACHED( R_EAX, FRm, Rn );
  1928     } else {
  1929         check_walign32( R_EAX );
  1930         MMU_TRANSLATE_WRITE( R_EAX );
  1931         MEM_WRITE_FLOAT_CACHED( R_EAX, FRm, Rn );
  1932     }
  1933     sh4_x86.tstate = TSTATE_NONE;
  1934 :}
  1935 FMOV @Rm, FRn {:  
  1936     COUNT_INST(I_FMOV5);
  1937     check_fpuen();
  1938     load_reg( R_EAX, Rm );
  1939     if( sh4_x86.double_size ) {
  1940         check_ralign64( R_EAX );
  1941         MMU_TRANSLATE_READ( R_EAX );
  1942         MEM_READ_DOUBLE_CACHED( R_EAX, R_EDX, R_EAX, Rm );
  1943         store_dr0( R_EDX, FRn );
  1944         store_dr1( R_EAX, FRn );    
  1945     } else {
  1946         check_ralign32( R_EAX );
  1947         MMU_TRANSLATE_READ( R_EAX );
  1948         MEM_READ_LONG_CACHED( R_EAX, R_EAX, Rm );
  1949         store_fr( R_EAX, FRn );
  1950     }
  1951     sh4_x86.tstate = TSTATE_NONE;
  1952 :}
  1953 FMOV FRm, @-Rn {:  
  1954     COUNT_INST(I_FMOV3);
  1955     check_fpuen();
  1956     load_reg( R_EAX, Rn );
  1957     if( sh4_x86.double_size ) {
  1958         check_walign64( R_EAX );
  1959         LEA_r32disp8_r32( R_EAX, -8, R_EAX );
  1960         MMU_TRANSLATE_WRITE( R_EAX );
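               // Rn is only decremented once translation has succeeded, so
               // an MMU exception leaves the register file untouched for
               // restart.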
  1961         ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1962         MEM_WRITE_DOUBLE_CACHED( R_EAX, FRm, Rn );
  1963     } else {
  1964         check_walign32( R_EAX );
  1965         LEA_r32disp8_r32( R_EAX, -4, R_EAX );
  1966         MMU_TRANSLATE_WRITE( R_EAX );
  1967         ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1968         MEM_WRITE_FLOAT_CACHED( R_EAX, FRm, Rn );
  1969     }
  1970     sh4_x86.tstate = TSTATE_NONE;
  1971 :}
  1972 FMOV @Rm+, FRn {:
  1973     COUNT_INST(I_FMOV6);
  1974     check_fpuen();
  1975     load_reg( R_EAX, Rm );
  1976     if( sh4_x86.double_size ) {
  1977         check_ralign64( R_EAX );
  1978         MMU_TRANSLATE_READ( R_EAX );
  1979         ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1980         MEM_READ_DOUBLE_CACHED( R_EAX, R_EDX, R_EAX, Rm );
  1981         store_dr0( R_EDX, FRn );
  1982         store_dr1( R_EAX, FRn );
  1983     } else {
  1984         check_ralign32( R_EAX );
  1985         MMU_TRANSLATE_READ( R_EAX );
  1986         ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1987         MEM_READ_LONG_CACHED( R_EAX, R_EAX, Rm );
  1988         store_fr( R_EAX, FRn );
  1989     }
  1990     sh4_x86.tstate = TSTATE_NONE;
  1991 :}
  1992 FMOV FRm, @(R0, Rn) {:  
  1993     COUNT_INST(I_FMOV4);
  1994     check_fpuen();
  1995     load_reg( R_EAX, Rn );
  1996     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1997     if( sh4_x86.double_size ) {
  1998         check_walign64( R_EAX );
  1999         MMU_TRANSLATE_WRITE( R_EAX );
  2000         MEM_WRITE_DOUBLE_CACHED( R_EAX, FRm, 0 );
  2001     } else {
  2002         check_walign32( R_EAX );
  2003         MMU_TRANSLATE_WRITE( R_EAX );
  2004         MEM_WRITE_FLOAT_CACHED( R_EAX, FRm, 0 );
  2005     }
  2006     sh4_x86.tstate = TSTATE_NONE;
  2007 :}
  2008 FMOV @(R0, Rm), FRn {:  
  2009     COUNT_INST(I_FMOV7);
  2010     check_fpuen();
  2011     load_reg( R_EAX, Rm );
  2012     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  2013     if( sh4_x86.double_size ) {
  2014         check_ralign64( R_EAX );
  2015         MMU_TRANSLATE_READ( R_EAX );
  2016         MEM_READ_DOUBLE_CACHED( R_EAX, R_ECX, R_EAX, 0 );
  2017         store_dr0( R_ECX, FRn );
  2018         store_dr1( R_EAX, FRn );
  2019     } else {
  2020         check_ralign32( R_EAX );
  2021         MMU_TRANSLATE_READ( R_EAX );
  2022         MEM_READ_LONG_CACHED( R_EAX, R_EAX, 0 );
  2023         store_fr( R_EAX, FRn );
  2024     }
  2025     sh4_x86.tstate = TSTATE_NONE;
  2026 :}
  2027 FLDI0 FRn {:  /* IFF PR=0 */
  2028     COUNT_INST(I_FLDI0);
  2029     check_fpuen();
  2030     if( sh4_x86.double_prec == 0 ) {
  2031         XOR_r32_r32( R_EAX, R_EAX );
  2032         store_fr( R_EAX, FRn );
  2033     }
  2034     sh4_x86.tstate = TSTATE_NONE;
  2035 :}
  2036 FLDI1 FRn {:  /* IFF PR=0 */
  2037     COUNT_INST(I_FLDI1);
  2038     check_fpuen();
  2039     if( sh4_x86.double_prec == 0 ) {
  2040         load_imm32(R_EAX, 0x3F800000);
  2041         store_fr( R_EAX, FRn );
  2042     }
  2043 :}
  2045 FLOAT FPUL, FRn {:  
  2046     COUNT_INST(I_FLOAT);
  2047     check_fpuen();
  2048     FILD_sh4r(R_FPUL);
  2049     if( sh4_x86.double_prec ) {
  2050         pop_dr( FRn );
  2051     } else {
  2052         pop_fr( FRn );
  2053     }
  2054 :}
  2055 FTRC FRm, FPUL {:  
  2056     COUNT_INST(I_FTRC);
  2057     check_fpuen();
  2058     if( sh4_x86.double_prec ) {
  2059         push_dr( FRm );
  2060     } else {
  2061         push_fr( FRm );
  2062     }
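           // Saturate before converting: the FILD/FCOMIP pairs below clamp
           // out-of-range values to max_int/min_int, and the x87 control
           // word is temporarily switched to trunc_fcw so that FISTP
           // rounds toward zero as FTRC requires.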
  2063     load_ptr( R_ECX, &max_int );
  2064     FILD_r32ind( R_ECX );
  2065     FCOMIP_st(1);
  2066     JNA_rel8( sat );
  2067     load_ptr( R_ECX, &min_int );  // 5
  2068     FILD_r32ind( R_ECX );           // 2
  2069     FCOMIP_st(1);                   // 2
  2070     JAE_rel8( sat2 );            // 2
  2071     load_ptr( R_EAX, &save_fcw );
  2072     FNSTCW_r32ind( R_EAX );
  2073     load_ptr( R_EDX, &trunc_fcw );
  2074     FLDCW_r32ind( R_EDX );
  2075     FISTP_sh4r(R_FPUL);             // 3
  2076     FLDCW_r32ind( R_EAX );
  2077     JMP_rel8(end);             // 2
  2079     JMP_TARGET(sat);
  2080     JMP_TARGET(sat2);
  2081     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  2082     store_spreg( R_ECX, R_FPUL );
  2083     FPOP_st();
  2084     JMP_TARGET(end);
  2085     sh4_x86.tstate = TSTATE_NONE;
  2086 :}
  2087 FLDS FRm, FPUL {:  
  2088     COUNT_INST(I_FLDS);
  2089     check_fpuen();
  2090     load_fr( R_EAX, FRm );
  2091     store_spreg( R_EAX, R_FPUL );
  2092 :}
  2093 FSTS FPUL, FRn {:  
  2094     COUNT_INST(I_FSTS);
  2095     check_fpuen();
  2096     load_spreg( R_EAX, R_FPUL );
  2097     store_fr( R_EAX, FRn );
  2098 :}
  2099 FCNVDS FRm, FPUL {:  
  2100     COUNT_INST(I_FCNVDS);
  2101     check_fpuen();
  2102     if( sh4_x86.double_prec ) {
  2103         push_dr( FRm );
  2104         pop_fpul();
  2105     }
  2106 :}
  2107 FCNVSD FPUL, FRn {:  
  2108     COUNT_INST(I_FCNVSD);
  2109     check_fpuen();
  2110     if( sh4_x86.double_prec ) {
  2111         push_fpul();
  2112         pop_dr( FRn );
  2113     }
  2114 :}
  2116 /* Floating point instructions */
  2117 FABS FRn {:  
  2118     COUNT_INST(I_FABS);
  2119     check_fpuen();
  2120     if( sh4_x86.double_prec ) {
  2121         push_dr(FRn);
  2122         FABS_st0();
  2123         pop_dr(FRn);
  2124     } else {
  2125         push_fr(FRn);
  2126         FABS_st0();
  2127         pop_fr(FRn);
  2128     }
  2129 :}
  2130 FADD FRm, FRn {:  
  2131     COUNT_INST(I_FADD);
  2132     check_fpuen();
  2133     if( sh4_x86.double_prec ) {
  2134         push_dr(FRm);
  2135         push_dr(FRn);
  2136         FADDP_st(1);
  2137         pop_dr(FRn);
  2138     } else {
  2139         push_fr(FRm);
  2140         push_fr(FRn);
  2141         FADDP_st(1);
  2142         pop_fr(FRn);
  2143     }
  2144 :}
  2145 FDIV FRm, FRn {:  
  2146     COUNT_INST(I_FDIV);
  2147     check_fpuen();
  2148     if( sh4_x86.double_prec ) {
  2149         push_dr(FRn);
  2150         push_dr(FRm);
  2151         FDIVP_st(1);
  2152         pop_dr(FRn);
  2153     } else {
  2154         push_fr(FRn);
  2155         push_fr(FRm);
  2156         FDIVP_st(1);
  2157         pop_fr(FRn);
  2158     }
  2159 :}
  2160 FMAC FR0, FRm, FRn {:  
  2161     COUNT_INST(I_FMAC);
  2162     check_fpuen();
  2163     if( sh4_x86.double_prec ) {
  2164         push_dr( 0 );
  2165         push_dr( FRm );
  2166         FMULP_st(1);
  2167         push_dr( FRn );
  2168         FADDP_st(1);
  2169         pop_dr( FRn );
  2170     } else {
  2171         push_fr( 0 );
  2172         push_fr( FRm );
  2173         FMULP_st(1);
  2174         push_fr( FRn );
  2175         FADDP_st(1);
  2176         pop_fr( FRn );
  2177     }
  2178 :}
  2180 FMUL FRm, FRn {:  
  2181     COUNT_INST(I_FMUL);
  2182     check_fpuen();
  2183     if( sh4_x86.double_prec ) {
  2184         push_dr(FRm);
  2185         push_dr(FRn);
  2186         FMULP_st(1);
  2187         pop_dr(FRn);
  2188     } else {
  2189         push_fr(FRm);
  2190         push_fr(FRn);
  2191         FMULP_st(1);
  2192         pop_fr(FRn);
  2193     }
  2194 :}
  2195 FNEG FRn {:  
  2196     COUNT_INST(I_FNEG);
  2197     check_fpuen();
  2198     if( sh4_x86.double_prec ) {
  2199         push_dr(FRn);
  2200         FCHS_st0();
  2201         pop_dr(FRn);
  2202     } else {
  2203         push_fr(FRn);
  2204         FCHS_st0();
  2205         pop_fr(FRn);
  2206     }
  2207 :}
  2208 FSRRA FRn {:  
  2209     COUNT_INST(I_FSRRA);
  2210     check_fpuen();
  2211     if( sh4_x86.double_prec == 0 ) {
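               // FSRRA only guarantees an approximate 1/sqrt on the real
               // hardware, so the exact FSQRT+FDIV sequence here is safely
               // within spec.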
  2212         FLD1_st0();
  2213         push_fr(FRn);
  2214         FSQRT_st0();
  2215         FDIVP_st(1);
  2216         pop_fr(FRn);
  2217     }
  2218 :}
  2219 FSQRT FRn {:  
  2220     COUNT_INST(I_FSQRT);
  2221     check_fpuen();
  2222     if( sh4_x86.double_prec ) {
  2223         push_dr(FRn);
  2224         FSQRT_st0();
  2225         pop_dr(FRn);
  2226     } else {
  2227         push_fr(FRn);
  2228         FSQRT_st0();
  2229         pop_fr(FRn);
  2230     }
  2231 :}
  2232 FSUB FRm, FRn {:  
  2233     COUNT_INST(I_FSUB);
  2234     check_fpuen();
  2235     if( sh4_x86.double_prec ) {
  2236         push_dr(FRn);
  2237         push_dr(FRm);
  2238         FSUBP_st(1);
  2239         pop_dr(FRn);
  2240     } else {
  2241         push_fr(FRn);
  2242         push_fr(FRm);
  2243         FSUBP_st(1);
  2244         pop_fr(FRn);
  2245     }
  2246 :}
  2248 FCMP/EQ FRm, FRn {:  
  2249     COUNT_INST(I_FCMPEQ);
  2250     check_fpuen();
  2251     if( sh4_x86.double_prec ) {
  2252         push_dr(FRm);
  2253         push_dr(FRn);
  2254     } else {
  2255         push_fr(FRm);
  2256         push_fr(FRn);
  2257     }
  2258     FCOMIP_st(1);
  2259     SETE_t();
  2260     FPOP_st();
  2261     sh4_x86.tstate = TSTATE_E;
  2262 :}
  2263 FCMP/GT FRm, FRn {:  
  2264     COUNT_INST(I_FCMPGT);
  2265     check_fpuen();
  2266     if( sh4_x86.double_prec ) {
  2267         push_dr(FRm);
  2268         push_dr(FRn);
  2269     } else {
  2270         push_fr(FRm);
  2271         push_fr(FRn);
  2272     }
  2273     FCOMIP_st(1);
  2274     SETA_t();
  2275     FPOP_st();
  2276     sh4_x86.tstate = TSTATE_A;
  2277 :}
  2279 FSCA FPUL, FRn {:  
  2280     COUNT_INST(I_FSCA);
  2281     check_fpuen();
  2282     if( sh4_x86.double_prec == 0 ) {
  2283         LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
  2284         load_spreg( R_EAX, R_FPUL );
  2285         call_func2( sh4_fsca, R_EAX, R_EDX );
  2286     }
  2287     sh4_x86.tstate = TSTATE_NONE;
  2288 :}
  2289 FIPR FVm, FVn {:  
  2290     COUNT_INST(I_FIPR);
  2291     check_fpuen();
  2292     if( sh4_x86.double_prec == 0 ) {
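               // Inner product of FVm and FVn. The SSE3 path reduces the
               // four products with two HADDPS passes; the raw offset
               // (FVn<<2)+2 appears to be logical FR[FVn*4+3] under the
               // pair-swapped fr[] layout (push_fr/pop_fr take logical
               // indices, hence +3 on the x87 path).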
  2293         if( sh4_x86.sse3_enabled ) {
  2294             MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2295             MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2296             HADDPS_xmm_xmm( 4, 4 ); 
  2297             HADDPS_xmm_xmm( 4, 4 );
  2298             MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2299         } else {
  2300             push_fr( FVm<<2 );
  2301             push_fr( FVn<<2 );
  2302             FMULP_st(1);
  2303             push_fr( (FVm<<2)+1);
  2304             push_fr( (FVn<<2)+1);
  2305             FMULP_st(1);
  2306             FADDP_st(1);
  2307             push_fr( (FVm<<2)+2);
  2308             push_fr( (FVn<<2)+2);
  2309             FMULP_st(1);
  2310             FADDP_st(1);
  2311             push_fr( (FVm<<2)+3);
  2312             push_fr( (FVn<<2)+3);
  2313             FMULP_st(1);
  2314             FADDP_st(1);
  2315             pop_fr( (FVn<<2)+3);
  2316         }
  2317     }
  2318 :}
  2319 FTRV XMTRX, FVn {:  
  2320     COUNT_INST(I_FTRV);
  2321     check_fpuen();
  2322     if( sh4_x86.double_prec == 0 ) {
  2323         if( sh4_x86.sse3_enabled ) {
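                   // 4x4 matrix-vector multiply: XMTRX rows are loaded from
                   // the back bank fr[1], each element of FVn is broadcast
                   // across an xmm register by the dup/shuffle sequence,
                   // and the four partial products are summed into xmm4.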
  2324             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2325             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2326             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2327             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2329             MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2330             MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2331             MOVAPS_xmm_xmm( 4, 6 );
  2332             MOVAPS_xmm_xmm( 5, 7 );
  2333             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2334             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2335             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2336             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2337             MULPS_xmm_xmm( 0, 4 );
  2338             MULPS_xmm_xmm( 1, 5 );
  2339             MULPS_xmm_xmm( 2, 6 );
  2340             MULPS_xmm_xmm( 3, 7 );
  2341             ADDPS_xmm_xmm( 5, 4 );
  2342             ADDPS_xmm_xmm( 7, 6 );
  2343             ADDPS_xmm_xmm( 6, 4 );
  2344             MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2345         } else {
  2346             LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
  2347             call_func1( sh4_ftrv, R_EAX );
  2348         }
  2349     }
  2350     sh4_x86.tstate = TSTATE_NONE;
  2351 :}
  2353 FRCHG {:  
  2354     COUNT_INST(I_FRCHG);
  2355     check_fpuen();
  2356     load_spreg( R_ECX, R_FPSCR );
  2357     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2358     store_spreg( R_ECX, R_FPSCR );
  2359     call_func0( sh4_switch_fr_banks );
  2360     sh4_x86.tstate = TSTATE_NONE;
  2361 :}
  2362 FSCHG {:  
  2363     COUNT_INST(I_FSCHG);
  2364     check_fpuen();
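           // SZ is toggled both in the emitted FPSCR update and in the
           // translator's own double_size flag, so later FMOVs in this
           // block are generated for the new transfer size without
           // ending the block.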
  2365     load_spreg( R_ECX, R_FPSCR );
  2366     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2367     store_spreg( R_ECX, R_FPSCR );
  2368     sh4_x86.tstate = TSTATE_NONE;
  2369     sh4_x86.double_size = !sh4_x86.double_size;
  2370 :}
  2372 /* Processor control instructions */
  2373 LDC Rm, SR {:
  2374     COUNT_INST(I_LDCSR);
  2375     if( sh4_x86.in_delay_slot ) {
  2376 	SLOTILLEGAL();
  2377     } else {
  2378 	check_priv();
  2379 	load_reg( R_EAX, Rm );
  2380 	call_func1( sh4_write_sr, R_EAX );
  2381 	sh4_x86.priv_checked = FALSE;
  2382 	sh4_x86.fpuen_checked = FALSE;
  2383 	sh4_x86.tstate = TSTATE_NONE;
  2384     }
  2385 :}
  2386 LDC Rm, GBR {: 
  2387     COUNT_INST(I_LDC);
  2388     load_reg( R_EAX, Rm );
  2389     store_spreg( R_EAX, R_GBR );
  2390 :}
  2391 LDC Rm, VBR {:  
  2392     COUNT_INST(I_LDC);
  2393     check_priv();
  2394     load_reg( R_EAX, Rm );
  2395     store_spreg( R_EAX, R_VBR );
  2396     sh4_x86.tstate = TSTATE_NONE;
  2397 :}
  2398 LDC Rm, SSR {:  
  2399     COUNT_INST(I_LDC);
  2400     check_priv();
  2401     load_reg( R_EAX, Rm );
  2402     store_spreg( R_EAX, R_SSR );
  2403     sh4_x86.tstate = TSTATE_NONE;
  2404 :}
  2405 LDC Rm, SGR {:  
  2406     COUNT_INST(I_LDC);
  2407     check_priv();
  2408     load_reg( R_EAX, Rm );
  2409     store_spreg( R_EAX, R_SGR );
  2410     sh4_x86.tstate = TSTATE_NONE;
  2411 :}
  2412 LDC Rm, SPC {:  
  2413     COUNT_INST(I_LDC);
  2414     check_priv();
  2415     load_reg( R_EAX, Rm );
  2416     store_spreg( R_EAX, R_SPC );
  2417     sh4_x86.tstate = TSTATE_NONE;
  2418 :}
  2419 LDC Rm, DBR {:  
  2420     COUNT_INST(I_LDC);
  2421     check_priv();
  2422     load_reg( R_EAX, Rm );
  2423     store_spreg( R_EAX, R_DBR );
  2424     sh4_x86.tstate = TSTATE_NONE;
  2425 :}
  2426 LDC Rm, Rn_BANK {:  
  2427     COUNT_INST(I_LDC);
  2428     check_priv();
  2429     load_reg( R_EAX, Rm );
  2430     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2431     sh4_x86.tstate = TSTATE_NONE;
  2432 :}
  2433 LDC.L @Rm+, GBR {:  
  2434     COUNT_INST(I_LDCM);
  2435     load_reg( R_EAX, Rm );
  2436     check_ralign32( R_EAX );
  2437     MMU_TRANSLATE_READ( R_EAX );
  2438     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2439     MEM_READ_LONG_CACHED( R_EAX, R_EAX, Rm );
  2440     store_spreg( R_EAX, R_GBR );
  2441     sh4_x86.tstate = TSTATE_NONE;
  2442 :}
  2443 LDC.L @Rm+, SR {:
  2444     COUNT_INST(I_LDCSRM);
  2445     if( sh4_x86.in_delay_slot ) {
  2446 	SLOTILLEGAL();
  2447     } else {
  2448 	check_priv();
  2449 	load_reg( R_EAX, Rm );
  2450 	check_ralign32( R_EAX );
  2451 	MMU_TRANSLATE_READ( R_EAX );
  2452 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2453 	MEM_READ_LONG_CACHED( R_EAX, R_EAX, Rm );
  2454 	call_func1( sh4_write_sr, R_EAX );
  2455 	sh4_x86.priv_checked = FALSE;
  2456 	sh4_x86.fpuen_checked = FALSE;
  2457 	sh4_x86.tstate = TSTATE_NONE;
  2458     }
  2459 :}
  2460 LDC.L @Rm+, VBR {:  
  2461     COUNT_INST(I_LDCM);
  2462     check_priv();
  2463     load_reg( R_EAX, Rm );
  2464     check_ralign32( R_EAX );
  2465     MMU_TRANSLATE_READ( R_EAX );
  2466     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2467     MEM_READ_LONG_CACHED( R_EAX, R_EAX, Rm );
  2468     store_spreg( R_EAX, R_VBR );
  2469     sh4_x86.tstate = TSTATE_NONE;
  2470 :}
  2471 LDC.L @Rm+, SSR {:
  2472     COUNT_INST(I_LDCM);
  2473     check_priv();
  2474     load_reg( R_EAX, Rm );
  2475     check_ralign32( R_EAX );
  2476     MMU_TRANSLATE_READ( R_EAX );
  2477     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2478     MEM_READ_LONG_CACHED( R_EAX, R_EAX, Rm );
  2479     store_spreg( R_EAX, R_SSR );
  2480     sh4_x86.tstate = TSTATE_NONE;
  2481 :}
  2482 LDC.L @Rm+, SGR {:  
  2483     COUNT_INST(I_LDCM);
  2484     check_priv();
  2485     load_reg( R_EAX, Rm );
  2486     check_ralign32( R_EAX );
  2487     MMU_TRANSLATE_READ( R_EAX );
  2488     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2489     MEM_READ_LONG_CACHED( R_EAX, R_EAX, Rm );
  2490     store_spreg( R_EAX, R_SGR );
  2491     sh4_x86.tstate = TSTATE_NONE;
  2492 :}
  2493 LDC.L @Rm+, SPC {:  
  2494     COUNT_INST(I_LDCM);
  2495     check_priv();
  2496     load_reg( R_EAX, Rm );
  2497     check_ralign32( R_EAX );
  2498     MMU_TRANSLATE_READ( R_EAX );
  2499     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2500     MEM_READ_LONG_CACHED( R_EAX, R_EAX, Rm );
  2501     store_spreg( R_EAX, R_SPC );
  2502     sh4_x86.tstate = TSTATE_NONE;
  2503 :}
  2504 LDC.L @Rm+, DBR {:  
  2505     COUNT_INST(I_LDCM);
  2506     check_priv();
  2507     load_reg( R_EAX, Rm );
  2508     check_ralign32( R_EAX );
  2509     MMU_TRANSLATE_READ( R_EAX );
  2510     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2511     MEM_READ_LONG_CACHED( R_EAX, R_EAX, Rm );
  2512     store_spreg( R_EAX, R_DBR );
  2513     sh4_x86.tstate = TSTATE_NONE;
  2514 :}
  2515 LDC.L @Rm+, Rn_BANK {:  
  2516     COUNT_INST(I_LDCM);
  2517     check_priv();
  2518     load_reg( R_EAX, Rm );
  2519     check_ralign32( R_EAX );
  2520     MMU_TRANSLATE_READ( R_EAX );
  2521     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2522     MEM_READ_LONG_CACHED( R_EAX, R_EAX, Rm );
  2523     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2524     sh4_x86.tstate = TSTATE_NONE;
  2525 :}
  2526 LDS Rm, FPSCR {:
  2527     COUNT_INST(I_LDSFPSCR);
  2528     check_fpuen();
  2529     load_reg( R_EAX, Rm );
  2530     call_func1( sh4_write_fpscr, R_EAX );
  2531     sh4_x86.tstate = TSTATE_NONE;
  2532     return 2;
  2533 :}
  2534 LDS.L @Rm+, FPSCR {:  
  2535     COUNT_INST(I_LDSFPSCRM);
  2536     check_fpuen();
  2537     load_reg( R_EAX, Rm );
  2538     check_ralign32( R_EAX );
  2539     MMU_TRANSLATE_READ( R_EAX );
  2540     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2541     MEM_READ_LONG_CACHED( R_EAX, R_EAX, Rm );
  2542     call_func1( sh4_write_fpscr, R_EAX );
  2543     sh4_x86.tstate = TSTATE_NONE;
  2544     return 2;
  2545 :}
  2546 LDS Rm, FPUL {:  
  2547     COUNT_INST(I_LDS);
  2548     check_fpuen();
  2549     load_reg( R_EAX, Rm );
  2550     store_spreg( R_EAX, R_FPUL );
  2551 :}
  2552 LDS.L @Rm+, FPUL {:  
  2553     COUNT_INST(I_LDSM);
  2554     check_fpuen();
  2555     load_reg( R_EAX, Rm );
  2556     check_ralign32( R_EAX );
  2557     MMU_TRANSLATE_READ( R_EAX );
  2558     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2559     MEM_READ_LONG_CACHED( R_EAX, R_EAX, Rm );
  2560     store_spreg( R_EAX, R_FPUL );
  2561     sh4_x86.tstate = TSTATE_NONE;
  2562 :}
  2563 LDS Rm, MACH {: 
  2564     COUNT_INST(I_LDS);
  2565     load_reg( R_EAX, Rm );
  2566     store_spreg( R_EAX, R_MACH );
  2567 :}
  2568 LDS.L @Rm+, MACH {:  
  2569     COUNT_INST(I_LDSM);
  2570     load_reg( R_EAX, Rm );
  2571     check_ralign32( R_EAX );
  2572     MMU_TRANSLATE_READ( R_EAX );
  2573     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2574     MEM_READ_LONG_CACHED( R_EAX, R_EAX, Rm );
  2575     store_spreg( R_EAX, R_MACH );
  2576     sh4_x86.tstate = TSTATE_NONE;
  2577 :}
  2578 LDS Rm, MACL {:  
  2579     COUNT_INST(I_LDS);
  2580     load_reg( R_EAX, Rm );
  2581     store_spreg( R_EAX, R_MACL );
  2582 :}
  2583 LDS.L @Rm+, MACL {:  
  2584     COUNT_INST(I_LDSM);
  2585     load_reg( R_EAX, Rm );
  2586     check_ralign32( R_EAX );
  2587     MMU_TRANSLATE_READ( R_EAX );
  2588     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2589     MEM_READ_LONG_CACHED( R_EAX, R_EAX, Rm );
  2590     store_spreg( R_EAX, R_MACL );
  2591     sh4_x86.tstate = TSTATE_NONE;
  2592 :}
  2593 LDS Rm, PR {:  
  2594     COUNT_INST(I_LDS);
  2595     load_reg( R_EAX, Rm );
  2596     store_spreg( R_EAX, R_PR );
  2597 :}
  2598 LDS.L @Rm+, PR {:  
  2599     COUNT_INST(I_LDSM);
  2600     load_reg( R_EAX, Rm );
  2601     check_ralign32( R_EAX );
  2602     MMU_TRANSLATE_READ( R_EAX );
  2603     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2604     MEM_READ_LONG_CACHED( R_EAX, R_EAX, Rm );
  2605     store_spreg( R_EAX, R_PR );
  2606     sh4_x86.tstate = TSTATE_NONE;
  2607 :}
  2608 LDTLB {:  
  2609     COUNT_INST(I_LDTLB);
  2610     call_func0( MMU_ldtlb );
  2611     sh4_x86.tstate = TSTATE_NONE;
  2612 :}
  2613 OCBI @Rn {:
  2614     COUNT_INST(I_OCBI);
  2615 :}
  2616 OCBP @Rn {:
  2617     COUNT_INST(I_OCBP);
  2618 :}
  2619 OCBWB @Rn {:
  2620     COUNT_INST(I_OCBWB);
  2621 :}
  2622 PREF @Rn {:
  2623     COUNT_INST(I_PREF);
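           // Only PREFs to the store-queue region (0xE0000000-0xE3FFFFFF,
           // detected by the masked compare below) do anything here: they
           // flush the addressed SQ to memory. Other addresses are treated
           // as a no-op rather than an actual cache prefetch.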
  2624     load_reg( R_EAX, Rn );
  2625     MOV_r32_r32( R_EAX, R_ECX );
  2626     AND_imm32_r32( 0xFC000000, R_ECX );
  2627     CMP_imm32_r32( 0xE0000000, R_ECX );
  2628     JNE_rel8(end);
  2629     if( sh4_x86.tlb_on ) {
  2630     	call_func1( sh4_flush_store_queue_mmu, R_EAX );
  2631         TEST_r32_r32( R_EAX, R_EAX );
  2632         JE_exc(-1);
  2633     } else {
  2634     	call_func1( sh4_flush_store_queue, R_EAX );
  2635     }
  2636     JMP_TARGET(end);
  2637     sh4_x86.tstate = TSTATE_NONE;
  2638 :}
  2639 SLEEP {: 
  2640     COUNT_INST(I_SLEEP);
  2641     check_priv();
  2642     call_func0( sh4_sleep );
  2643     sh4_x86.tstate = TSTATE_NONE;
  2644     sh4_x86.in_delay_slot = DELAY_NONE;
  2645     return 2;
  2646 :}
  2647 STC SR, Rn {:
  2648     COUNT_INST(I_STCSR);
  2649     check_priv();
  2650     call_func0(sh4_read_sr);
  2651     store_reg( R_EAX, Rn );
  2652     sh4_x86.tstate = TSTATE_NONE;
  2653 :}
  2654 STC GBR, Rn {:  
  2655     COUNT_INST(I_STC);
  2656     load_spreg( R_EAX, R_GBR );
  2657     store_reg( R_EAX, Rn );
  2658 :}
  2659 STC VBR, Rn {:  
  2660     COUNT_INST(I_STC);
  2661     check_priv();
  2662     load_spreg( R_EAX, R_VBR );
  2663     store_reg( R_EAX, Rn );
  2664     sh4_x86.tstate = TSTATE_NONE;
  2665 :}
  2666 STC SSR, Rn {:  
  2667     COUNT_INST(I_STC);
  2668     check_priv();
  2669     load_spreg( R_EAX, R_SSR );
  2670     store_reg( R_EAX, Rn );
  2671     sh4_x86.tstate = TSTATE_NONE;
  2672 :}
  2673 STC SPC, Rn {:  
  2674     COUNT_INST(I_STC);
  2675     check_priv();
  2676     load_spreg( R_EAX, R_SPC );
  2677     store_reg( R_EAX, Rn );
  2678     sh4_x86.tstate = TSTATE_NONE;
  2679 :}
  2680 STC SGR, Rn {:  
  2681     COUNT_INST(I_STC);
  2682     check_priv();
  2683     load_spreg( R_EAX, R_SGR );
  2684     store_reg( R_EAX, Rn );
  2685     sh4_x86.tstate = TSTATE_NONE;
  2686 :}
  2687 STC DBR, Rn {:  
  2688     COUNT_INST(I_STC);
  2689     check_priv();
  2690     load_spreg( R_EAX, R_DBR );
  2691     store_reg( R_EAX, Rn );
  2692     sh4_x86.tstate = TSTATE_NONE;
  2693 :}
  2694 STC Rm_BANK, Rn {:
  2695     COUNT_INST(I_STC);
  2696     check_priv();
  2697     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2698     store_reg( R_EAX, Rn );
  2699     sh4_x86.tstate = TSTATE_NONE;
  2700 :}
  2701 STC.L SR, @-Rn {:
  2702     COUNT_INST(I_STCSRM);
  2703     check_priv();
  2704     load_reg( R_EAX, Rn );
  2705     check_walign32( R_EAX );
  2706     ADD_imm8s_r32( -4, R_EAX );
  2707     MMU_TRANSLATE_WRITE( R_EAX );
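           // Park the translated address in EBX: sh4_read_sr() returns the
           // composed SR value in EAX, presumably because SR's flag bits
           // are kept unpacked in sh4r rather than as a single word.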
  2708     MOV_r32_r32( R_EAX, R_EBX );
  2709     call_func0( sh4_read_sr );
  2710     MOV_r32_r32( R_EAX, R_EDX );
  2711     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2712     MEM_WRITE_LONG( R_EBX, R_EDX );
  2713     sh4_x86.tstate = TSTATE_NONE;
  2714 :}
  2715 STC.L VBR, @-Rn {:  
  2716     COUNT_INST(I_STCM);
  2717     check_priv();
  2718     load_reg( R_EAX, Rn );
  2719     check_walign32( R_EAX );
  2720     ADD_imm8s_r32( -4, R_EAX );
  2721     MMU_TRANSLATE_WRITE( R_EAX );
  2722     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2723     MEM_WRITE_LONG_CACHED_SP( R_EAX, R_VBR, Rn );
  2724     sh4_x86.tstate = TSTATE_NONE;
  2725 :}
  2726 STC.L SSR, @-Rn {:  
  2727     COUNT_INST(I_STCM);
  2728     check_priv();
  2729     load_reg( R_EAX, Rn );
  2730     check_walign32( R_EAX );
  2731     ADD_imm8s_r32( -4, R_EAX );
  2732     MMU_TRANSLATE_WRITE( R_EAX );
  2733     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2734     MEM_WRITE_LONG_CACHED_SP( R_EAX, R_SSR, Rn );
  2735     sh4_x86.tstate = TSTATE_NONE;
  2736 :}
  2737 STC.L SPC, @-Rn {:
  2738     COUNT_INST(I_STCM);
  2739     check_priv();
  2740     load_reg( R_EAX, Rn );
  2741     check_walign32( R_EAX );
  2742     ADD_imm8s_r32( -4, R_EAX );
  2743     MMU_TRANSLATE_WRITE( R_EAX );
  2744     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2745     MEM_WRITE_LONG_CACHED_SP( R_EAX, R_SPC, Rn );
  2746     sh4_x86.tstate = TSTATE_NONE;
  2747 :}
  2748 STC.L SGR, @-Rn {:  
  2749     COUNT_INST(I_STCM);
  2750     check_priv();
  2751     load_reg( R_EAX, Rn );
  2752     check_walign32( R_EAX );
  2753     ADD_imm8s_r32( -4, R_EAX );
  2754     MMU_TRANSLATE_WRITE( R_EAX );
  2755     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2756     MEM_WRITE_LONG_CACHED_SP( R_EAX, R_SGR, Rn );
  2757     sh4_x86.tstate = TSTATE_NONE;
  2758 :}
  2759 STC.L DBR, @-Rn {:  
  2760     COUNT_INST(I_STCM);
  2761     check_priv();
  2762     load_reg( R_EAX, Rn );
  2763     check_walign32( R_EAX );
  2764     ADD_imm8s_r32( -4, R_EAX );
  2765     MMU_TRANSLATE_WRITE( R_EAX );
  2766     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2767     MEM_WRITE_LONG_CACHED_SP( R_EAX, R_DBR, Rn );
  2768     sh4_x86.tstate = TSTATE_NONE;
  2769 :}
  2770 STC.L Rm_BANK, @-Rn {:  
  2771     COUNT_INST(I_STCM);
  2772     check_priv();
  2773     load_reg( R_EAX, Rn );
  2774     check_walign32( R_EAX );
  2775     ADD_imm8s_r32( -4, R_EAX );
  2776     MMU_TRANSLATE_WRITE( R_EAX );
  2777     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2778     MEM_WRITE_LONG_CACHED_SP( R_EAX, REG_OFFSET(r_bank[Rm_BANK]), Rn );
  2779     sh4_x86.tstate = TSTATE_NONE;
  2780 :}
  2781 STC.L GBR, @-Rn {:  
  2782     COUNT_INST(I_STCM);
  2783     load_reg( R_EAX, Rn );
  2784     check_walign32( R_EAX );
  2785     ADD_imm8s_r32( -4, R_EAX );
  2786     MMU_TRANSLATE_WRITE( R_EAX );
  2787     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2788     MEM_WRITE_LONG_CACHED_SP( R_EAX, R_GBR, Rn );
  2789     sh4_x86.tstate = TSTATE_NONE;
  2790 :}
  2791 STS FPSCR, Rn {:  
  2792     COUNT_INST(I_STSFPSCR);
  2793     check_fpuen();
  2794     load_spreg( R_EAX, R_FPSCR );
  2795     store_reg( R_EAX, Rn );
  2796 :}
  2797 STS.L FPSCR, @-Rn {:  
  2798     COUNT_INST(I_STSFPSCRM);
  2799     check_fpuen();
  2800     load_reg( R_EAX, Rn );
  2801     check_walign32( R_EAX );
  2802     ADD_imm8s_r32( -4, R_EAX );
  2803     MMU_TRANSLATE_WRITE( R_EAX );
  2804     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2805     MEM_WRITE_LONG_CACHED_SP( R_EAX, R_FPSCR, Rn );
  2806     sh4_x86.tstate = TSTATE_NONE;
  2807 :}
  2808 STS FPUL, Rn {:  
  2809     COUNT_INST(I_STS);
  2810     check_fpuen();
  2811     load_spreg( R_EAX, R_FPUL );
  2812     store_reg( R_EAX, Rn );
  2813 :}
  2814 STS.L FPUL, @-Rn {:  
  2815     COUNT_INST(I_STSM);
  2816     check_fpuen();
  2817     load_reg( R_EAX, Rn );
  2818     check_walign32( R_EAX );
  2819     ADD_imm8s_r32( -4, R_EAX );
  2820     MMU_TRANSLATE_WRITE( R_EAX );
  2821     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2822     MEM_WRITE_LONG_CACHED_SP( R_EAX, R_FPUL, Rn );
  2823     sh4_x86.tstate = TSTATE_NONE;
  2824 :}
  2825 STS MACH, Rn {:  
  2826     COUNT_INST(I_STS);
  2827     load_spreg( R_EAX, R_MACH );
  2828     store_reg( R_EAX, Rn );
  2829 :}
  2830 STS.L MACH, @-Rn {:  
  2831     COUNT_INST(I_STSM);
  2832     load_reg( R_EAX, Rn );
  2833     check_walign32( R_EAX );
  2834     ADD_imm8s_r32( -4, R_EAX );
  2835     MMU_TRANSLATE_WRITE( R_EAX );
  2836     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2837     MEM_WRITE_LONG_CACHED_SP( R_EAX, R_MACH, Rn );
  2838     sh4_x86.tstate = TSTATE_NONE;
  2839 :}
  2840 STS MACL, Rn {:  
  2841     COUNT_INST(I_STS);
  2842     load_spreg( R_EAX, R_MACL );
  2843     store_reg( R_EAX, Rn );
  2844 :}
  2845 STS.L MACL, @-Rn {:  
  2846     COUNT_INST(I_STSM);
  2847     load_reg( R_EAX, Rn );
  2848     check_walign32( R_EAX );
  2849     ADD_imm8s_r32( -4, R_EAX );
  2850     MMU_TRANSLATE_WRITE( R_EAX );
  2851     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2852     MEM_WRITE_LONG_CACHED_SP( R_EAX, R_MACL, Rn );
  2853     sh4_x86.tstate = TSTATE_NONE;
  2854 :}
  2855 STS PR, Rn {:  
  2856     COUNT_INST(I_STS);
  2857     load_spreg( R_EAX, R_PR );
  2858     store_reg( R_EAX, Rn );
  2859 :}
  2860 STS.L PR, @-Rn {:  
  2861     COUNT_INST(I_STSM);
  2862     load_reg( R_EAX, Rn );
  2863     check_walign32( R_EAX );
  2864     ADD_imm8s_r32( -4, R_EAX );
  2865     MMU_TRANSLATE_WRITE( R_EAX );
  2866     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2867     MEM_WRITE_LONG_CACHED_SP( R_EAX, R_PR, Rn );
  2868     sh4_x86.tstate = TSTATE_NONE;
  2869 :}
  2871 NOP {: 
  2872     COUNT_INST(I_NOP);
  2873     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  2874 :}
  2875 %%
  2876     sh4_x86.in_delay_slot = DELAY_NONE;
  2877     return 0;