src/sh4/sh4x86.in
changeset:   604:1024c3a9cb88
author:      nkeynes
date:        Tue Jan 22 11:30:37 2008 +0000
last change: Fix backpatching when the block moves during translation
/**
 * $Id$
 *
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif

#include "sh4/xltcache.h"
#include "sh4/sh4core.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4mmio.h"
#include "sh4/x86op.h"
#include "clock.h"

#define DEFAULT_BACKPATCH_SIZE 4096

struct backpatch_record {
    uint32_t fixup_offset;
    uint32_t fixup_icount;
    int32_t exc_code;
};

#define MAX_RECOVERY_SIZE 2048

#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2

/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;
    gboolean priv_checked; /* true if we've already checked the cpu mode. */
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    uint32_t block_start_pc;
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    int tstate;

    /* mode flags */
    gboolean tlb_on; /* True if tlb translation is active */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};

#define TSTATE_NONE -1
#define TSTATE_O    0
#define TSTATE_C    2
#define TSTATE_E    4
#define TSTATE_NE   5
#define TSTATE_G    0xF
#define TSTATE_GE   0xD
#define TSTATE_A    7
#define TSTATE_AE   3

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+sh4_x86.tstate); OP(rel8); \
    MARK_JMP(rel8,label)
/** Branch if T is clear (either in the current cflags, or in sh4r.t) */
#define JF_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+ (sh4_x86.tstate^1)); OP(rel8); \
    MARK_JMP(rel8, label)
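/*
 * The TSTATE_* values above are x86 condition-code numbers, which is what
 * lets JT/JF emit a conditional jump directly: 0x70+cc is the Jcc rel8
 * opcode (0x0F 0x80+cc is the rel32 form used by BF/S and BT/S below), and
 * cc^1 is the inverted condition. For example, after CMP/EQ leaves
 * sh4_x86.tstate == TSTATE_E (4):
 *
 *     OP(0x70+sh4_x86.tstate);         // 0x74 = JE rel8   (JT)
 *     OP(0x70+(sh4_x86.tstate^1));     // 0x75 = JNE rel8  (JF)
 *
 * Only when tstate is TSTATE_NONE (host flags already clobbered) do the
 * macros fall back to re-testing sh4r.t with CMP_imm8s_sh4r.
 */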
static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */

void sh4_x86_init()
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
}
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                                          sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
        ((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}
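/*
 * Note that fixup_offset is stored relative to the start of the block's
 * code rather than as an absolute pointer, which is what keeps the list
 * valid when the block moves during translation. A sketch of how a record
 * might be resolved once the block's final address is known (the real
 * consumer, sh4_translate_end_block, is not shown here, and exc_stub is
 * hypothetical):
 *
 *     struct backpatch_record *rec = &sh4_x86.backpatch_list[i];
 *     uint32_t *fixup = (uint32_t *)(xlat_current_block->code + rec->fixup_offset);
 *     *fixup = exc_stub - (uint8_t *)(fixup + 1);  // patch the Jcc rel32
 *
 * fixup_icount (instructions from the block start) and exc_code then tell
 * the stub which exception to raise and where PC should point.
 */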
/**
 * Emit an instruction to load an SH4 reg into a real register
 */
static inline void load_reg( int x86reg, int sh4reg )
{
    /* mov [bp+n], reg */
    OP(0x8B);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}
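/*
 * The three OP() bytes form a standard ModRM encoding: 0x8B is MOV
 * r32,r/m32 and 0x45+(x86reg<<3) is mod=01 (disp8) with EBP as the base.
 * So, assuming R_EAX == 0, load_reg( R_EAX, 3 ) emits
 *
 *     8B 45 xx    mov eax, [ebp + REG_OFFSET(r[3])]
 *
 * i.e. the generated code addresses the whole SH4 register file off EBP.
 */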
static inline void load_reg16s( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xBF);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}

static inline void load_reg16u( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xB7);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}

#define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
#define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )

/**
 * Emit an instruction to load an immediate value into a register
 */
static inline void load_imm32( int x86reg, uint32_t value ) {
    /* mov #value, reg */
    OP(0xB8 + x86reg);
    OP32(value);
}
/**
 * Load an immediate 64-bit quantity (note: x86-64 only)
 */
static inline void load_imm64( int x86reg, uint64_t value ) {
    /* mov #value, reg */
    REXW();
    OP(0xB8 + x86reg);
    OP64(value);
}
/**
 * Emit an instruction to store an SH4 reg (RN)
 */
static inline void store_reg( int x86reg, int sh4reg ) {
    /* mov reg, [bp+n] */
    OP(0x89);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}

#define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
static inline void load_fr( int bankreg, int x86reg, int frm )
{
    OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
}

/**
 * Store an integer x86 register into an FR register (single-precision
 * floating point) (eg for register-to-register moves)
 */
static inline void store_fr( int bankreg, int x86reg, int frn )
{
    OP(0x89);  OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
}
/**
 * Load a pointer to the back FP bank into the specified x86 register. The
 * bankreg must have been previously loaded with FPSCR.
 * NB: 12 bytes
 */
static inline void load_xf_bank( int bankreg )
{
    NOT_r32( bankreg );
    SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
    AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
    OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
}
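/*
 * The NOT/SHR/AND trick above: FPSCR.FR is bit 21, and one bank is
 * 16 registers x 4 bytes = 64 (0x40) bytes. NOT flips FR so we pick the
 * bank that is *not* currently selected, SHR by (21-6) moves the flipped
 * bit down to bit 6, and the AND leaves either 0 or 0x40. In C terms:
 *
 *     bank_offset = (~fpscr >> 15) & 0x40;                // 0 or 64 bytes
 *     bankreg     = ebp + bank_offset + REG_OFFSET(fr);   // what the LEA computes
 *
 * (a sketch; the exact fr[][] layout is defined by sh4core.h).
 */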
/**
 * Update the fr_bank pointer based on the current fpscr value.
 */
static inline void update_fr_bank( int fpscrreg )
{
    SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
    AND_imm8s_r32( 0x40, fpscrreg );    // Complete extraction
    OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
    store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
}

/**
 * Push FPUL (as a 32-bit float) onto the FPU stack
 */
static inline void push_fpul( )
{
    OP(0xD9); OP(0x45); OP(R_FPUL);
}

/**
 * Pop FPUL (as a 32-bit float) from the FPU stack
 */
static inline void pop_fpul( )
{
    OP(0xD9); OP(0x5D); OP(R_FPUL);
}

/**
 * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
 * with the location of the current fp bank.
 */
static inline void push_fr( int bankreg, int frm )
{
    OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
}

/**
 * Pop a 32-bit float from the FPU stack and store it back into the fp bank,
 * with bankreg previously loaded with the location of the current fp bank.
 */
static inline void pop_fr( int bankreg, int frm )
{
    OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
}

/**
 * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
 * with the location of the current fp bank.
 */
static inline void push_dr( int bankreg, int frm )
{
    OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
}

static inline void pop_dr( int bankreg, int frm )
{
    OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
}
/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( !sh4_x86.priv_checked ) { \
        sh4_x86.priv_checked = TRUE;\
        load_spreg( R_EAX, R_SR );\
        AND_imm32_r32( SR_MD, R_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JE_exc( EXC_SLOT_ILLEGAL );\
        } else {\
            JE_exc( EXC_ILLEGAL );\
        }\
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        load_spreg( R_EAX, R_SR );\
        AND_imm32_r32( SR_FD, R_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
    }

#define check_ralign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define UNDEF()
#define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
#define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)

/**
 * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
#define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }

#define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg); }

/**
 * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
#define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }

#define MEM_READ_SIZE (CALL_FUNC1_SIZE)
#define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
#define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )
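/*
 * With the TLB enabled, each translate macro above expands to roughly:
 *
 *     call mmu_vma_to_phys_read      ; EAX = physical addr, or MMU_VMA_ERROR
 *     cmp  eax, MMU_VMA_ERROR
 *     je   <exception epilogue>      ; recorded via sh4_x86_add_backpatch
 *     mov  addr_reg, eax             ; MEM_RESULT, omitted if addr_reg is EAX
 *
 * which is the CMP/JE/MOV tail that the "+ 12" in MMU_TRANSLATE_SIZE
 * budgets for on top of CALL_FUNC1_SIZE. The -1 exception code appears to
 * mean "the MMU call has already set up the exception", in contrast to the
 * explicit codes passed to MMU_TRANSLATE_READ_EXC.
 */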
#define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;

/****** Import appropriate calling conventions ******/
#if SH4_TRANSLATOR == TARGET_X86_64
#include "sh4/ia64abi.h"
#else /* SH4_TRANSLATOR == TARGET_X86 */
#ifdef APPLE_BUILD
#include "sh4/ia32mac.h"
#else
#include "sh4/ia32abi.h"
#endif
#endif
uint32_t sh4_translate_end_block_size()
{
    if( sh4_x86.backpatch_posn <= 3 ) {
        return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
    } else {
        return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
    }
}
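/*
 * Worked example of the estimate above: backpatch_posn == 2 reserves
 * EPILOGUE_SIZE + 24 bytes, while backpatch_posn == 5 reserves
 * EPILOGUE_SIZE + 48 + 2*15 = EPILOGUE_SIZE + 78 bytes. The two-tier
 * formula presumably reflects that the first few exception stubs can be
 * emitted in a more compact (12-byte) form than the later (15-byte) ones.
 */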
/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    load_imm32( R_EAX, pc );
    call_func1( sh4_translate_breakpoint_hit, R_EAX );
}
#define UNTRANSLATABLE(pc) (!IS_IN_ICACHE(pc))

/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
    ADD_r32_sh4r( R_ECX, R_PC );

    load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
    store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );

    call_func0( sh4_execute_instruction );
    load_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_EAX);
    } else {
        call_func1(xlat_get_code,R_EAX);
    }
    AND_imm8s_rptr( 0xFC, R_EAX );
    POP_r32(R_EBP);
    RET();
}
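/*
 * The slice_cycle update above charges for every instruction from the
 * block start up to and including endpc, since sh4_execute_instruction
 * itself does not touch slice_cycle. For example, a block starting at
 * 0x8C001000 with endpc == 0x8C001004 adds ((4 >> 1) + 1) = 3 instruction
 * periods, i.e. 3 * sh4_cpu_period.
 */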
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both the branch and the delayed instruction as a single
 * unit.
 *
 * The instruction MUST be in the icache (assert check).
 *
 * @return nonzero if the instruction marks the end of a basic block
 * (eg a branch).
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    /* Note: the case where PC is not in the current icache is handled by
     * exit_block_emu rather than here - it usually means we're running
     * with the MMU on and we've gone past the end of the page, and since
     * sh4_translate_block is pretty careful about this, it means we're
     * almost certainly in a delay slot.
     *
     * Since we can't assume the page is present (and we can't fault it in
     * at this point), we inline a call to sh4_execute_instruction, with a
     * few small repairs to cope with the different environment.
     */

    if( !sh4_x86.in_delay_slot ) {
        sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
    }
%%
/* ALU operations */
ADD Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADD #imm, Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( imm, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADC_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
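/*
 * ADDC is where the tstate tracking pays off: when the previous
 * instruction left T in the host carry flag (tstate == TSTATE_C), the
 * LDC_t() reload is skipped and ADC consumes the carry directly, so a
 * chain of ADDCs never round-trips the carry through sh4r.t. SETC_t()
 * still writes the result back so the in-memory copy of T stays correct.
 */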
ADDV Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    AND_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:
    load_reg( R_EAX, 0 );
    AND_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE( R_EAX, R_EAX );
    POP_realigned_r32(R_ECX);
    AND_imm32_r32(imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
CMP/EQ Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:
    load_reg( R_EAX, 0 );
    CMP_imm8s_r32(imm, R_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
:}
CMP/PL Rn {:
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_ECX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    JE_rel8(13, target1);
    TEST_r8_r8( R_AH, R_AH ); // 2
    JE_rel8(9, target2);
    SHR_imm8_r32( 16, R_EAX ); // 3
    TEST_r8_r8( R_AL, R_AL ); // 2
    JE_rel8(2, target3);
    TEST_r8_r8( R_AH, R_AH ); // 2
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
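/*
 * CMP/STR sets T when *any* byte of Rm equals the corresponding byte of
 * Rn: after the XOR, equal bytes become zero, and the TEST/JE ladder
 * byte-tests the low word, shifts, then byte-tests the high word. The
 * hard-coded rel8 distances (13/9/2) must match the byte counts noted in
 * the // comments, since the jumps are emitted before their targets.
 */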
DIV0S Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHR_imm8_r32( 31, R_EAX );
    SHR_imm8_r32( 31, R_ECX );
    store_spreg( R_EAX, R_M );
    store_spreg( R_ECX, R_Q );
    CMP_r32_r32( R_EAX, R_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:
    XOR_r32_r32( R_EAX, R_EAX );
    store_spreg( R_EAX, R_Q );
    store_spreg( R_EAX, R_M );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
DIV1 Rm, Rn {:
    load_spreg( R_ECX, R_M );
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    SETC_r8( R_DL ); // Q'
    CMP_sh4r_r32( R_Q, R_ECX );
    JE_rel8(5, mqequal);
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_rel8(3, end);
    JMP_TARGET(mqequal);
    SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_TARGET(end);
    store_reg( R_EAX, Rn ); // Done with Rn now
    SETC_r8(R_AL); // tmp1
    XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
    XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
    store_spreg( R_ECX, R_Q );
    XOR_imm8s_r32( 1, R_AL );   // T = !Q'
    MOVZX_r8_r32( R_AL, R_EAX );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
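/*
 * DIV1 performs one step of the SH4's non-restoring 1-bit division: the
 * partial dividend in Rn is rotated left through T, Rm is added or
 * subtracted depending on whether Q == M, and Q and T are then recomputed
 * from the carry out - the Q'/Q'' comments above mirror the terms used in
 * the SH4 programming manual. DIV0S/DIV0U seed M, Q and T before the
 * first step.
 */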
DMULS.L Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    IMUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    store_reg( R_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVSX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTS.W Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVSX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.B Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVZX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.W Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVZX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
MAC.L @Rm+, @Rn+ {:
    if( Rm == Rn ) {
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        PUSH_realigned_r32( R_EAX );
        load_reg( R_EAX, Rn );
        ADD_imm8s_r32( 4, R_EAX );
        MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
        ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        load_reg( R_ECX, Rn );
        check_ralign32( R_ECX );
        PUSH_realigned_r32( R_EAX );
        MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
        MOV_r32_r32( R_ECX, R_EAX );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_LONG( R_EAX, R_EAX );
    POP_r32( R_ECX );
    PUSH_r32( R_EAX );
    MEM_READ_LONG( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );

    IMUL_r32( R_ECX );
    ADD_r32_sh4r( R_EAX, R_MACL );
    ADC_r32_sh4r( R_EDX, R_MACH );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32(R_ECX, R_ECX);
    JE_rel8( CALL_FUNC0_SIZE, nosat );
    call_func0( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
MAC.W @Rm+, @Rn+ {:
    if( Rm == Rn ) {
        load_reg( R_EAX, Rm );
        check_ralign16( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        PUSH_realigned_r32( R_EAX );
        load_reg( R_EAX, Rn );
        ADD_imm8s_r32( 2, R_EAX );
        MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( R_EAX, Rm );
        check_ralign16( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        load_reg( R_ECX, Rn );
        check_ralign16( R_ECX );
        PUSH_realigned_r32( R_EAX );
        MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
        MOV_r32_r32( R_ECX, R_EAX );
        ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
        ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_WORD( R_EAX, R_EAX );
    POP_r32( R_ECX );
    PUSH_r32( R_EAX );
    MEM_READ_WORD( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );
    IMUL_r32( R_ECX );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32( R_ECX, R_ECX );
    JE_rel8( 47, nosat );

    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    JNO_rel8( 51, end );            // 2
    load_imm32( R_EDX, 1 );         // 5
    store_spreg( R_EDX, R_MACH );   // 6
    JS_rel8( 13, positive );        // 2
    load_imm32( R_EAX, 0x80000000 );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8( 25, end2 );           // 2

    JMP_TARGET(positive);
    load_imm32( R_EAX, 0x7FFFFFFF );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8( 12, end3);            // 2

    JMP_TARGET(nosat);
    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    ADC_r32_sh4r( R_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVT Rn {:
    load_spreg( R_EAX, R_T );
    store_reg( R_EAX, Rn );
:}
MUL.L Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    load_reg16s( R_EAX, Rm );
    load_reg16s( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:
    load_reg16u( R_EAX, Rm );
    load_reg16u( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    load_reg( R_EAX, Rm );
    NEG_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:
    load_reg( R_EAX, Rm );
    XOR_r32_r32( R_ECX, R_ECX );
    LDC_t();
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:
    load_reg( R_EAX, Rm );
    NOT_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    load_reg( R_EAX, 0 );
    OR_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE( R_EAX, R_EAX );
    POP_realigned_r32(R_ECX);
    OR_imm32_r32(imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:
    load_reg( R_EAX, Rn );
    ROL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:
    load_reg( R_EAX, Rn );
    ROR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    /* Annoyingly enough, not directly convertible */
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(16, doshl);

    NEG_r32( R_ECX );          // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8( 4, emptysar);     // 2
    SAR_r32_CL( R_EAX );       // 2
    JMP_rel8(10, end);         // 2

    JMP_TARGET(emptysar);
    SAR_imm8_r32(31, R_EAX );  // 3
    JMP_rel8(5, end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLD Rm, Rn {:
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(15, doshl);

    NEG_r32( R_ECX );          // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8( 4, emptyshr );
    SHR_r32_CL( R_EAX );       // 2
    JMP_rel8(9, end);          // 2

    JMP_TARGET(emptyshr);
    XOR_r32_r32( R_EAX, R_EAX );
    JMP_rel8(5, end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
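/*
 * SHAD/SHLD are dynamic shifts: a non-negative Rm shifts Rn left by
 * Rm & 0x1F, while a negative Rm shifts right by (-Rm) & 0x1F; when that
 * right-shift count is 0 (a shift all the way out), SHAD fills with the
 * sign bit (the SAR 31 path) and SHLD with zero (the XOR path). x86's
 * variable shifts mask the count to 5 bits and so cannot express a 32-bit
 * shift, hence the branchy emptysar/emptyshr translations above.
 */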
SHAL Rn {:
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHAR Rn {:
    load_reg( R_EAX, Rn );
    SAR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL Rn {:
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL2 Rn {:
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL8 Rn {:
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL16 Rn {:
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR Rn {:
    load_reg( R_EAX, Rn );
    SHR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLR2 Rn {:
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR8 Rn {:
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR16 Rn {:
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUB Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUBC Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SUBV Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
SWAP.B Rm, Rn {:
    load_reg( R_EAX, Rm );
    XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
    store_reg( R_EAX, Rn );
:}
SWAP.W Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    SHL_imm8_r32( 16, R_ECX );
    SHR_imm8_r32( 16, R_EAX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
TAS.B @Rn {:
    load_reg( R_EAX, Rn );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    SETE_t();
    OR_imm8_r8( 0x80, R_AL );
    POP_realigned_r32( R_ECX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
TST Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    TEST_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST #imm, R0 {:
    load_reg( R_EAX, 0 );
    TEST_imm32_r32( imm, R_EAX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    TEST_imm8_r8( imm, R_AL );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
XOR Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR #imm, R0 {:
    load_reg( R_EAX, 0 );
    XOR_imm32_r32( imm, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE(R_EAX, R_EAX);
    POP_realigned_r32(R_ECX);
    XOR_imm32_r32( imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
XTRCT Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    SHR_imm8_r32( 16, R_ECX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}

/* Data move instructions */
MOV Rm, Rn {:
    load_reg( R_EAX, Rm );
    store_reg( R_EAX, Rn );
:}
MOV #imm, Rn {:
    load_imm32( R_EAX, imm );
    store_reg( R_EAX, Rn );
:}
MOV.B Rm, @Rn {:
    load_reg( R_EAX, Rn );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @-Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm, Rn {:
    load_reg( R_EAX, Rm );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm+, Rn {:
    load_reg( R_EAX, Rm );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, Rm), R0 {:
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @Rn {:
    load_reg( R_EAX, Rn );
    check_walign32(R_EAX);
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @-Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -4, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm+, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, PC), Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
        if( IS_IN_ICACHE(target) ) {
            // If the target address is in the same page as the code, it's
            // pretty safe to just ref it directly and circumvent the whole
            // memory subsystem. (this is a big performance win)

            // FIXME: There's a corner-case that's not handled here when
            // the current code-page is in the ITLB but not in the UTLB.
            // (should generate a TLB miss although need to test SH4
            // behaviour to confirm) Unlikely to be anyone depending on this
            // behaviour though.
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOV_moff32_EAX( ptr );
        } else {
            // Note: we use sh4r.pc for the calc as we could be running at a
            // different virtual address than the translation was done with,
            // but we can safely assume that the low bits are the same.
            load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
            ADD_sh4r_r32( R_PC, R_EAX );
            MMU_TRANSLATE_READ( R_EAX );
            MEM_READ_LONG( R_EAX, R_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( R_EAX, Rn );
    }
:}
MOV.L @(disp, Rm), Rn {:
    load_reg( R_EAX, Rm );
    ADD_imm8s_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @Rn {:
    load_reg( R_EAX, Rn );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @-Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -2, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm+, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, PC), Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        // See comments for MOV.L @(disp, PC), Rn
        uint32_t target = pc + disp + 4;
        if( IS_IN_ICACHE(target) ) {
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOV_moff32_EAX( ptr );
            MOVSX_r16_r32( R_EAX, R_EAX );
        } else {
            load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
            ADD_sh4r_r32( R_PC, R_EAX );
            MMU_TRANSLATE_READ( R_EAX );
            MEM_READ_WORD( R_EAX, R_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( R_EAX, Rn );
    }
:}
MOV.W @(disp, Rm), R0 {:
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVA @(disp, PC), R0 {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
        ADD_sh4r_r32( R_PC, R_ECX );
        store_reg( R_ECX, 0 );
        sh4_x86.tstate = TSTATE_NONE;
    }
:}
MOVCA.L R0, @Rn {:
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Control transfer instructions */
BF disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        JT_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
        exit_block_rel(target, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
:}
BF/S disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        if( UNTRANSLATABLE(pc+2) ) {
            load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
            JT_rel8(6,nottaken);
            ADD_imm32_r32( disp, R_EAX );
            JMP_TARGET(nottaken);
            ADD_sh4r_r32( R_PC, R_EAX );
            store_spreg( R_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            sh4_x86.branch_taken = TRUE;
            return 2;
        } else {
            if( sh4_x86.tstate == TSTATE_NONE ) {
                CMP_imm8s_sh4r( 1, R_T );
                sh4_x86.tstate = TSTATE_E;
            }
            sh4vma_t target = disp + pc + 4;
            OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
            sh4_translate_instruction(pc+2);
            exit_block_rel( target, pc+4 );

            // not taken
            *patch = (xlat_output - ((uint8_t *)patch)) - 4;
            sh4_translate_instruction(pc+2);
            return 4;
        }
    }
:}
BRA disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            load_spreg( R_EAX, R_PC );
            ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
            store_spreg( R_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_rel( disp + pc + 4, pc+4 );
            return 4;
        }
    }
:}
BRAF Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
        store_spreg( R_EAX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
:}
BSR disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        sh4_x86.tstate = TSTATE_NONE;
        if( UNTRANSLATABLE(pc+2) ) {
            ADD_imm32_r32( disp, R_EAX );
            store_spreg( R_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_rel( disp + pc + 4, pc+4 );
            return 4;
        }
    }
:}
BSRF Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
        store_spreg( R_EAX, R_NEW_PC );

        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
:}
BT disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        JF_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
        exit_block_rel(target, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
:}
BT/S disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        if( UNTRANSLATABLE(pc+2) ) {
            load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
            JF_rel8(6,nottaken);
            ADD_imm32_r32( disp, R_EAX );
            JMP_TARGET(nottaken);
            ADD_sh4r_r32( R_PC, R_EAX );
            store_spreg( R_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            sh4_x86.branch_taken = TRUE;
            return 2;
        } else {
            if( sh4_x86.tstate == TSTATE_NONE ) {
                CMP_imm8s_sh4r( 1, R_T );
                sh4_x86.tstate = TSTATE_E;
            }
            OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
            sh4_translate_instruction(pc+2);
            exit_block_rel( disp + pc + 4, pc+4 );
            // not taken
            *patch = (xlat_output - ((uint8_t *)patch)) - 4;
            sh4_translate_instruction(pc+2);
            return 4;
        }
    }
:}
JMP @Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_reg( R_ECX, Rn );
        store_spreg( R_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
:}
JSR @Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        load_reg( R_ECX, Rn );
        store_spreg( R_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        sh4_x86.tstate = TSTATE_NONE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
:}
RTE {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        check_priv();
        load_spreg( R_ECX, R_SPC );
        store_spreg( R_ECX, R_NEW_PC );
        load_spreg( R_EAX, R_SSR );
        call_func1( sh4_write_sr, R_EAX );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.priv_checked = FALSE;
        sh4_x86.fpuen_checked = FALSE;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
:}
RTS {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_ECX, R_PR );
        store_spreg( R_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
:}
  1676 TRAPA #imm {:  
  1677     if( sh4_x86.in_delay_slot ) {
  1678 	SLOTILLEGAL();
  1679     } else {
  1680 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1681 	ADD_r32_sh4r( R_ECX, R_PC );
  1682 	load_imm32( R_EAX, imm );
  1683 	call_func1( sh4_raise_trap, R_EAX );
  1684 	sh4_x86.tstate = TSTATE_NONE;
  1685 	exit_block_pcset(pc);
  1686 	sh4_x86.branch_taken = TRUE;
  1687 	return 2;
  1688     }
  1689 :}
  1690 UNDEF {:  
  1691     if( sh4_x86.in_delay_slot ) {
  1692 	SLOTILLEGAL();
  1693     } else {
  1694 	JMP_exc(EXC_ILLEGAL);
  1695 	return 2;
  1696     }
  1697 :}
  1699 CLRMAC {:  
  1700     XOR_r32_r32(R_EAX, R_EAX);
  1701     store_spreg( R_EAX, R_MACL );
  1702     store_spreg( R_EAX, R_MACH );
  1703     sh4_x86.tstate = TSTATE_NONE;
  1704 :}
  1705 CLRS {:
  1706     CLC();
  1707     SETC_sh4r(R_S);
  1708     sh4_x86.tstate = TSTATE_C;
  1709 :}
  1710 CLRT {:  
  1711     CLC();
  1712     SETC_t();
  1713     sh4_x86.tstate = TSTATE_C;
  1714 :}
  1715 SETS {:  
  1716     STC();
  1717     SETC_sh4r(R_S);
  1718     sh4_x86.tstate = TSTATE_C;
  1719 :}
  1720 SETT {:  
  1721     STC();
  1722     SETC_t();
  1723     sh4_x86.tstate = TSTATE_C;
  1724 :}
  1726 /* Floating point moves */
  1727 FMOV FRm, FRn {:  
   1728     /* As horrible as this looks, it's covering 5 cases, selected by FPSCR.SZ:
   1729      * 1. 32-bit fr-to-fr (SZ=0)
   1730      * 2. 64-bit dr-to-dr (SZ=1, FRm&1 == 0, FRn&1 == 0 )
   1731      * 3. 64-bit dr-to-xd (SZ=1, FRm&1 == 0, FRn&1 == 1 )
   1732      * 4. 64-bit xd-to-dr (SZ=1, FRm&1 == 1, FRn&1 == 0 )
   1733      * 5. 64-bit xd-to-xd (SZ=1, FRm&1 == 1, FRn&1 == 1 )
   1734      */
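           /* Example, assuming SZ=1: FMOV DR2, XD4 encodes FRm=2, FRn=5 and
            * is case 3 - FR2/FR3 are copied into XF4/XF5. The case is
            * selected at translation time from the low bits of FRm and FRn. */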
  1735     check_fpuen();
  1736     load_spreg( R_ECX, R_FPSCR );
  1737     load_fr_bank( R_EDX );
  1738     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1739     JNE_rel8(8, doublesize);
  1740     load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
  1741     store_fr( R_EDX, R_EAX, FRn );
  1742     if( FRm&1 ) {
  1743 	JMP_rel8(24, end);
  1744 	JMP_TARGET(doublesize);
  1745 	load_xf_bank( R_ECX ); 
  1746 	load_fr( R_ECX, R_EAX, FRm-1 );
  1747 	if( FRn&1 ) {
  1748 	    load_fr( R_ECX, R_EDX, FRm );
  1749 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1750 	    store_fr( R_ECX, R_EDX, FRn );
  1751 	} else /* FRn&1 == 0 */ {
  1752 	    load_fr( R_ECX, R_ECX, FRm );
  1753 	    store_fr( R_EDX, R_EAX, FRn );
  1754 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1755 	}
  1756 	JMP_TARGET(end);
  1757     } else /* FRm&1 == 0 */ {
  1758 	if( FRn&1 ) {
  1759 	    JMP_rel8(24, end);
  1760 	    load_xf_bank( R_ECX );
  1761 	    load_fr( R_EDX, R_EAX, FRm );
  1762 	    load_fr( R_EDX, R_EDX, FRm+1 );
  1763 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1764 	    store_fr( R_ECX, R_EDX, FRn );
  1765 	    JMP_TARGET(end);
  1766 	} else /* FRn&1 == 0 */ {
  1767 	    JMP_rel8(12, end);
  1768 	    load_fr( R_EDX, R_EAX, FRm );
  1769 	    load_fr( R_EDX, R_ECX, FRm+1 );
  1770 	    store_fr( R_EDX, R_EAX, FRn );
  1771 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1772 	    JMP_TARGET(end);
  1773 	}
  1774     }
  1775     sh4_x86.tstate = TSTATE_NONE;
  1776 :}
  1777 FMOV FRm, @Rn {: 
  1778     check_fpuen();
  1779     load_reg( R_EAX, Rn );
  1780     check_walign32( R_EAX );
  1781     MMU_TRANSLATE_WRITE( R_EAX );
  1782     load_spreg( R_EDX, R_FPSCR );
  1783     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1784     JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
  1785     load_fr_bank( R_EDX );
  1786     load_fr( R_EDX, R_ECX, FRm );
  1787     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1788     if( FRm&1 ) {
  1789 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1790 	JMP_TARGET(doublesize);
  1791 	load_xf_bank( R_EDX );
  1792 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1793 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1794 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1795 	JMP_TARGET(end);
  1796     } else {
  1797 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1798 	JMP_TARGET(doublesize);
  1799 	load_fr_bank( R_EDX );
  1800 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1801 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1802 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1803 	JMP_TARGET(end);
  1804     }
  1805     sh4_x86.tstate = TSTATE_NONE;
  1806 :}
  1807 FMOV @Rm, FRn {:  
  1808     check_fpuen();
  1809     load_reg( R_EAX, Rm );
  1810     check_ralign32( R_EAX );
  1811     MMU_TRANSLATE_READ( R_EAX );
  1812     load_spreg( R_EDX, R_FPSCR );
  1813     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1814     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1815     MEM_READ_LONG( R_EAX, R_EAX );
  1816     load_fr_bank( R_EDX );
  1817     store_fr( R_EDX, R_EAX, FRn );
  1818     if( FRn&1 ) {
  1819 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1820 	JMP_TARGET(doublesize);
  1821 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1822 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1823 	load_xf_bank( R_EDX );
  1824 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1825 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1826 	JMP_TARGET(end);
  1827     } else {
  1828 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1829 	JMP_TARGET(doublesize);
  1830 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1831 	load_fr_bank( R_EDX );
  1832 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1833 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1834 	JMP_TARGET(end);
  1835     }
  1836     sh4_x86.tstate = TSTATE_NONE;
  1837 :}
  1838 FMOV FRm, @-Rn {:  
  1839     check_fpuen();
  1840     load_reg( R_EAX, Rn );
  1841     check_walign32( R_EAX );
  1842     load_spreg( R_EDX, R_FPSCR );
  1843     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1844     JNE_rel8(15 + MEM_WRITE_SIZE + MMU_TRANSLATE_SIZE, doublesize);
  1845     ADD_imm8s_r32( -4, R_EAX );
  1846     MMU_TRANSLATE_WRITE( R_EAX );
  1847     load_fr_bank( R_EDX );
  1848     load_fr( R_EDX, R_ECX, FRm );
  1849     ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1850     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1851     if( FRm&1 ) {
  1852 	JMP_rel8( 25 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
  1853 	JMP_TARGET(doublesize);
  1854 	ADD_imm8s_r32(-8,R_EAX);
  1855 	MMU_TRANSLATE_WRITE( R_EAX );
  1856 	load_xf_bank( R_EDX );
  1857 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1858 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1859 	ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1860 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1861 	JMP_TARGET(end);
  1862     } else {
  1863 	JMP_rel8( 16 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
  1864 	JMP_TARGET(doublesize);
  1865 	ADD_imm8s_r32(-8,R_EAX);
  1866 	MMU_TRANSLATE_WRITE( R_EAX );
  1867 	load_fr_bank( R_EDX );
  1868 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1869 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1870 	ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1871 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1872 	JMP_TARGET(end);
  1873     }
  1874     sh4_x86.tstate = TSTATE_NONE;
  1875 :}
  1876 FMOV @Rm+, FRn {:
  1877     check_fpuen();
  1878     load_reg( R_EAX, Rm );
  1879     check_ralign32( R_EAX );
  1880     MMU_TRANSLATE_READ( R_EAX );
  1881     load_spreg( R_EDX, R_FPSCR );
  1882     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1883     JNE_rel8(12 + MEM_READ_SIZE, doublesize);
  1884     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1885     MEM_READ_LONG( R_EAX, R_EAX );
  1886     load_fr_bank( R_EDX );
  1887     store_fr( R_EDX, R_EAX, FRn );
  1888     if( FRn&1 ) {
  1889 	JMP_rel8(25 + MEM_READ_DOUBLE_SIZE, end);
  1890 	JMP_TARGET(doublesize);
  1891 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1892 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1893 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1894 	load_xf_bank( R_EDX );
  1895 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1896 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1897 	JMP_TARGET(end);
  1898     } else {
  1899 	JMP_rel8(13 + MEM_READ_DOUBLE_SIZE, end);
       	JMP_TARGET(doublesize);
  1900 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1901 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1902 	load_fr_bank( R_EDX );
  1903 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1904 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1905 	JMP_TARGET(end);
  1906     }
  1907     sh4_x86.tstate = TSTATE_NONE;
  1908 :}
  1909 FMOV FRm, @(R0, Rn) {:  
  1910     check_fpuen();
  1911     load_reg( R_EAX, Rn );
  1912     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1913     check_walign32( R_EAX );
  1914     MMU_TRANSLATE_WRITE( R_EAX );
  1915     load_spreg( R_EDX, R_FPSCR );
  1916     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1917     JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
  1918     load_fr_bank( R_EDX );
  1919     load_fr( R_EDX, R_ECX, FRm );
  1920     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1921     if( FRm&1 ) {
  1922 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1923 	JMP_TARGET(doublesize);
  1924 	load_xf_bank( R_EDX );
  1925 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1926 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1927 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1928 	JMP_TARGET(end);
  1929     } else {
  1930 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1931 	JMP_TARGET(doublesize);
  1932 	load_fr_bank( R_EDX );
  1933 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1934 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1935 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1936 	JMP_TARGET(end);
  1937     }
  1938     sh4_x86.tstate = TSTATE_NONE;
  1939 :}
  1940 FMOV @(R0, Rm), FRn {:  
  1941     check_fpuen();
  1942     load_reg( R_EAX, Rm );
  1943     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1944     check_ralign32( R_EAX );
  1945     MMU_TRANSLATE_READ( R_EAX );
  1946     load_spreg( R_EDX, R_FPSCR );
  1947     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1948     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1949     MEM_READ_LONG( R_EAX, R_EAX );
  1950     load_fr_bank( R_EDX );
  1951     store_fr( R_EDX, R_EAX, FRn );
  1952     if( FRn&1 ) {
  1953 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1954 	JMP_TARGET(doublesize);
  1955 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1956 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1957 	load_xf_bank( R_EDX );
  1958 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1959 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1960 	JMP_TARGET(end);
  1961     } else {
  1962 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1963 	JMP_TARGET(doublesize);
  1964 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1965 	load_fr_bank( R_EDX );
  1966 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1967 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1968 	JMP_TARGET(end);
  1969     }
  1970     sh4_x86.tstate = TSTATE_NONE;
  1971 :}
  1972 FLDI0 FRn {:  /* IFF PR=0 */
  1973     check_fpuen();
  1974     load_spreg( R_ECX, R_FPSCR );
  1975     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1976     JNE_rel8(8, end);
  1977     XOR_r32_r32( R_EAX, R_EAX );
  1978     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1979     store_fr( R_ECX, R_EAX, FRn );
  1980     JMP_TARGET(end);
  1981     sh4_x86.tstate = TSTATE_NONE;
  1982 :}
  1983 FLDI1 FRn {:  /* IFF PR=0 */
  1984     check_fpuen();
  1985     load_spreg( R_ECX, R_FPSCR );
  1986     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1987     JNE_rel8(11, end);
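           /* 0x3F800000 is 1.0f in IEEE 754 single precision. */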
  1988     load_imm32(R_EAX, 0x3F800000);
  1989     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1990     store_fr( R_ECX, R_EAX, FRn );
  1991     JMP_TARGET(end);
  1992     sh4_x86.tstate = TSTATE_NONE;
  1993 :}
  1995 FLOAT FPUL, FRn {:  
  1996     check_fpuen();
  1997     load_spreg( R_ECX, R_FPSCR );
  1998     load_spreg(R_EDX, REG_OFFSET(fr_bank));
  1999     FILD_sh4r(R_FPUL);
  2000     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2001     JNE_rel8(5, doubleprec);
  2002     pop_fr( R_EDX, FRn );
  2003     JMP_rel8(3, end);
  2004     JMP_TARGET(doubleprec);
  2005     pop_dr( R_EDX, FRn );
  2006     JMP_TARGET(end);
  2007     sh4_x86.tstate = TSTATE_NONE;
  2008 :}
  2009 FTRC FRm, FPUL {:  
  2010     check_fpuen();
  2011     load_spreg( R_ECX, R_FPSCR );
  2012     load_fr_bank( R_EDX );
  2013     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2014     JNE_rel8(5, doubleprec);
  2015     push_fr( R_EDX, FRm );
  2016     JMP_rel8(3, doop);
  2017     JMP_TARGET(doubleprec);
  2018     push_dr( R_EDX, FRm );
  2019     JMP_TARGET( doop );
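           /* Clamp against max_int/min_int so out-of-range values saturate,
            * as on the real FPU, then switch the x87 control word to
            * round-to-zero for the store and restore it afterwards. The sat
            * paths reuse R_ECX, which still points at whichever bound
            * failed its compare. */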
  2020     load_imm32( R_ECX, (uint32_t)&max_int );
  2021     FILD_r32ind( R_ECX );
  2022     FCOMIP_st(1);
  2023     JNA_rel8( 32, sat );
  2024     load_imm32( R_ECX, (uint32_t)&min_int );  // 5
  2025     FILD_r32ind( R_ECX );           // 2
  2026     FCOMIP_st(1);                   // 2
  2027     JAE_rel8( 21, sat2 );            // 2
  2028     load_imm32( R_EAX, (uint32_t)&save_fcw );
  2029     FNSTCW_r32ind( R_EAX );
  2030     load_imm32( R_EDX, (uint32_t)&trunc_fcw );
  2031     FLDCW_r32ind( R_EDX );
  2032     FISTP_sh4r(R_FPUL);             // 3
  2033     FLDCW_r32ind( R_EAX );
  2034     JMP_rel8( 9, end );             // 2
  2036     JMP_TARGET(sat);
  2037     JMP_TARGET(sat2);
  2038     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  2039     store_spreg( R_ECX, R_FPUL );
  2040     FPOP_st();
  2041     JMP_TARGET(end);
  2042     sh4_x86.tstate = TSTATE_NONE;
  2043 :}
  2044 FLDS FRm, FPUL {:  
  2045     check_fpuen();
  2046     load_fr_bank( R_ECX );
  2047     load_fr( R_ECX, R_EAX, FRm );
  2048     store_spreg( R_EAX, R_FPUL );
  2049     sh4_x86.tstate = TSTATE_NONE;
  2050 :}
  2051 FSTS FPUL, FRn {:  
  2052     check_fpuen();
  2053     load_fr_bank( R_ECX );
  2054     load_spreg( R_EAX, R_FPUL );
  2055     store_fr( R_ECX, R_EAX, FRn );
  2056     sh4_x86.tstate = TSTATE_NONE;
  2057 :}
  2058 FCNVDS FRm, FPUL {:  
  2059     check_fpuen();
  2060     load_spreg( R_ECX, R_FPSCR );
  2061     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2062     JE_rel8(9, end); // only when PR=1
  2063     load_fr_bank( R_ECX );
  2064     push_dr( R_ECX, FRm );
  2065     pop_fpul();
  2066     JMP_TARGET(end);
  2067     sh4_x86.tstate = TSTATE_NONE;
  2068 :}
  2069 FCNVSD FPUL, FRn {:  
  2070     check_fpuen();
  2071     load_spreg( R_ECX, R_FPSCR );
  2072     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2073     JE_rel8(9, end); // only when PR=1
  2074     load_fr_bank( R_ECX );
  2075     push_fpul();
  2076     pop_dr( R_ECX, FRn );
  2077     JMP_TARGET(end);
  2078     sh4_x86.tstate = TSTATE_NONE;
  2079 :}
  2081 /* Floating point instructions */
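       /* Most of these share one pattern: test FPSCR.PR, then emit either a
        * single-precision (push_fr/pop_fr) or a double-precision
        * (push_dr/pop_dr) x87 sequence. The JNE_rel8/JMP_rel8 skip counts
        * are hand-computed byte lengths of the emitted alternatives. */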
  2082 FABS FRn {:  
  2083     check_fpuen();
  2084     load_spreg( R_ECX, R_FPSCR );
  2085     load_fr_bank( R_EDX );
  2086     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2087     JNE_rel8(10, doubleprec);
  2088     push_fr(R_EDX, FRn); // 3
  2089     FABS_st0(); // 2
  2090     pop_fr( R_EDX, FRn); //3
  2091     JMP_rel8(8,end); // 2
  2092     JMP_TARGET(doubleprec);
  2093     push_dr(R_EDX, FRn);
  2094     FABS_st0();
  2095     pop_dr(R_EDX, FRn);
  2096     JMP_TARGET(end);
  2097     sh4_x86.tstate = TSTATE_NONE;
  2098 :}
  2099 FADD FRm, FRn {:  
  2100     check_fpuen();
  2101     load_spreg( R_ECX, R_FPSCR );
  2102     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2103     load_fr_bank( R_EDX );
  2104     JNE_rel8(13,doubleprec);
  2105     push_fr(R_EDX, FRm);
  2106     push_fr(R_EDX, FRn);
  2107     FADDP_st(1);
  2108     pop_fr(R_EDX, FRn);
  2109     JMP_rel8(11,end);
  2110     JMP_TARGET(doubleprec);
  2111     push_dr(R_EDX, FRm);
  2112     push_dr(R_EDX, FRn);
  2113     FADDP_st(1);
  2114     pop_dr(R_EDX, FRn);
  2115     JMP_TARGET(end);
  2116     sh4_x86.tstate = TSTATE_NONE;
  2117 :}
  2118 FDIV FRm, FRn {:  
  2119     check_fpuen();
  2120     load_spreg( R_ECX, R_FPSCR );
  2121     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2122     load_fr_bank( R_EDX );
  2123     JNE_rel8(13, doubleprec);
  2124     push_fr(R_EDX, FRn);
  2125     push_fr(R_EDX, FRm);
  2126     FDIVP_st(1);
  2127     pop_fr(R_EDX, FRn);
  2128     JMP_rel8(11, end);
  2129     JMP_TARGET(doubleprec);
  2130     push_dr(R_EDX, FRn);
  2131     push_dr(R_EDX, FRm);
  2132     FDIVP_st(1);
  2133     pop_dr(R_EDX, FRn);
  2134     JMP_TARGET(end);
  2135     sh4_x86.tstate = TSTATE_NONE;
  2136 :}
  2137 FMAC FR0, FRm, FRn {:  
  2138     check_fpuen();
  2139     load_spreg( R_ECX, R_FPSCR );
  2140     load_spreg( R_EDX, REG_OFFSET(fr_bank));
  2141     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2142     JNE_rel8(18, doubleprec);
  2143     push_fr( R_EDX, 0 );
  2144     push_fr( R_EDX, FRm );
  2145     FMULP_st(1);
  2146     push_fr( R_EDX, FRn );
  2147     FADDP_st(1);
  2148     pop_fr( R_EDX, FRn );
  2149     JMP_rel8(16, end);
  2150     JMP_TARGET(doubleprec);
  2151     push_dr( R_EDX, 0 );
  2152     push_dr( R_EDX, FRm );
  2153     FMULP_st(1);
  2154     push_dr( R_EDX, FRn );
  2155     FADDP_st(1);
  2156     pop_dr( R_EDX, FRn );
  2157     JMP_TARGET(end);
  2158     sh4_x86.tstate = TSTATE_NONE;
  2159 :}
  2161 FMUL FRm, FRn {:  
  2162     check_fpuen();
  2163     load_spreg( R_ECX, R_FPSCR );
  2164     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2165     load_fr_bank( R_EDX );
  2166     JNE_rel8(13, doubleprec);
  2167     push_fr(R_EDX, FRm);
  2168     push_fr(R_EDX, FRn);
  2169     FMULP_st(1);
  2170     pop_fr(R_EDX, FRn);
  2171     JMP_rel8(11, end);
  2172     JMP_TARGET(doubleprec);
  2173     push_dr(R_EDX, FRm);
  2174     push_dr(R_EDX, FRn);
  2175     FMULP_st(1);
  2176     pop_dr(R_EDX, FRn);
  2177     JMP_TARGET(end);
  2178     sh4_x86.tstate = TSTATE_NONE;
  2179 :}
  2180 FNEG FRn {:  
  2181     check_fpuen();
  2182     load_spreg( R_ECX, R_FPSCR );
  2183     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2184     load_fr_bank( R_EDX );
  2185     JNE_rel8(10, doubleprec);
  2186     push_fr(R_EDX, FRn);
  2187     FCHS_st0();
  2188     pop_fr(R_EDX, FRn);
  2189     JMP_rel8(8, end);
  2190     JMP_TARGET(doubleprec);
  2191     push_dr(R_EDX, FRn);
  2192     FCHS_st0();
  2193     pop_dr(R_EDX, FRn);
  2194     JMP_TARGET(end);
  2195     sh4_x86.tstate = TSTATE_NONE;
  2196 :}
  2197 FSRRA FRn {:  
  2198     check_fpuen();
  2199     load_spreg( R_ECX, R_FPSCR );
  2200     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2201     load_fr_bank( R_EDX );
  2202     JNE_rel8(12, end); // PR=0 only
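           /* Computes 1.0/sqrt(FRn) exactly on the x87 stack; the hardware
            * FSRRA result is only specified as an approximation. */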
  2203     FLD1_st0();
  2204     push_fr(R_EDX, FRn);
  2205     FSQRT_st0();
  2206     FDIVP_st(1);
  2207     pop_fr(R_EDX, FRn);
  2208     JMP_TARGET(end);
  2209     sh4_x86.tstate = TSTATE_NONE;
  2210 :}
  2211 FSQRT FRn {:  
  2212     check_fpuen();
  2213     load_spreg( R_ECX, R_FPSCR );
  2214     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2215     load_fr_bank( R_EDX );
  2216     JNE_rel8(10, doubleprec);
  2217     push_fr(R_EDX, FRn);
  2218     FSQRT_st0();
  2219     pop_fr(R_EDX, FRn);
  2220     JMP_rel8(8, end);
  2221     JMP_TARGET(doubleprec);
  2222     push_dr(R_EDX, FRn);
  2223     FSQRT_st0();
  2224     pop_dr(R_EDX, FRn);
  2225     JMP_TARGET(end);
  2226     sh4_x86.tstate = TSTATE_NONE;
  2227 :}
  2228 FSUB FRm, FRn {:  
  2229     check_fpuen();
  2230     load_spreg( R_ECX, R_FPSCR );
  2231     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2232     load_fr_bank( R_EDX );
  2233     JNE_rel8(13, doubleprec);
  2234     push_fr(R_EDX, FRn);
  2235     push_fr(R_EDX, FRm);
  2236     FSUBP_st(1);
  2237     pop_fr(R_EDX, FRn);
  2238     JMP_rel8(11, end);
  2239     JMP_TARGET(doubleprec);
  2240     push_dr(R_EDX, FRn);
  2241     push_dr(R_EDX, FRm);
  2242     FSUBP_st(1);
  2243     pop_dr(R_EDX, FRn);
  2244     JMP_TARGET(end);
  2245     sh4_x86.tstate = TSTATE_NONE;
  2246 :}
  2248 FCMP/EQ FRm, FRn {:  
  2249     check_fpuen();
  2250     load_spreg( R_ECX, R_FPSCR );
  2251     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2252     load_fr_bank( R_EDX );
  2253     JNE_rel8(8, doubleprec);
  2254     push_fr(R_EDX, FRm);
  2255     push_fr(R_EDX, FRn);
  2256     JMP_rel8(6, end);
  2257     JMP_TARGET(doubleprec);
  2258     push_dr(R_EDX, FRm);
  2259     push_dr(R_EDX, FRn);
  2260     JMP_TARGET(end);
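           /* Both precision paths fall through to a single x87 compare:
            * FCOMIP sets the host flags and SETE writes T directly. */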
  2261     FCOMIP_st(1);
  2262     SETE_t();
  2263     FPOP_st();
  2264     sh4_x86.tstate = TSTATE_NONE;
  2265 :}
  2266 FCMP/GT FRm, FRn {:  
  2267     check_fpuen();
  2268     load_spreg( R_ECX, R_FPSCR );
  2269     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2270     load_fr_bank( R_EDX );
  2271     JNE_rel8(8, doubleprec);
  2272     push_fr(R_EDX, FRm);
  2273     push_fr(R_EDX, FRn);
  2274     JMP_rel8(6, end);
  2275     JMP_TARGET(doubleprec);
  2276     push_dr(R_EDX, FRm);
  2277     push_dr(R_EDX, FRn);
  2278     JMP_TARGET(end);
  2279     FCOMIP_st(1);
  2280     SETA_t();
  2281     FPOP_st();
  2282     sh4_x86.tstate = TSTATE_NONE;
  2283 :}
  2285 FSCA FPUL, FRn {:  
  2286     check_fpuen();
  2287     load_spreg( R_ECX, R_FPSCR );
  2288     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2289     JNE_rel8( CALL_FUNC2_SIZE + 9, doubleprec );
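           /* sh4_fsca writes the sine and cosine of the FPUL fixed-point
            * angle into the FRn/FRn+1 pair; the instruction is PR=0 only. */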
  2290     load_fr_bank( R_ECX );
  2291     ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
  2292     load_spreg( R_EDX, R_FPUL );
  2293     call_func2( sh4_fsca, R_EDX, R_ECX );
  2294     JMP_TARGET(doubleprec);
  2295     sh4_x86.tstate = TSTATE_NONE;
  2296 :}
  2297 FIPR FVm, FVn {:  
  2298     check_fpuen();
  2299     load_spreg( R_ECX, R_FPSCR );
  2300     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2301     JNE_rel8(44, doubleprec);
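           /* Inner product FVm.FVn: multiply and accumulate the four
            * components on the x87 stack, leaving the sum in
            * FR[(FVn<<2)+3]. */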
  2303     load_fr_bank( R_ECX );
  2304     push_fr( R_ECX, FVm<<2 );
  2305     push_fr( R_ECX, FVn<<2 );
  2306     FMULP_st(1);
  2307     push_fr( R_ECX, (FVm<<2)+1);
  2308     push_fr( R_ECX, (FVn<<2)+1);
  2309     FMULP_st(1);
  2310     FADDP_st(1);
  2311     push_fr( R_ECX, (FVm<<2)+2);
  2312     push_fr( R_ECX, (FVn<<2)+2);
  2313     FMULP_st(1);
  2314     FADDP_st(1);
  2315     push_fr( R_ECX, (FVm<<2)+3);
  2316     push_fr( R_ECX, (FVn<<2)+3);
  2317     FMULP_st(1);
  2318     FADDP_st(1);
  2319     pop_fr( R_ECX, (FVn<<2)+3);
  2320     JMP_TARGET(doubleprec);
  2321     sh4_x86.tstate = TSTATE_NONE;
  2322 :}
  2323 FTRV XMTRX, FVn {:  
  2324     check_fpuen();
  2325     load_spreg( R_ECX, R_FPSCR );
  2326     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2327     JNE_rel8( 18 + CALL_FUNC2_SIZE, doubleprec );
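           /* XMTRX * FVn is evaluated in the sh4_ftrv helper, which takes
            * pointers to the vector (fr_bank + FVn*16) and the matrix bank. */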
  2328     load_fr_bank( R_EDX );                 // 3
  2329     ADD_imm8s_r32( FVn<<4, R_EDX );        // 3
  2330     load_xf_bank( R_ECX );                 // 12
  2331     call_func2( sh4_ftrv, R_EDX, R_ECX );  // 12
  2332     JMP_TARGET(doubleprec);
  2333     sh4_x86.tstate = TSTATE_NONE;
  2334 :}
  2336 FRCHG {:  
  2337     check_fpuen();
  2338     load_spreg( R_ECX, R_FPSCR );
  2339     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2340     store_spreg( R_ECX, R_FPSCR );
  2341     update_fr_bank( R_ECX );
  2342     sh4_x86.tstate = TSTATE_NONE;
  2343 :}
  2344 FSCHG {:  
  2345     check_fpuen();
  2346     load_spreg( R_ECX, R_FPSCR );
  2347     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2348     store_spreg( R_ECX, R_FPSCR );
  2349     sh4_x86.tstate = TSTATE_NONE;
  2350 :}
  2352 /* Processor control instructions */
  2353 LDC Rm, SR {:
  2354     if( sh4_x86.in_delay_slot ) {
  2355 	SLOTILLEGAL();
  2356     } else {
  2357 	check_priv();
  2358 	load_reg( R_EAX, Rm );
  2359 	call_func1( sh4_write_sr, R_EAX );
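       	/* Writing SR can change MD/RB and the FPU-disable bit, so the
       	 * cached privilege and FPU-enable checks for this block are
       	 * invalidated below. */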
  2360 	sh4_x86.priv_checked = FALSE;
  2361 	sh4_x86.fpuen_checked = FALSE;
  2362 	sh4_x86.tstate = TSTATE_NONE;
  2363     }
  2364 :}
  2365 LDC Rm, GBR {: 
  2366     load_reg( R_EAX, Rm );
  2367     store_spreg( R_EAX, R_GBR );
  2368 :}
  2369 LDC Rm, VBR {:  
  2370     check_priv();
  2371     load_reg( R_EAX, Rm );
  2372     store_spreg( R_EAX, R_VBR );
  2373     sh4_x86.tstate = TSTATE_NONE;
  2374 :}
  2375 LDC Rm, SSR {:  
  2376     check_priv();
  2377     load_reg( R_EAX, Rm );
  2378     store_spreg( R_EAX, R_SSR );
  2379     sh4_x86.tstate = TSTATE_NONE;
  2380 :}
  2381 LDC Rm, SGR {:  
  2382     check_priv();
  2383     load_reg( R_EAX, Rm );
  2384     store_spreg( R_EAX, R_SGR );
  2385     sh4_x86.tstate = TSTATE_NONE;
  2386 :}
  2387 LDC Rm, SPC {:  
  2388     check_priv();
  2389     load_reg( R_EAX, Rm );
  2390     store_spreg( R_EAX, R_SPC );
  2391     sh4_x86.tstate = TSTATE_NONE;
  2392 :}
  2393 LDC Rm, DBR {:  
  2394     check_priv();
  2395     load_reg( R_EAX, Rm );
  2396     store_spreg( R_EAX, R_DBR );
  2397     sh4_x86.tstate = TSTATE_NONE;
  2398 :}
  2399 LDC Rm, Rn_BANK {:  
  2400     check_priv();
  2401     load_reg( R_EAX, Rm );
  2402     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2403     sh4_x86.tstate = TSTATE_NONE;
  2404 :}
  2405 LDC.L @Rm+, GBR {:  
  2406     load_reg( R_EAX, Rm );
  2407     check_ralign32( R_EAX );
  2408     MMU_TRANSLATE_READ( R_EAX );
  2409     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2410     MEM_READ_LONG( R_EAX, R_EAX );
  2411     store_spreg( R_EAX, R_GBR );
  2412     sh4_x86.tstate = TSTATE_NONE;
  2413 :}
  2414 LDC.L @Rm+, SR {:
  2415     if( sh4_x86.in_delay_slot ) {
  2416 	SLOTILLEGAL();
  2417     } else {
  2418 	check_priv();
  2419 	load_reg( R_EAX, Rm );
  2420 	check_ralign32( R_EAX );
  2421 	MMU_TRANSLATE_READ( R_EAX );
  2422 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2423 	MEM_READ_LONG( R_EAX, R_EAX );
  2424 	call_func1( sh4_write_sr, R_EAX );
  2425 	sh4_x86.priv_checked = FALSE;
  2426 	sh4_x86.fpuen_checked = FALSE;
  2427 	sh4_x86.tstate = TSTATE_NONE;
  2428     }
  2429 :}
  2430 LDC.L @Rm+, VBR {:  
  2431     check_priv();
  2432     load_reg( R_EAX, Rm );
  2433     check_ralign32( R_EAX );
  2434     MMU_TRANSLATE_READ( R_EAX );
  2435     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2436     MEM_READ_LONG( R_EAX, R_EAX );
  2437     store_spreg( R_EAX, R_VBR );
  2438     sh4_x86.tstate = TSTATE_NONE;
  2439 :}
  2440 LDC.L @Rm+, SSR {:
  2441     check_priv();
  2442     load_reg( R_EAX, Rm );
  2443     check_ralign32( R_EAX );
  2444     MMU_TRANSLATE_READ( R_EAX );
  2445     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2446     MEM_READ_LONG( R_EAX, R_EAX );
  2447     store_spreg( R_EAX, R_SSR );
  2448     sh4_x86.tstate = TSTATE_NONE;
  2449 :}
  2450 LDC.L @Rm+, SGR {:  
  2451     check_priv();
  2452     load_reg( R_EAX, Rm );
  2453     check_ralign32( R_EAX );
  2454     MMU_TRANSLATE_READ( R_EAX );
  2455     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2456     MEM_READ_LONG( R_EAX, R_EAX );
  2457     store_spreg( R_EAX, R_SGR );
  2458     sh4_x86.tstate = TSTATE_NONE;
  2459 :}
  2460 LDC.L @Rm+, SPC {:  
  2461     check_priv();
  2462     load_reg( R_EAX, Rm );
  2463     check_ralign32( R_EAX );
  2464     MMU_TRANSLATE_READ( R_EAX );
  2465     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2466     MEM_READ_LONG( R_EAX, R_EAX );
  2467     store_spreg( R_EAX, R_SPC );
  2468     sh4_x86.tstate = TSTATE_NONE;
  2469 :}
  2470 LDC.L @Rm+, DBR {:  
  2471     check_priv();
  2472     load_reg( R_EAX, Rm );
  2473     check_ralign32( R_EAX );
  2474     MMU_TRANSLATE_READ( R_EAX );
  2475     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2476     MEM_READ_LONG( R_EAX, R_EAX );
  2477     store_spreg( R_EAX, R_DBR );
  2478     sh4_x86.tstate = TSTATE_NONE;
  2479 :}
  2480 LDC.L @Rm+, Rn_BANK {:  
  2481     check_priv();
  2482     load_reg( R_EAX, Rm );
  2483     check_ralign32( R_EAX );
  2484     MMU_TRANSLATE_READ( R_EAX );
  2485     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2486     MEM_READ_LONG( R_EAX, R_EAX );
  2487     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2488     sh4_x86.tstate = TSTATE_NONE;
  2489 :}
  2490 LDS Rm, FPSCR {:  
  2491     load_reg( R_EAX, Rm );
  2492     store_spreg( R_EAX, R_FPSCR );
  2493     update_fr_bank( R_EAX );
  2494     sh4_x86.tstate = TSTATE_NONE;
  2495 :}
  2496 LDS.L @Rm+, FPSCR {:  
  2497     load_reg( R_EAX, Rm );
  2498     check_ralign32( R_EAX );
  2499     MMU_TRANSLATE_READ( R_EAX );
  2500     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2501     MEM_READ_LONG( R_EAX, R_EAX );
  2502     store_spreg( R_EAX, R_FPSCR );
  2503     update_fr_bank( R_EAX );
  2504     sh4_x86.tstate = TSTATE_NONE;
  2505 :}
  2506 LDS Rm, FPUL {:  
  2507     load_reg( R_EAX, Rm );
  2508     store_spreg( R_EAX, R_FPUL );
  2509 :}
  2510 LDS.L @Rm+, FPUL {:  
  2511     load_reg( R_EAX, Rm );
  2512     check_ralign32( R_EAX );
  2513     MMU_TRANSLATE_READ( R_EAX );
  2514     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2515     MEM_READ_LONG( R_EAX, R_EAX );
  2516     store_spreg( R_EAX, R_FPUL );
  2517     sh4_x86.tstate = TSTATE_NONE;
  2518 :}
  2519 LDS Rm, MACH {: 
  2520     load_reg( R_EAX, Rm );
  2521     store_spreg( R_EAX, R_MACH );
  2522 :}
  2523 LDS.L @Rm+, MACH {:  
  2524     load_reg( R_EAX, Rm );
  2525     check_ralign32( R_EAX );
  2526     MMU_TRANSLATE_READ( R_EAX );
  2527     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2528     MEM_READ_LONG( R_EAX, R_EAX );
  2529     store_spreg( R_EAX, R_MACH );
  2530     sh4_x86.tstate = TSTATE_NONE;
  2531 :}
  2532 LDS Rm, MACL {:  
  2533     load_reg( R_EAX, Rm );
  2534     store_spreg( R_EAX, R_MACL );
  2535 :}
  2536 LDS.L @Rm+, MACL {:  
  2537     load_reg( R_EAX, Rm );
  2538     check_ralign32( R_EAX );
  2539     MMU_TRANSLATE_READ( R_EAX );
  2540     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2541     MEM_READ_LONG( R_EAX, R_EAX );
  2542     store_spreg( R_EAX, R_MACL );
  2543     sh4_x86.tstate = TSTATE_NONE;
  2544 :}
  2545 LDS Rm, PR {:  
  2546     load_reg( R_EAX, Rm );
  2547     store_spreg( R_EAX, R_PR );
  2548 :}
  2549 LDS.L @Rm+, PR {:  
  2550     load_reg( R_EAX, Rm );
  2551     check_ralign32( R_EAX );
  2552     MMU_TRANSLATE_READ( R_EAX );
  2553     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2554     MEM_READ_LONG( R_EAX, R_EAX );
  2555     store_spreg( R_EAX, R_PR );
  2556     sh4_x86.tstate = TSTATE_NONE;
  2557 :}
  2558 LDTLB {:  
  2559     call_func0( MMU_ldtlb );
  2560 :}
  2561 OCBI @Rn {:  :}
  2562 OCBP @Rn {:  :}
  2563 OCBWB @Rn {:  :}
  2564 PREF @Rn {:
  2565     load_reg( R_EAX, Rn );
  2566     MOV_r32_r32( R_EAX, R_ECX );
  2567     AND_imm32_r32( 0xFC000000, R_EAX );
  2568     CMP_imm32_r32( 0xE0000000, R_EAX );
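           /* Only the store-queue region (0xE0000000-0xE3FFFFFF) needs any
            * work here; prefetches to other addresses are no-ops. */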
  2569     JNE_rel8(8+CALL_FUNC1_SIZE, end);
  2570     call_func1( sh4_flush_store_queue, R_ECX );
  2571     TEST_r32_r32( R_EAX, R_EAX );
  2572     JE_exc(-1);
  2573     JMP_TARGET(end);
  2574     sh4_x86.tstate = TSTATE_NONE;
  2575 :}
  2576 SLEEP {: 
  2577     check_priv();
  2578     call_func0( sh4_sleep );
  2579     sh4_x86.tstate = TSTATE_NONE;
  2580     sh4_x86.in_delay_slot = DELAY_NONE;
  2581     return 2;
  2582 :}
  2583 STC SR, Rn {:
  2584     check_priv();
  2585     call_func0(sh4_read_sr);
  2586     store_reg( R_EAX, Rn );
  2587     sh4_x86.tstate = TSTATE_NONE;
  2588 :}
  2589 STC GBR, Rn {:  
  2590     load_spreg( R_EAX, R_GBR );
  2591     store_reg( R_EAX, Rn );
  2592 :}
  2593 STC VBR, Rn {:  
  2594     check_priv();
  2595     load_spreg( R_EAX, R_VBR );
  2596     store_reg( R_EAX, Rn );
  2597     sh4_x86.tstate = TSTATE_NONE;
  2598 :}
  2599 STC SSR, Rn {:  
  2600     check_priv();
  2601     load_spreg( R_EAX, R_SSR );
  2602     store_reg( R_EAX, Rn );
  2603     sh4_x86.tstate = TSTATE_NONE;
  2604 :}
  2605 STC SPC, Rn {:  
  2606     check_priv();
  2607     load_spreg( R_EAX, R_SPC );
  2608     store_reg( R_EAX, Rn );
  2609     sh4_x86.tstate = TSTATE_NONE;
  2610 :}
  2611 STC SGR, Rn {:  
  2612     check_priv();
  2613     load_spreg( R_EAX, R_SGR );
  2614     store_reg( R_EAX, Rn );
  2615     sh4_x86.tstate = TSTATE_NONE;
  2616 :}
  2617 STC DBR, Rn {:  
  2618     check_priv();
  2619     load_spreg( R_EAX, R_DBR );
  2620     store_reg( R_EAX, Rn );
  2621     sh4_x86.tstate = TSTATE_NONE;
  2622 :}
  2623 STC Rm_BANK, Rn {:
  2624     check_priv();
  2625     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2626     store_reg( R_EAX, Rn );
  2627     sh4_x86.tstate = TSTATE_NONE;
  2628 :}
  2629 STC.L SR, @-Rn {:
  2630     check_priv();
  2631     load_reg( R_EAX, Rn );
  2632     check_walign32( R_EAX );
  2633     ADD_imm8s_r32( -4, R_EAX );
  2634     MMU_TRANSLATE_WRITE( R_EAX );
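           /* sh4_read_sr returns its result in EAX, so the translated store
            * address is kept live across the call on the realigned stack. */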
  2635     PUSH_realigned_r32( R_EAX );
  2636     call_func0( sh4_read_sr );
  2637     POP_realigned_r32( R_ECX );
  2638     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2639     MEM_WRITE_LONG( R_ECX, R_EAX );
  2640     sh4_x86.tstate = TSTATE_NONE;
  2641 :}
  2642 STC.L VBR, @-Rn {:  
  2643     check_priv();
  2644     load_reg( R_EAX, Rn );
  2645     check_walign32( R_EAX );
  2646     ADD_imm8s_r32( -4, R_EAX );
  2647     MMU_TRANSLATE_WRITE( R_EAX );
  2648     load_spreg( R_EDX, R_VBR );
  2649     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2650     MEM_WRITE_LONG( R_EAX, R_EDX );
  2651     sh4_x86.tstate = TSTATE_NONE;
  2652 :}
  2653 STC.L SSR, @-Rn {:  
  2654     check_priv();
  2655     load_reg( R_EAX, Rn );
  2656     check_walign32( R_EAX );
  2657     ADD_imm8s_r32( -4, R_EAX );
  2658     MMU_TRANSLATE_WRITE( R_EAX );
  2659     load_spreg( R_EDX, R_SSR );
  2660     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2661     MEM_WRITE_LONG( R_EAX, R_EDX );
  2662     sh4_x86.tstate = TSTATE_NONE;
  2663 :}
  2664 STC.L SPC, @-Rn {:
  2665     check_priv();
  2666     load_reg( R_EAX, Rn );
  2667     check_walign32( R_EAX );
  2668     ADD_imm8s_r32( -4, R_EAX );
  2669     MMU_TRANSLATE_WRITE( R_EAX );
  2670     load_spreg( R_EDX, R_SPC );
  2671     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2672     MEM_WRITE_LONG( R_EAX, R_EDX );
  2673     sh4_x86.tstate = TSTATE_NONE;
  2674 :}
  2675 STC.L SGR, @-Rn {:  
  2676     check_priv();
  2677     load_reg( R_EAX, Rn );
  2678     check_walign32( R_EAX );
  2679     ADD_imm8s_r32( -4, R_EAX );
  2680     MMU_TRANSLATE_WRITE( R_EAX );
  2681     load_spreg( R_EDX, R_SGR );
  2682     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2683     MEM_WRITE_LONG( R_EAX, R_EDX );
  2684     sh4_x86.tstate = TSTATE_NONE;
  2685 :}
  2686 STC.L DBR, @-Rn {:  
  2687     check_priv();
  2688     load_reg( R_EAX, Rn );
  2689     check_walign32( R_EAX );
  2690     ADD_imm8s_r32( -4, R_EAX );
  2691     MMU_TRANSLATE_WRITE( R_EAX );
  2692     load_spreg( R_EDX, R_DBR );
  2693     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2694     MEM_WRITE_LONG( R_EAX, R_EDX );
  2695     sh4_x86.tstate = TSTATE_NONE;
  2696 :}
  2697 STC.L Rm_BANK, @-Rn {:  
  2698     check_priv();
  2699     load_reg( R_EAX, Rn );
  2700     check_walign32( R_EAX );
  2701     ADD_imm8s_r32( -4, R_EAX );
  2702     MMU_TRANSLATE_WRITE( R_EAX );
  2703     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2704     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2705     MEM_WRITE_LONG( R_EAX, R_EDX );
  2706     sh4_x86.tstate = TSTATE_NONE;
  2707 :}
  2708 STC.L GBR, @-Rn {:  
  2709     load_reg( R_EAX, Rn );
  2710     check_walign32( R_EAX );
  2711     ADD_imm8s_r32( -4, R_EAX );
  2712     MMU_TRANSLATE_WRITE( R_EAX );
  2713     load_spreg( R_EDX, R_GBR );
  2714     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2715     MEM_WRITE_LONG( R_EAX, R_EDX );
  2716     sh4_x86.tstate = TSTATE_NONE;
  2717 :}
  2718 STS FPSCR, Rn {:  
  2719     load_spreg( R_EAX, R_FPSCR );
  2720     store_reg( R_EAX, Rn );
  2721 :}
  2722 STS.L FPSCR, @-Rn {:  
  2723     load_reg( R_EAX, Rn );
  2724     check_walign32( R_EAX );
  2725     ADD_imm8s_r32( -4, R_EAX );
  2726     MMU_TRANSLATE_WRITE( R_EAX );
  2727     load_spreg( R_EDX, R_FPSCR );
  2728     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2729     MEM_WRITE_LONG( R_EAX, R_EDX );
  2730     sh4_x86.tstate = TSTATE_NONE;
  2731 :}
  2732 STS FPUL, Rn {:  
  2733     load_spreg( R_EAX, R_FPUL );
  2734     store_reg( R_EAX, Rn );
  2735 :}
  2736 STS.L FPUL, @-Rn {:  
  2737     load_reg( R_EAX, Rn );
  2738     check_walign32( R_EAX );
  2739     ADD_imm8s_r32( -4, R_EAX );
  2740     MMU_TRANSLATE_WRITE( R_EAX );
  2741     load_spreg( R_EDX, R_FPUL );
  2742     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2743     MEM_WRITE_LONG( R_EAX, R_EDX );
  2744     sh4_x86.tstate = TSTATE_NONE;
  2745 :}
  2746 STS MACH, Rn {:  
  2747     load_spreg( R_EAX, R_MACH );
  2748     store_reg( R_EAX, Rn );
  2749 :}
  2750 STS.L MACH, @-Rn {:  
  2751     load_reg( R_EAX, Rn );
  2752     check_walign32( R_EAX );
  2753     ADD_imm8s_r32( -4, R_EAX );
  2754     MMU_TRANSLATE_WRITE( R_EAX );
  2755     load_spreg( R_EDX, R_MACH );
  2756     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2757     MEM_WRITE_LONG( R_EAX, R_EDX );
  2758     sh4_x86.tstate = TSTATE_NONE;
  2759 :}
  2760 STS MACL, Rn {:  
  2761     load_spreg( R_EAX, R_MACL );
  2762     store_reg( R_EAX, Rn );
  2763 :}
  2764 STS.L MACL, @-Rn {:  
  2765     load_reg( R_EAX, Rn );
  2766     check_walign32( R_EAX );
  2767     ADD_imm8s_r32( -4, R_EAX );
  2768     MMU_TRANSLATE_WRITE( R_EAX );
  2769     load_spreg( R_EDX, R_MACL );
  2770     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2771     MEM_WRITE_LONG( R_EAX, R_EDX );
  2772     sh4_x86.tstate = TSTATE_NONE;
  2773 :}
  2774 STS PR, Rn {:  
  2775     load_spreg( R_EAX, R_PR );
  2776     store_reg( R_EAX, Rn );
  2777 :}
  2778 STS.L PR, @-Rn {:  
  2779     load_reg( R_EAX, Rn );
  2780     check_walign32( R_EAX );
  2781     ADD_imm8s_r32( -4, R_EAX );
  2782     MMU_TRANSLATE_WRITE( R_EAX );
  2783     load_spreg( R_EDX, R_PR );
  2784     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2785     MEM_WRITE_LONG( R_EAX, R_EDX );
  2786     sh4_x86.tstate = TSTATE_NONE;
  2787 :}
  2789 NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
  2790 %%
  2791     sh4_x86.in_delay_slot = DELAY_NONE;
  2792     return 0;