Search
lxdream.org :: lxdream/src/sh4/sh4x86.in
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 586:2a3ba82cf243
prev553:4e6166258c22
next590:4db6a084ca3c
author nkeynes
date Tue Jan 15 20:50:23 2008 +0000 (12 years ago)
permissions -rw-r--r--
last change Merged lxdream-mmu r570:596 to trunk
view annotate diff log raw
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
#include <assert.h>
#include <math.h>
#include <stdlib.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/xltcache.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4trans.h"
    31 #include "sh4/sh4mmio.h"
    32 #include "sh4/x86op.h"
    33 #include "clock.h"
    35 #define DEFAULT_BACKPATCH_SIZE 4096
    37 struct backpatch_record {
    38     uint32_t *fixup_addr;
    39     uint32_t fixup_icount;
    40     uint32_t exc_code;
    41 };
    43 #define MAX_RECOVERY_SIZE 2048
    45 /** 
    46  * Struct to manage internal translation state. This state is not saved -
    47  * it is only valid between calls to sh4_translate_begin_block() and
    48  * sh4_translate_end_block()
    49  */
    50 struct sh4_x86_state {
    51     gboolean in_delay_slot;
    52     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    53     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    54     gboolean branch_taken; /* true if we branched unconditionally */
    55     uint32_t block_start_pc;
    56     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    57     int tstate;
    59     /* mode flags */
    60     gboolean tlb_on; /* True if tlb translation is active */
    62     /* Allocated memory for the (block-wide) back-patch list */
    63     struct backpatch_record *backpatch_list;
    64     uint32_t backpatch_posn;
    65     uint32_t backpatch_size;
    66     struct xlat_recovery_record recovery_list[MAX_RECOVERY_SIZE];
    67     uint32_t recovery_posn;
    68 };
    70 #define TSTATE_NONE -1
    71 #define TSTATE_O    0
    72 #define TSTATE_C    2
    73 #define TSTATE_E    4
    74 #define TSTATE_NE   5
    75 #define TSTATE_G    0xF
    76 #define TSTATE_GE   0xD
    77 #define TSTATE_A    7
    78 #define TSTATE_AE   3
    80 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    81 #define JT_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    82 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    83     OP(0x70+sh4_x86.tstate); OP(rel8); \
    84     MARK_JMP(rel8,label)
    85 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    86 #define JF_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    87 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    88     OP(0x70+ (sh4_x86.tstate^1)); OP(rel8); \
    89     MARK_JMP(rel8, label)
    91 static struct sh4_x86_state sh4_x86;
    93 static uint32_t max_int = 0x7FFFFFFF;
    94 static uint32_t min_int = 0x80000000;
    95 static uint32_t save_fcw; /* save value for fpu control word */
    96 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
    98 void sh4_x86_init()
    99 {
   100     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   101     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   102 }
   105 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   106 {
   107     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   108 	sh4_x86.backpatch_size <<= 1;
   109 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   110 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   111 	assert( sh4_x86.backpatch_list != NULL );
   112     }
   113     if( sh4_x86.in_delay_slot ) {
   114 	fixup_pc -= 2;
   115     }
   116     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_addr = (uint32_t *)fixup_addr;
   117     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   118     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   119     sh4_x86.backpatch_posn++;
   120 }
   122 void sh4_x86_add_recovery( uint32_t pc )
   123 {
   124     xlat_recovery[xlat_recovery_posn].xlat_pc = (uintptr_t)xlat_output;
   125     xlat_recovery[xlat_recovery_posn].sh4_icount = (pc - sh4_x86.block_start_pc)>>1;
   126     xlat_recovery_posn++;
   127 }
   129 /**
   130  * Emit an instruction to load an SH4 reg into a real register
   131  */
   132 static inline void load_reg( int x86reg, int sh4reg ) 
   133 {
   134     /* mov [bp+n], reg */
   135     OP(0x8B);
   136     OP(0x45 + (x86reg<<3));
   137     OP(REG_OFFSET(r[sh4reg]));
   138 }
   140 static inline void load_reg16s( int x86reg, int sh4reg )
   141 {
   142     OP(0x0F);
   143     OP(0xBF);
   144     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   145 }
   147 static inline void load_reg16u( int x86reg, int sh4reg )
   148 {
   149     OP(0x0F);
   150     OP(0xB7);
   151     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   153 }
   155 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   156 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   157 /**
   158  * Emit an instruction to load an immediate value into a register
   159  */
   160 static inline void load_imm32( int x86reg, uint32_t value ) {
   161     /* mov #value, reg */
   162     OP(0xB8 + x86reg);
   163     OP32(value);
   164 }
   166 /**
   167  * Load an immediate 64-bit quantity (note: x86-64 only)
   168  */
   169 static inline void load_imm64( int x86reg, uint32_t value ) {
   170     /* mov #value, reg */
   171     REXW();
   172     OP(0xB8 + x86reg);
   173     OP64(value);
   174 }
   177 /**
   178  * Emit an instruction to store an SH4 reg (RN)
   179  */
   180 void static inline store_reg( int x86reg, int sh4reg ) {
   181     /* mov reg, [bp+n] */
   182     OP(0x89);
   183     OP(0x45 + (x86reg<<3));
   184     OP(REG_OFFSET(r[sh4reg]));
   185 }
   187 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
   189 /**
   190  * Load an FR register (single-precision floating point) into an integer x86
   191  * register (eg for register-to-register moves)
   192  */
   193 void static inline load_fr( int bankreg, int x86reg, int frm )
   194 {
   195     OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
   196 }
   198 /**
   199  * Store an FR register (single-precision floating point) into an integer x86
   200  * register (eg for register-to-register moves)
   201  */
   202 void static inline store_fr( int bankreg, int x86reg, int frn )
   203 {
   204     OP(0x89);  OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
   205 }
   208 /**
   209  * Load a pointer to the back fp back into the specified x86 register. The
   210  * bankreg must have been previously loaded with FPSCR.
   211  * NB: 12 bytes
   212  */
   213 static inline void load_xf_bank( int bankreg )
   214 {
   215     NOT_r32( bankreg );
   216     SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
   217     AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
   218     OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
   219 }
   221 /**
   222  * Update the fr_bank pointer based on the current fpscr value.
   223  */
   224 static inline void update_fr_bank( int fpscrreg )
   225 {
   226     SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
   227     AND_imm8s_r32( 0x40, fpscrreg );    // Complete extraction
   228     OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
   229     store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
   230 }
   231 /**
   232  * Push FPUL (as a 32-bit float) onto the FPU stack
   233  */
   234 static inline void push_fpul( )
   235 {
   236     OP(0xD9); OP(0x45); OP(R_FPUL);
   237 }
   239 /**
   240  * Pop FPUL (as a 32-bit float) from the FPU stack
   241  */
   242 static inline void pop_fpul( )
   243 {
   244     OP(0xD9); OP(0x5D); OP(R_FPUL);
   245 }
   247 /**
   248  * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
   249  * with the location of the current fp bank.
   250  */
   251 static inline void push_fr( int bankreg, int frm ) 
   252 {
   253     OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
   254 }
   256 /**
   257  * Pop a 32-bit float from the FPU stack and store it back into the fp bank, 
   258  * with bankreg previously loaded with the location of the current fp bank.
   259  */
   260 static inline void pop_fr( int bankreg, int frm )
   261 {
   262     OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
   263 }
   265 /**
   266  * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
   267  * with the location of the current fp bank.
   268  */
   269 static inline void push_dr( int bankreg, int frm )
   270 {
   271     OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
   272 }
   274 static inline void pop_dr( int bankreg, int frm )
   275 {
   276     OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
   277 }
   279 /* Exception checks - Note that all exception checks will clobber EAX */
   281 #define check_priv( ) \
   282     if( !sh4_x86.priv_checked ) { \
   283 	sh4_x86.priv_checked = TRUE;\
   284 	load_spreg( R_EAX, R_SR );\
   285 	AND_imm32_r32( SR_MD, R_EAX );\
   286 	if( sh4_x86.in_delay_slot ) {\
   287 	    JE_exc( EXC_SLOT_ILLEGAL );\
   288 	} else {\
   289 	    JE_exc( EXC_ILLEGAL );\
   290 	}\
   291     }\
   293 #define check_fpuen( ) \
   294     if( !sh4_x86.fpuen_checked ) {\
   295 	sh4_x86.fpuen_checked = TRUE;\
   296 	load_spreg( R_EAX, R_SR );\
   297 	AND_imm32_r32( SR_FD, R_EAX );\
   298 	if( sh4_x86.in_delay_slot ) {\
   299 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   300 	} else {\
   301 	    JNE_exc(EXC_FPU_DISABLED);\
   302 	}\
   303     }
   305 #define check_ralign16( x86reg ) \
   306     TEST_imm32_r32( 0x00000001, x86reg ); \
   307     JNE_exc(EXC_DATA_ADDR_READ)
   309 #define check_walign16( x86reg ) \
   310     TEST_imm32_r32( 0x00000001, x86reg ); \
   311     JNE_exc(EXC_DATA_ADDR_WRITE);
   313 #define check_ralign32( x86reg ) \
   314     TEST_imm32_r32( 0x00000003, x86reg ); \
   315     JNE_exc(EXC_DATA_ADDR_READ)
   317 #define check_walign32( x86reg ) \
   318     TEST_imm32_r32( 0x00000003, x86reg ); \
   319     JNE_exc(EXC_DATA_ADDR_WRITE);
   321 #define UNDEF()
   322 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   323 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   324 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   325 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   326 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   327 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   328 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   330 /**
   331  * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned 
   332  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   333  */
   334 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   335 /**
   336  * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned 
   337  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   338  */
   339 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   341 #define MEM_READ_SIZE (CALL_FUNC1_SIZE)
   342 #define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
   343 #define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )
   345 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
   347 /****** Import appropriate calling conventions ******/
   348 #if SH4_TRANSLATOR == TARGET_X86_64
   349 #include "sh4/ia64abi.h"
   350 #else /* SH4_TRANSLATOR == TARGET_X86 */
   351 #ifdef APPLE_BUILD
   352 #include "sh4/ia32mac.h"
   353 #else
   354 #include "sh4/ia32abi.h"
   355 #endif
   356 #endif
   358 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   359 {
   360     load_imm32( R_EAX, XLAT_EXIT_BREAKPOINT );
   361     call_func1( sh4_translate_exit, R_EAX );
   362 }
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both the branch and the delay-slot instruction as a single
 * unit.
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch).
 */
   374 uint32_t sh4_translate_instruction( sh4addr_t pc )
   375 {
   376     uint32_t ir;
   377     /* Read instruction from icache */
   378     assert( IS_IN_ICACHE(pc) );
   379     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   381 	/* PC is not in the current icache - this usually means we're running
   382 	 * with MMU on, and we've gone past the end of the page. And since 
   383 	 * sh4_translate_block is pretty careful about this, it means we're
   384 	 * almost certainly in a delay slot.
   385 	 *
   386 	 * Since we can't assume the page is present (and we can't fault it in
   387 	 * at this point, inline a call to sh4_execute_instruction (with a few
   388 	 * small repairs to cope with the different environment).
   389 	 */
   391     if( !sh4_x86.in_delay_slot ) {
   392 	sh4_x86_add_recovery(pc);
   393     }
   394 %%
   395 /* ALU operations */
   396 ADD Rm, Rn {:
   397     load_reg( R_EAX, Rm );
   398     load_reg( R_ECX, Rn );
   399     ADD_r32_r32( R_EAX, R_ECX );
   400     store_reg( R_ECX, Rn );
   401     sh4_x86.tstate = TSTATE_NONE;
   402 :}
   403 ADD #imm, Rn {:  
   404     load_reg( R_EAX, Rn );
   405     ADD_imm8s_r32( imm, R_EAX );
   406     store_reg( R_EAX, Rn );
   407     sh4_x86.tstate = TSTATE_NONE;
   408 :}
   409 ADDC Rm, Rn {:
   410     if( sh4_x86.tstate != TSTATE_C ) {
   411 	LDC_t();
   412     }
   413     load_reg( R_EAX, Rm );
   414     load_reg( R_ECX, Rn );
   415     ADC_r32_r32( R_EAX, R_ECX );
   416     store_reg( R_ECX, Rn );
   417     SETC_t();
   418     sh4_x86.tstate = TSTATE_C;
   419 :}
   420 ADDV Rm, Rn {:
   421     load_reg( R_EAX, Rm );
   422     load_reg( R_ECX, Rn );
   423     ADD_r32_r32( R_EAX, R_ECX );
   424     store_reg( R_ECX, Rn );
   425     SETO_t();
   426     sh4_x86.tstate = TSTATE_O;
   427 :}
   428 AND Rm, Rn {:
   429     load_reg( R_EAX, Rm );
   430     load_reg( R_ECX, Rn );
   431     AND_r32_r32( R_EAX, R_ECX );
   432     store_reg( R_ECX, Rn );
   433     sh4_x86.tstate = TSTATE_NONE;
   434 :}
   435 AND #imm, R0 {:  
   436     load_reg( R_EAX, 0 );
   437     AND_imm32_r32(imm, R_EAX); 
   438     store_reg( R_EAX, 0 );
   439     sh4_x86.tstate = TSTATE_NONE;
   440 :}
   441 AND.B #imm, @(R0, GBR) {: 
   442     load_reg( R_EAX, 0 );
   443     load_spreg( R_ECX, R_GBR );
   444     ADD_r32_r32( R_ECX, R_EAX );
   445     MMU_TRANSLATE_WRITE( R_EAX );
   446     PUSH_realigned_r32(R_EAX);
   447     MEM_READ_BYTE( R_EAX, R_EAX );
   448     POP_realigned_r32(R_ECX);
   449     AND_imm32_r32(imm, R_EAX );
   450     MEM_WRITE_BYTE( R_ECX, R_EAX );
   451     sh4_x86.tstate = TSTATE_NONE;
   452 :}
   453 CMP/EQ Rm, Rn {:  
   454     load_reg( R_EAX, Rm );
   455     load_reg( R_ECX, Rn );
   456     CMP_r32_r32( R_EAX, R_ECX );
   457     SETE_t();
   458     sh4_x86.tstate = TSTATE_E;
   459 :}
   460 CMP/EQ #imm, R0 {:  
   461     load_reg( R_EAX, 0 );
   462     CMP_imm8s_r32(imm, R_EAX);
   463     SETE_t();
   464     sh4_x86.tstate = TSTATE_E;
   465 :}
   466 CMP/GE Rm, Rn {:  
   467     load_reg( R_EAX, Rm );
   468     load_reg( R_ECX, Rn );
   469     CMP_r32_r32( R_EAX, R_ECX );
   470     SETGE_t();
   471     sh4_x86.tstate = TSTATE_GE;
   472 :}
   473 CMP/GT Rm, Rn {: 
   474     load_reg( R_EAX, Rm );
   475     load_reg( R_ECX, Rn );
   476     CMP_r32_r32( R_EAX, R_ECX );
   477     SETG_t();
   478     sh4_x86.tstate = TSTATE_G;
   479 :}
   480 CMP/HI Rm, Rn {:  
   481     load_reg( R_EAX, Rm );
   482     load_reg( R_ECX, Rn );
   483     CMP_r32_r32( R_EAX, R_ECX );
   484     SETA_t();
   485     sh4_x86.tstate = TSTATE_A;
   486 :}
   487 CMP/HS Rm, Rn {: 
   488     load_reg( R_EAX, Rm );
   489     load_reg( R_ECX, Rn );
   490     CMP_r32_r32( R_EAX, R_ECX );
   491     SETAE_t();
   492     sh4_x86.tstate = TSTATE_AE;
   493  :}
   494 CMP/PL Rn {: 
   495     load_reg( R_EAX, Rn );
   496     CMP_imm8s_r32( 0, R_EAX );
   497     SETG_t();
   498     sh4_x86.tstate = TSTATE_G;
   499 :}
   500 CMP/PZ Rn {:  
   501     load_reg( R_EAX, Rn );
   502     CMP_imm8s_r32( 0, R_EAX );
   503     SETGE_t();
   504     sh4_x86.tstate = TSTATE_GE;
   505 :}
   506 CMP/STR Rm, Rn {:  
   507     load_reg( R_EAX, Rm );
   508     load_reg( R_ECX, Rn );
   509     XOR_r32_r32( R_ECX, R_EAX );
   510     TEST_r8_r8( R_AL, R_AL );
   511     JE_rel8(13, target1);
   512     TEST_r8_r8( R_AH, R_AH ); // 2
   513     JE_rel8(9, target2);
   514     SHR_imm8_r32( 16, R_EAX ); // 3
   515     TEST_r8_r8( R_AL, R_AL ); // 2
   516     JE_rel8(2, target3);
   517     TEST_r8_r8( R_AH, R_AH ); // 2
   518     JMP_TARGET(target1);
   519     JMP_TARGET(target2);
   520     JMP_TARGET(target3);
   521     SETE_t();
   522     sh4_x86.tstate = TSTATE_E;
   523 :}
   524 DIV0S Rm, Rn {:
   525     load_reg( R_EAX, Rm );
   526     load_reg( R_ECX, Rn );
   527     SHR_imm8_r32( 31, R_EAX );
   528     SHR_imm8_r32( 31, R_ECX );
   529     store_spreg( R_EAX, R_M );
   530     store_spreg( R_ECX, R_Q );
   531     CMP_r32_r32( R_EAX, R_ECX );
   532     SETNE_t();
   533     sh4_x86.tstate = TSTATE_NE;
   534 :}
   535 DIV0U {:  
   536     XOR_r32_r32( R_EAX, R_EAX );
   537     store_spreg( R_EAX, R_Q );
   538     store_spreg( R_EAX, R_M );
   539     store_spreg( R_EAX, R_T );
   540     sh4_x86.tstate = TSTATE_C; // works for DIV1
   541 :}
   542 DIV1 Rm, Rn {:
   543     load_spreg( R_ECX, R_M );
   544     load_reg( R_EAX, Rn );
   545     if( sh4_x86.tstate != TSTATE_C ) {
   546 	LDC_t();
   547     }
   548     RCL1_r32( R_EAX );
   549     SETC_r8( R_DL ); // Q'
   550     CMP_sh4r_r32( R_Q, R_ECX );
   551     JE_rel8(5, mqequal);
   552     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   553     JMP_rel8(3, end);
   554     JMP_TARGET(mqequal);
   555     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   556     JMP_TARGET(end);
   557     store_reg( R_EAX, Rn ); // Done with Rn now
   558     SETC_r8(R_AL); // tmp1
   559     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   560     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   561     store_spreg( R_ECX, R_Q );
   562     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   563     MOVZX_r8_r32( R_AL, R_EAX );
   564     store_spreg( R_EAX, R_T );
   565     sh4_x86.tstate = TSTATE_NONE;
   566 :}
   567 DMULS.L Rm, Rn {:  
   568     load_reg( R_EAX, Rm );
   569     load_reg( R_ECX, Rn );
   570     IMUL_r32(R_ECX);
   571     store_spreg( R_EDX, R_MACH );
   572     store_spreg( R_EAX, R_MACL );
   573     sh4_x86.tstate = TSTATE_NONE;
   574 :}
   575 DMULU.L Rm, Rn {:  
   576     load_reg( R_EAX, Rm );
   577     load_reg( R_ECX, Rn );
   578     MUL_r32(R_ECX);
   579     store_spreg( R_EDX, R_MACH );
   580     store_spreg( R_EAX, R_MACL );    
   581     sh4_x86.tstate = TSTATE_NONE;
   582 :}
   583 DT Rn {:  
   584     load_reg( R_EAX, Rn );
   585     ADD_imm8s_r32( -1, R_EAX );
   586     store_reg( R_EAX, Rn );
   587     SETE_t();
   588     sh4_x86.tstate = TSTATE_E;
   589 :}
   590 EXTS.B Rm, Rn {:  
   591     load_reg( R_EAX, Rm );
   592     MOVSX_r8_r32( R_EAX, R_EAX );
   593     store_reg( R_EAX, Rn );
   594 :}
   595 EXTS.W Rm, Rn {:  
   596     load_reg( R_EAX, Rm );
   597     MOVSX_r16_r32( R_EAX, R_EAX );
   598     store_reg( R_EAX, Rn );
   599 :}
   600 EXTU.B Rm, Rn {:  
   601     load_reg( R_EAX, Rm );
   602     MOVZX_r8_r32( R_EAX, R_EAX );
   603     store_reg( R_EAX, Rn );
   604 :}
   605 EXTU.W Rm, Rn {:  
   606     load_reg( R_EAX, Rm );
   607     MOVZX_r16_r32( R_EAX, R_EAX );
   608     store_reg( R_EAX, Rn );
   609 :}
   610 MAC.L @Rm+, @Rn+ {:
   611     if( Rm == Rn ) {
   612 	load_reg( R_EAX, Rm );
   613 	check_ralign32( R_EAX );
   614 	MMU_TRANSLATE_READ( R_EAX );
   615 	PUSH_realigned_r32( R_EAX );
   616 	load_reg( R_EAX, Rn );
   617 	ADD_imm8s_r32( 4, R_EAX );
   618 	MMU_TRANSLATE_READ( R_EAX );
   619 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
   620 	// Note translate twice in case of page boundaries. Maybe worth
   621 	// adding a page-boundary check to skip the second translation
   622     } else {
   623 	load_reg( R_EAX, Rm );
   624 	check_ralign32( R_EAX );
   625 	MMU_TRANSLATE_READ( R_EAX );
   626 	PUSH_realigned_r32( R_EAX );
   627 	load_reg( R_EAX, Rn );
   628 	check_ralign32( R_EAX );
   629 	MMU_TRANSLATE_READ( R_EAX );
   630 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   631 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   632     }
   633     MEM_READ_LONG( R_EAX, R_EAX );
   634     POP_r32( R_ECX );
   635     PUSH_r32( R_EAX );
   636     MEM_READ_LONG( R_ECX, R_EAX );
   637     POP_realigned_r32( R_ECX );
   639     IMUL_r32( R_ECX );
   640     ADD_r32_sh4r( R_EAX, R_MACL );
   641     ADC_r32_sh4r( R_EDX, R_MACH );
   643     load_spreg( R_ECX, R_S );
   644     TEST_r32_r32(R_ECX, R_ECX);
   645     JE_rel8( CALL_FUNC0_SIZE, nosat );
   646     call_func0( signsat48 );
   647     JMP_TARGET( nosat );
   648     sh4_x86.tstate = TSTATE_NONE;
   649 :}
   650 MAC.W @Rm+, @Rn+ {:  
   651     if( Rm == Rn ) {
   652 	load_reg( R_EAX, Rm );
   653 	check_ralign16( R_EAX );
   654 	MMU_TRANSLATE_READ( R_EAX );
   655 	PUSH_realigned_r32( R_EAX );
   656 	load_reg( R_EAX, Rn );
   657 	ADD_imm8s_r32( 2, R_EAX );
   658 	MMU_TRANSLATE_READ( R_EAX );
   659 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   660 	// Note translate twice in case of page boundaries. Maybe worth
   661 	// adding a page-boundary check to skip the second translation
   662     } else {
   663 	load_reg( R_EAX, Rm );
   664 	check_ralign16( R_EAX );
   665 	MMU_TRANSLATE_READ( R_EAX );
   666 	PUSH_realigned_r32( R_EAX );
   667 	load_reg( R_EAX, Rn );
   668 	check_ralign16( R_EAX );
   669 	MMU_TRANSLATE_READ( R_EAX );
   670 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   671 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   672     }
   673     MEM_READ_WORD( R_EAX, R_EAX );
   674     POP_r32( R_ECX );
   675     PUSH_r32( R_EAX );
   676     MEM_READ_WORD( R_ECX, R_EAX );
   677     POP_realigned_r32( R_ECX );
   678     IMUL_r32( R_ECX );
   680     load_spreg( R_ECX, R_S );
   681     TEST_r32_r32( R_ECX, R_ECX );
   682     JE_rel8( 47, nosat );
   684     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   685     JNO_rel8( 51, end );            // 2
   686     load_imm32( R_EDX, 1 );         // 5
   687     store_spreg( R_EDX, R_MACH );   // 6
   688     JS_rel8( 13, positive );        // 2
   689     load_imm32( R_EAX, 0x80000000 );// 5
   690     store_spreg( R_EAX, R_MACL );   // 6
   691     JMP_rel8( 25, end2 );           // 2
   693     JMP_TARGET(positive);
   694     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   695     store_spreg( R_EAX, R_MACL );   // 6
   696     JMP_rel8( 12, end3);            // 2
   698     JMP_TARGET(nosat);
   699     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   700     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   701     JMP_TARGET(end);
   702     JMP_TARGET(end2);
   703     JMP_TARGET(end3);
   704     sh4_x86.tstate = TSTATE_NONE;
   705 :}
   706 MOVT Rn {:  
   707     load_spreg( R_EAX, R_T );
   708     store_reg( R_EAX, Rn );
   709 :}
   710 MUL.L Rm, Rn {:  
   711     load_reg( R_EAX, Rm );
   712     load_reg( R_ECX, Rn );
   713     MUL_r32( R_ECX );
   714     store_spreg( R_EAX, R_MACL );
   715     sh4_x86.tstate = TSTATE_NONE;
   716 :}
   717 MULS.W Rm, Rn {:
   718     load_reg16s( R_EAX, Rm );
   719     load_reg16s( R_ECX, Rn );
   720     MUL_r32( R_ECX );
   721     store_spreg( R_EAX, R_MACL );
   722     sh4_x86.tstate = TSTATE_NONE;
   723 :}
   724 MULU.W Rm, Rn {:  
   725     load_reg16u( R_EAX, Rm );
   726     load_reg16u( R_ECX, Rn );
   727     MUL_r32( R_ECX );
   728     store_spreg( R_EAX, R_MACL );
   729     sh4_x86.tstate = TSTATE_NONE;
   730 :}
   731 NEG Rm, Rn {:
   732     load_reg( R_EAX, Rm );
   733     NEG_r32( R_EAX );
   734     store_reg( R_EAX, Rn );
   735     sh4_x86.tstate = TSTATE_NONE;
   736 :}
   737 NEGC Rm, Rn {:  
   738     load_reg( R_EAX, Rm );
   739     XOR_r32_r32( R_ECX, R_ECX );
   740     LDC_t();
   741     SBB_r32_r32( R_EAX, R_ECX );
   742     store_reg( R_ECX, Rn );
   743     SETC_t();
   744     sh4_x86.tstate = TSTATE_C;
   745 :}
   746 NOT Rm, Rn {:  
   747     load_reg( R_EAX, Rm );
   748     NOT_r32( R_EAX );
   749     store_reg( R_EAX, Rn );
   750     sh4_x86.tstate = TSTATE_NONE;
   751 :}
   752 OR Rm, Rn {:  
   753     load_reg( R_EAX, Rm );
   754     load_reg( R_ECX, Rn );
   755     OR_r32_r32( R_EAX, R_ECX );
   756     store_reg( R_ECX, Rn );
   757     sh4_x86.tstate = TSTATE_NONE;
   758 :}
   759 OR #imm, R0 {:
   760     load_reg( R_EAX, 0 );
   761     OR_imm32_r32(imm, R_EAX);
   762     store_reg( R_EAX, 0 );
   763     sh4_x86.tstate = TSTATE_NONE;
   764 :}
   765 OR.B #imm, @(R0, GBR) {:  
   766     load_reg( R_EAX, 0 );
   767     load_spreg( R_ECX, R_GBR );
   768     ADD_r32_r32( R_ECX, R_EAX );
   769     MMU_TRANSLATE_WRITE( R_EAX );
   770     PUSH_realigned_r32(R_EAX);
   771     MEM_READ_BYTE( R_EAX, R_EAX );
   772     POP_realigned_r32(R_ECX);
   773     OR_imm32_r32(imm, R_EAX );
   774     MEM_WRITE_BYTE( R_ECX, R_EAX );
   775     sh4_x86.tstate = TSTATE_NONE;
   776 :}
   777 ROTCL Rn {:
   778     load_reg( R_EAX, Rn );
   779     if( sh4_x86.tstate != TSTATE_C ) {
   780 	LDC_t();
   781     }
   782     RCL1_r32( R_EAX );
   783     store_reg( R_EAX, Rn );
   784     SETC_t();
   785     sh4_x86.tstate = TSTATE_C;
   786 :}
   787 ROTCR Rn {:  
   788     load_reg( R_EAX, Rn );
   789     if( sh4_x86.tstate != TSTATE_C ) {
   790 	LDC_t();
   791     }
   792     RCR1_r32( R_EAX );
   793     store_reg( R_EAX, Rn );
   794     SETC_t();
   795     sh4_x86.tstate = TSTATE_C;
   796 :}
   797 ROTL Rn {:  
   798     load_reg( R_EAX, Rn );
   799     ROL1_r32( R_EAX );
   800     store_reg( R_EAX, Rn );
   801     SETC_t();
   802     sh4_x86.tstate = TSTATE_C;
   803 :}
   804 ROTR Rn {:  
   805     load_reg( R_EAX, Rn );
   806     ROR1_r32( R_EAX );
   807     store_reg( R_EAX, Rn );
   808     SETC_t();
   809     sh4_x86.tstate = TSTATE_C;
   810 :}
   811 SHAD Rm, Rn {:
   812     /* Annoyingly enough, not directly convertible */
   813     load_reg( R_EAX, Rn );
   814     load_reg( R_ECX, Rm );
   815     CMP_imm32_r32( 0, R_ECX );
   816     JGE_rel8(16, doshl);
   818     NEG_r32( R_ECX );      // 2
   819     AND_imm8_r8( 0x1F, R_CL ); // 3
   820     JE_rel8( 4, emptysar);     // 2
   821     SAR_r32_CL( R_EAX );       // 2
   822     JMP_rel8(10, end);          // 2
   824     JMP_TARGET(emptysar);
   825     SAR_imm8_r32(31, R_EAX );  // 3
   826     JMP_rel8(5, end2);
   828     JMP_TARGET(doshl);
   829     AND_imm8_r8( 0x1F, R_CL ); // 3
   830     SHL_r32_CL( R_EAX );       // 2
   831     JMP_TARGET(end);
   832     JMP_TARGET(end2);
   833     store_reg( R_EAX, Rn );
   834     sh4_x86.tstate = TSTATE_NONE;
   835 :}
   836 SHLD Rm, Rn {:  
   837     load_reg( R_EAX, Rn );
   838     load_reg( R_ECX, Rm );
   839     CMP_imm32_r32( 0, R_ECX );
   840     JGE_rel8(15, doshl);
   842     NEG_r32( R_ECX );      // 2
   843     AND_imm8_r8( 0x1F, R_CL ); // 3
   844     JE_rel8( 4, emptyshr );
   845     SHR_r32_CL( R_EAX );       // 2
   846     JMP_rel8(9, end);          // 2
   848     JMP_TARGET(emptyshr);
   849     XOR_r32_r32( R_EAX, R_EAX );
   850     JMP_rel8(5, end2);
   852     JMP_TARGET(doshl);
   853     AND_imm8_r8( 0x1F, R_CL ); // 3
   854     SHL_r32_CL( R_EAX );       // 2
   855     JMP_TARGET(end);
   856     JMP_TARGET(end2);
   857     store_reg( R_EAX, Rn );
   858     sh4_x86.tstate = TSTATE_NONE;
   859 :}
   860 SHAL Rn {: 
   861     load_reg( R_EAX, Rn );
   862     SHL1_r32( R_EAX );
   863     SETC_t();
   864     store_reg( R_EAX, Rn );
   865     sh4_x86.tstate = TSTATE_C;
   866 :}
   867 SHAR Rn {:  
   868     load_reg( R_EAX, Rn );
   869     SAR1_r32( R_EAX );
   870     SETC_t();
   871     store_reg( R_EAX, Rn );
   872     sh4_x86.tstate = TSTATE_C;
   873 :}
   874 SHLL Rn {:  
   875     load_reg( R_EAX, Rn );
   876     SHL1_r32( R_EAX );
   877     SETC_t();
   878     store_reg( R_EAX, Rn );
   879     sh4_x86.tstate = TSTATE_C;
   880 :}
   881 SHLL2 Rn {:
   882     load_reg( R_EAX, Rn );
   883     SHL_imm8_r32( 2, R_EAX );
   884     store_reg( R_EAX, Rn );
   885     sh4_x86.tstate = TSTATE_NONE;
   886 :}
   887 SHLL8 Rn {:  
   888     load_reg( R_EAX, Rn );
   889     SHL_imm8_r32( 8, R_EAX );
   890     store_reg( R_EAX, Rn );
   891     sh4_x86.tstate = TSTATE_NONE;
   892 :}
   893 SHLL16 Rn {:  
   894     load_reg( R_EAX, Rn );
   895     SHL_imm8_r32( 16, R_EAX );
   896     store_reg( R_EAX, Rn );
   897     sh4_x86.tstate = TSTATE_NONE;
   898 :}
   899 SHLR Rn {:  
   900     load_reg( R_EAX, Rn );
   901     SHR1_r32( R_EAX );
   902     SETC_t();
   903     store_reg( R_EAX, Rn );
   904     sh4_x86.tstate = TSTATE_C;
   905 :}
   906 SHLR2 Rn {:  
   907     load_reg( R_EAX, Rn );
   908     SHR_imm8_r32( 2, R_EAX );
   909     store_reg( R_EAX, Rn );
   910     sh4_x86.tstate = TSTATE_NONE;
   911 :}
   912 SHLR8 Rn {:  
   913     load_reg( R_EAX, Rn );
   914     SHR_imm8_r32( 8, R_EAX );
   915     store_reg( R_EAX, Rn );
   916     sh4_x86.tstate = TSTATE_NONE;
   917 :}
   918 SHLR16 Rn {:  
   919     load_reg( R_EAX, Rn );
   920     SHR_imm8_r32( 16, R_EAX );
   921     store_reg( R_EAX, Rn );
   922     sh4_x86.tstate = TSTATE_NONE;
   923 :}
   924 SUB Rm, Rn {:  
   925     load_reg( R_EAX, Rm );
   926     load_reg( R_ECX, Rn );
   927     SUB_r32_r32( R_EAX, R_ECX );
   928     store_reg( R_ECX, Rn );
   929     sh4_x86.tstate = TSTATE_NONE;
   930 :}
   931 SUBC Rm, Rn {:  
           /* Subtract with borrow: Rn = Rn - Rm - T, via SBB on ECX.
            * If the translator has not recorded the x86 carry as holding T
            * (tstate != TSTATE_C), LDC_t() is emitted first (presumably loading T
            * into the carry flag). SETC_t() after the store records the new
            * borrow; tstate becomes TSTATE_C. */
   932     load_reg( R_EAX, Rm );
   933     load_reg( R_ECX, Rn );
   934     if( sh4_x86.tstate != TSTATE_C ) {
   935 	LDC_t();
   936     }
   937     SBB_r32_r32( R_EAX, R_ECX );
   938     store_reg( R_ECX, Rn );
   939     SETC_t();
   940     sh4_x86.tstate = TSTATE_C;
   941 :}
   942 SUBV Rm, Rn {:  
           /* Rn = Rn - Rm, followed by SETO_t() (presumably setting T from the
            * x86 overflow flag). tstate is recorded as TSTATE_O. */
   943     load_reg( R_EAX, Rm );
   944     load_reg( R_ECX, Rn );
   945     SUB_r32_r32( R_EAX, R_ECX );
   946     store_reg( R_ECX, Rn );
   947     SETO_t();
   948     sh4_x86.tstate = TSTATE_O;
   949 :}
   950 SWAP.B Rm, Rn {:  
           /* Load Rm into EAX, exchange AL and AH (the two low bytes), and store
            * the result in Rn. tstate is deliberately left untouched here; no
            * flag-writing emitter is used in this sequence. */
   951     load_reg( R_EAX, Rm );
   952     XCHG_r8_r8( R_AL, R_AH );
   953     store_reg( R_EAX, Rn );
   954 :}
   955 SWAP.W Rm, Rn {:  
           /* Swap the 16-bit halves of Rm into Rn: ECX = Rm << 16, EAX = Rm >> 16,
            * then ECX |= EAX and ECX is stored to Rn. tstate is reset to
            * TSTATE_NONE. */
   956     load_reg( R_EAX, Rm );
   957     MOV_r32_r32( R_EAX, R_ECX );
   958     SHL_imm8_r32( 16, R_ECX );
   959     SHR_imm8_r32( 16, R_EAX );
   960     OR_r32_r32( R_EAX, R_ECX );
   961     store_reg( R_ECX, Rn );
   962     sh4_x86.tstate = TSTATE_NONE;
   963 :}
   964 TAS.B @Rn {:  
           /* Test-and-set on the byte at @Rn. The address is run through
            * MMU_TRANSLATE_WRITE and saved on the stack across the byte read.
            * TEST AL,AL + SETE_t() sets T when the byte read was zero; the byte
            * is then OR-ed with 0x80 and written back to the saved address
            * (popped into ECX). The OR disturbs the flags, so tstate is reset to
            * TSTATE_NONE. */
   965     load_reg( R_EAX, Rn );
   966     MMU_TRANSLATE_WRITE( R_EAX );
   967     PUSH_realigned_r32( R_EAX );
   968     MEM_READ_BYTE( R_EAX, R_EAX );
   969     TEST_r8_r8( R_AL, R_AL );
   970     SETE_t();
   971     OR_imm8_r8( 0x80, R_AL );
   972     POP_realigned_r32( R_ECX );
   973     MEM_WRITE_BYTE( R_ECX, R_EAX );
   974     sh4_x86.tstate = TSTATE_NONE;
   975 :}
   976 TST Rm, Rn {:  
           /* TEST Rm against Rn (bitwise AND, result discarded) and SETE_t():
            * T is set when the AND is zero. Neither register is modified.
            * tstate is recorded as TSTATE_E. */
   977     load_reg( R_EAX, Rm );
   978     load_reg( R_ECX, Rn );
   979     TEST_r32_r32( R_EAX, R_ECX );
   980     SETE_t();
   981     sh4_x86.tstate = TSTATE_E;
   982 :}
   983 TST #imm, R0 {:  
           /* TEST R0 against the immediate and SETE_t(): T is set when
            * (R0 & imm) is zero. R0 is not modified. tstate is recorded as
            * TSTATE_E. */
   984     load_reg( R_EAX, 0 );
   985     TEST_imm32_r32( imm, R_EAX );
   986     SETE_t();
   987     sh4_x86.tstate = TSTATE_E;
   988 :}
   989 TST.B #imm, @(R0, GBR) {:  
           /* Test the byte at address R0 + GBR against the immediate: the byte is
            * read (after MMU_TRANSLATE_READ), TEST-ed with imm, and SETE_t() sets
            * T when the AND is zero. Memory is not modified. tstate is recorded
            * as TSTATE_E.
            *
            * GBR is a special register, so it is fetched with load_spreg() like
            * every other GBR-relative rule in this file (load_reg() takes a
            * general-register number, not a register offset). */
   990     load_reg( R_EAX, 0);
   991     load_spreg( R_ECX, R_GBR );
   992     ADD_r32_r32( R_ECX, R_EAX );
   993     MMU_TRANSLATE_READ( R_EAX );
   994     MEM_READ_BYTE( R_EAX, R_EAX );
   995     TEST_imm8_r8( imm, R_AL );
   996     SETE_t();
   997     sh4_x86.tstate = TSTATE_E;
   998 :}
   999 XOR Rm, Rn {:  
           /* Rn = Rn ^ Rm, computed in ECX. T is not written; tstate is reset to
            * TSTATE_NONE. */
  1000     load_reg( R_EAX, Rm );
  1001     load_reg( R_ECX, Rn );
  1002     XOR_r32_r32( R_EAX, R_ECX );
  1003     store_reg( R_ECX, Rn );
  1004     sh4_x86.tstate = TSTATE_NONE;
  1005 :}
  1006 XOR #imm, R0 {:  
           /* R0 = R0 ^ imm, computed in EAX. T is not written; tstate is reset to
            * TSTATE_NONE. */
  1007     load_reg( R_EAX, 0 );
  1008     XOR_imm32_r32( imm, R_EAX );
  1009     store_reg( R_EAX, 0 );
  1010     sh4_x86.tstate = TSTATE_NONE;
  1011 :}
  1012 XOR.B #imm, @(R0, GBR) {:  
           /* Read-modify-write of the byte at R0 + GBR: the address is run
            * through MMU_TRANSLATE_WRITE, kept on the stack across the byte read,
            * popped into ECX, and the byte XOR-ed with imm is written back.
            * tstate is reset to TSTATE_NONE. */
  1013     load_reg( R_EAX, 0 );
  1014     load_spreg( R_ECX, R_GBR );
  1015     ADD_r32_r32( R_ECX, R_EAX );
  1016     MMU_TRANSLATE_WRITE( R_EAX );
  1017     PUSH_realigned_r32(R_EAX);
  1018     MEM_READ_BYTE(R_EAX, R_EAX);
  1019     POP_realigned_r32(R_ECX);
  1020     XOR_imm32_r32( imm, R_EAX );
  1021     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1022     sh4_x86.tstate = TSTATE_NONE;
  1023 :}
  1024 XTRCT Rm, Rn {:
           /* Rn = (Rm << 16) | (Rn >> 16): the low half of Rm becomes the high
            * half of the result and the high half of Rn becomes the low half.
            * tstate is reset to TSTATE_NONE. */
  1025     load_reg( R_EAX, Rm );
  1026     load_reg( R_ECX, Rn );
  1027     SHL_imm8_r32( 16, R_EAX );
  1028     SHR_imm8_r32( 16, R_ECX );
  1029     OR_r32_r32( R_EAX, R_ECX );
  1030     store_reg( R_ECX, Rn );
  1031     sh4_x86.tstate = TSTATE_NONE;
  1032 :}
  1034 /* Data move instructions */
  1035 MOV Rm, Rn {:  
           /* Register copy Rn = Rm through EAX. tstate is left unchanged. */
  1036     load_reg( R_EAX, Rm );
  1037     store_reg( R_EAX, Rn );
  1038 :}
  1039 MOV #imm, Rn {:  
           /* Rn = imm, materialised in EAX with load_imm32. tstate is left
            * unchanged. */
  1040     load_imm32( R_EAX, imm );
  1041     store_reg( R_EAX, Rn );
  1042 :}
  1043 MOV.B Rm, @Rn {:  
           /* Store the byte from Rm at address Rn. The address (EAX) is run
            * through MMU_TRANSLATE_WRITE before Rm is loaded into EDX and written
            * with MEM_WRITE_BYTE. tstate is reset to TSTATE_NONE. */
  1044     load_reg( R_EAX, Rn );
  1045     MMU_TRANSLATE_WRITE( R_EAX );
  1046     load_reg( R_EDX, Rm );
  1047     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1048     sh4_x86.tstate = TSTATE_NONE;
  1049 :}
  1050 MOV.B Rm, @-Rn {:  
           /* Pre-decrement byte store: the address Rn-1 is computed in EAX and
            * translated first; Rm is read and only then is Rn itself decremented
            * in the register file, immediately before the write. Rn is therefore
            * still unmodified while the translation step runs (presumably so a
            * translation fault leaves Rn intact — confirm against
            * MMU_TRANSLATE_WRITE). tstate is reset to TSTATE_NONE. */
  1051     load_reg( R_EAX, Rn );
  1052     ADD_imm8s_r32( -1, R_EAX );
  1053     MMU_TRANSLATE_WRITE( R_EAX );
  1054     load_reg( R_EDX, Rm );
  1055     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
  1056     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1057     sh4_x86.tstate = TSTATE_NONE;
  1058 :}
  1059 MOV.B Rm, @(R0, Rn) {:  
           /* Store the byte from Rm at address R0 + Rn (sum formed in EAX,
            * then passed through MMU_TRANSLATE_WRITE). tstate is reset to
            * TSTATE_NONE. */
  1060     load_reg( R_EAX, 0 );
  1061     load_reg( R_ECX, Rn );
  1062     ADD_r32_r32( R_ECX, R_EAX );
  1063     MMU_TRANSLATE_WRITE( R_EAX );
  1064     load_reg( R_EDX, Rm );
  1065     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1066     sh4_x86.tstate = TSTATE_NONE;
  1067 :}
  1068 MOV.B R0, @(disp, GBR) {:  
           /* Store the byte from R0 at address GBR + disp. GBR is fetched with
            * load_spreg; the address is passed through MMU_TRANSLATE_WRITE.
            * tstate is reset to TSTATE_NONE. */
  1069     load_spreg( R_EAX, R_GBR );
  1070     ADD_imm32_r32( disp, R_EAX );
  1071     MMU_TRANSLATE_WRITE( R_EAX );
  1072     load_reg( R_EDX, 0 );
  1073     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1074     sh4_x86.tstate = TSTATE_NONE;
  1075 :}
  1076 MOV.B R0, @(disp, Rn) {:  
           /* Store the byte from R0 at address Rn + disp (address passed through
            * MMU_TRANSLATE_WRITE). tstate is reset to TSTATE_NONE. */
  1077     load_reg( R_EAX, Rn );
  1078     ADD_imm32_r32( disp, R_EAX );
  1079     MMU_TRANSLATE_WRITE( R_EAX );
  1080     load_reg( R_EDX, 0 );
  1081     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1082     sh4_x86.tstate = TSTATE_NONE;
  1083 :}
  1084 MOV.B @Rm, Rn {:  
           /* Load a byte from address Rm into Rn. The address is passed through
            * MMU_TRANSLATE_READ, and the MEM_READ_BYTE result (left in EAX) is
            * stored to Rn as-is. NOTE(review): sign extension, if any, would be
            * done inside MEM_READ_BYTE — not visible here. tstate is reset to
            * TSTATE_NONE. */
  1085     load_reg( R_EAX, Rm );
  1086     MMU_TRANSLATE_READ( R_EAX );
  1087     MEM_READ_BYTE( R_EAX, R_EAX );
  1088     store_reg( R_EAX, Rn );
  1089     sh4_x86.tstate = TSTATE_NONE;
  1090 :}
  1091 MOV.B @Rm+, Rn {:  
           /* Post-increment byte load: the address in Rm is translated, Rm is
            * then incremented by 1 in the register file, and the byte is read
            * and stored to Rn. Because the store to Rn comes last, the loaded
            * value wins when Rm and Rn are the same register. tstate is reset to
            * TSTATE_NONE. */
  1092     load_reg( R_EAX, Rm );
  1093     MMU_TRANSLATE_READ( R_EAX );
  1094     ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
  1095     MEM_READ_BYTE( R_EAX, R_EAX );
  1096     store_reg( R_EAX, Rn );
  1097     sh4_x86.tstate = TSTATE_NONE;
  1098 :}
  1099 MOV.B @(R0, Rm), Rn {:  
           /* Load a byte from address R0 + Rm into Rn. The sum is formed in EAX,
            * passed through MMU_TRANSLATE_READ, and the MEM_READ_BYTE result is
            * stored to Rn. tstate is reset to TSTATE_NONE.
            * (The MMU_TRANSLATE_READ invocation is now terminated with ';' like
            * every other use in this file.) */
  1100     load_reg( R_EAX, 0 );
  1101     load_reg( R_ECX, Rm );
  1102     ADD_r32_r32( R_ECX, R_EAX );
  1103     MMU_TRANSLATE_READ( R_EAX );
  1104     MEM_READ_BYTE( R_EAX, R_EAX );
  1105     store_reg( R_EAX, Rn );
  1106     sh4_x86.tstate = TSTATE_NONE;
  1107 :}
  1108 MOV.B @(disp, GBR), R0 {:  
           /* Load a byte from address GBR + disp into R0 (address passed through
            * MMU_TRANSLATE_READ). tstate is reset to TSTATE_NONE. */
  1109     load_spreg( R_EAX, R_GBR );
  1110     ADD_imm32_r32( disp, R_EAX );
  1111     MMU_TRANSLATE_READ( R_EAX );
  1112     MEM_READ_BYTE( R_EAX, R_EAX );
  1113     store_reg( R_EAX, 0 );
  1114     sh4_x86.tstate = TSTATE_NONE;
  1115 :}
  1116 MOV.B @(disp, Rm), R0 {:  
           /* Load a byte from address Rm + disp into R0 (address passed through
            * MMU_TRANSLATE_READ). tstate is reset to TSTATE_NONE. */
  1117     load_reg( R_EAX, Rm );
  1118     ADD_imm32_r32( disp, R_EAX );
  1119     MMU_TRANSLATE_READ( R_EAX );
  1120     MEM_READ_BYTE( R_EAX, R_EAX );
  1121     store_reg( R_EAX, 0 );
  1122     sh4_x86.tstate = TSTATE_NONE;
  1123 :}
  1124 MOV.L Rm, @Rn {:
           /* Store the 32-bit value of Rm at address Rn. The address is checked
            * with check_walign32 (alignment check, defined elsewhere) before it
            * is passed through MMU_TRANSLATE_WRITE. tstate is reset to
            * TSTATE_NONE. */
  1125     load_reg( R_EAX, Rn );
  1126     check_walign32(R_EAX);
  1127     MMU_TRANSLATE_WRITE( R_EAX );
  1128     load_reg( R_EDX, Rm );
  1129     MEM_WRITE_LONG( R_EAX, R_EDX );
  1130     sh4_x86.tstate = TSTATE_NONE;
  1131 :}
  1132 MOV.L Rm, @-Rn {:  
           /* Pre-decrement long store: address Rn-4 is computed in EAX, alignment
            * checked and translated; Rm is read, and only then is Rn decremented
            * by 4 in the register file, immediately before the write. tstate is
            * reset to TSTATE_NONE. */
  1133     load_reg( R_EAX, Rn );
  1134     ADD_imm8s_r32( -4, R_EAX );
  1135     check_walign32( R_EAX );
  1136     MMU_TRANSLATE_WRITE( R_EAX );
  1137     load_reg( R_EDX, Rm );
  1138     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  1139     MEM_WRITE_LONG( R_EAX, R_EDX );
  1140     sh4_x86.tstate = TSTATE_NONE;
  1141 :}
  1142 MOV.L Rm, @(R0, Rn) {:  
           /* Store the 32-bit value of Rm at address R0 + Rn (alignment check,
            * then MMU_TRANSLATE_WRITE). tstate is reset to TSTATE_NONE. */
  1143     load_reg( R_EAX, 0 );
  1144     load_reg( R_ECX, Rn );
  1145     ADD_r32_r32( R_ECX, R_EAX );
  1146     check_walign32( R_EAX );
  1147     MMU_TRANSLATE_WRITE( R_EAX );
  1148     load_reg( R_EDX, Rm );
  1149     MEM_WRITE_LONG( R_EAX, R_EDX );
  1150     sh4_x86.tstate = TSTATE_NONE;
  1151 :}
  1152 MOV.L R0, @(disp, GBR) {:  
           /* Store the 32-bit value of R0 at address GBR + disp (alignment check,
            * then MMU_TRANSLATE_WRITE). tstate is reset to TSTATE_NONE. */
  1153     load_spreg( R_EAX, R_GBR );
  1154     ADD_imm32_r32( disp, R_EAX );
  1155     check_walign32( R_EAX );
  1156     MMU_TRANSLATE_WRITE( R_EAX );
  1157     load_reg( R_EDX, 0 );
  1158     MEM_WRITE_LONG( R_EAX, R_EDX );
  1159     sh4_x86.tstate = TSTATE_NONE;
  1160 :}
  1161 MOV.L Rm, @(disp, Rn) {:  
           /* Store the 32-bit value of Rm at address Rn + disp (alignment check,
            * then MMU_TRANSLATE_WRITE). tstate is reset to TSTATE_NONE. */
  1162     load_reg( R_EAX, Rn );
  1163     ADD_imm32_r32( disp, R_EAX );
  1164     check_walign32( R_EAX );
  1165     MMU_TRANSLATE_WRITE( R_EAX );
  1166     load_reg( R_EDX, Rm );
  1167     MEM_WRITE_LONG( R_EAX, R_EDX );
  1168     sh4_x86.tstate = TSTATE_NONE;
  1169 :}
  1170 MOV.L @Rm, Rn {:  
           /* Load the 32-bit value at address Rm into Rn. The address is checked
            * with check_ralign32 and passed through MMU_TRANSLATE_READ before the
            * read. tstate is reset to TSTATE_NONE. */
  1171     load_reg( R_EAX, Rm );
  1172     check_ralign32( R_EAX );
  1173     MMU_TRANSLATE_READ( R_EAX );
  1174     MEM_READ_LONG( R_EAX, R_EAX );
  1175     store_reg( R_EAX, Rn );
  1176     sh4_x86.tstate = TSTATE_NONE;
  1177 :}
  1178 MOV.L @Rm+, Rn {:  
           /* Post-increment long load: the address in Rm is alignment-checked and
            * translated, Rm is incremented by 4 in the register file, then the
            * value is read and stored to Rn (last, so the loaded value wins when
            * Rm and Rn are the same register). tstate is reset to TSTATE_NONE. */
  1179     load_reg( R_EAX, Rm );
  1180     check_ralign32( R_EAX );
  1181     MMU_TRANSLATE_READ( R_EAX );
  1182     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1183     MEM_READ_LONG( R_EAX, R_EAX );
  1184     store_reg( R_EAX, Rn );
  1185     sh4_x86.tstate = TSTATE_NONE;
  1186 :}
  1187 MOV.L @(R0, Rm), Rn {:  
           /* Load the 32-bit value at address R0 + Rm into Rn (alignment check,
            * then MMU_TRANSLATE_READ). tstate is reset to TSTATE_NONE. */
  1188     load_reg( R_EAX, 0 );
  1189     load_reg( R_ECX, Rm );
  1190     ADD_r32_r32( R_ECX, R_EAX );
  1191     check_ralign32( R_EAX );
  1192     MMU_TRANSLATE_READ( R_EAX );
  1193     MEM_READ_LONG( R_EAX, R_EAX );
  1194     store_reg( R_EAX, Rn );
  1195     sh4_x86.tstate = TSTATE_NONE;
  1196 :}
  1197 MOV.L @(disp, GBR), R0 {:
           /* Load the 32-bit value at address GBR + disp into R0 (alignment
            * check, then MMU_TRANSLATE_READ). tstate is reset to TSTATE_NONE. */
  1198     load_spreg( R_EAX, R_GBR );
  1199     ADD_imm32_r32( disp, R_EAX );
  1200     check_ralign32( R_EAX );
  1201     MMU_TRANSLATE_READ( R_EAX );
  1202     MEM_READ_LONG( R_EAX, R_EAX );
  1203     store_reg( R_EAX, 0 );
  1204     sh4_x86.tstate = TSTATE_NONE;
  1205 :}
  1206 MOV.L @(disp, PC), Rn {:  
           /* PC-relative long load into Rn; illegal in a delay slot.
            * The target is ((pc & ~3) + disp + 4). When that address passes
            * IS_IN_ICACHE the literal is read directly through a host pointer;
            * otherwise the address is rebuilt at run time from sh4r.pc and read
            * through the normal translate + read path. Both paths leave the
            * value in EAX, which is then stored to Rn. */
  1207     if( sh4_x86.in_delay_slot ) {
  1208 	SLOTILLEGAL();
  1209     } else {
  1210 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1211 	if( IS_IN_ICACHE(target) ) {
  1212 	    // If the target address is in the same page as the code, it's
  1213 	    // pretty safe to just ref it directly and circumvent the whole
  1214 	    // memory subsystem. (this is a big performance win)
  1216 	    // FIXME: There's a corner-case that's not handled here when
  1217 	    // the current code-page is in the ITLB but not in the UTLB.
  1218 	    // (should generate a TLB miss although need to test SH4 
  1219 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1220 	    // behaviour though.
  1221 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1222 	    MOV_moff32_EAX( ptr );
  1223 	} else {
  1224 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1225 	    // different virtual address than the translation was done with,
  1226 	    // but we can safely assume that the low bits are the same.
  1227 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1228 	    ADD_sh4r_r32( R_PC, R_EAX );
  1229 	    MMU_TRANSLATE_READ( R_EAX );
  1230 	    MEM_READ_LONG( R_EAX, R_EAX );
  1231 	    sh4_x86.tstate = TSTATE_NONE;
  1232 	}
  1233 	store_reg( R_EAX, Rn );
  1234     }
  1235 :}
  1236 MOV.L @(disp, Rm), Rn {:  
           /* Load the 32-bit value at address Rm + disp into Rn (alignment check,
            * then MMU_TRANSLATE_READ). The displacement is added with the
            * sign-extended 8-bit immediate form (ADD_imm8s_r32), unlike the
            * sibling rules which use ADD_imm32_r32 — assumes disp always fits in
            * a signed byte; confirm against the decoder. tstate is reset to
            * TSTATE_NONE. */
  1237     load_reg( R_EAX, Rm );
  1238     ADD_imm8s_r32( disp, R_EAX );
  1239     check_ralign32( R_EAX );
  1240     MMU_TRANSLATE_READ( R_EAX );
  1241     MEM_READ_LONG( R_EAX, R_EAX );
  1242     store_reg( R_EAX, Rn );
  1243     sh4_x86.tstate = TSTATE_NONE;
  1244 :}
  1245 MOV.W Rm, @Rn {:  
           /* Store the 16-bit value of Rm at address Rn. The address is checked
            * with check_walign16 and passed through MMU_TRANSLATE_WRITE before
            * the write. tstate is reset to TSTATE_NONE.
            * (The MMU_TRANSLATE_WRITE invocation is now terminated with ';' like
            * every other use in this file.) */
  1246     load_reg( R_EAX, Rn );
  1247     check_walign16( R_EAX );
  1248     MMU_TRANSLATE_WRITE( R_EAX );
  1249     load_reg( R_EDX, Rm );
  1250     MEM_WRITE_WORD( R_EAX, R_EDX );
  1251     sh4_x86.tstate = TSTATE_NONE;
  1252 :}
  1253 MOV.W Rm, @-Rn {:  
           /* Pre-decrement word store: address Rn-2 is computed in EAX, alignment
            * checked and translated; Rm is read, and only then is Rn decremented
            * by 2 in the register file, immediately before the write. tstate is
            * reset to TSTATE_NONE. */
  1254     load_reg( R_EAX, Rn );
  1255     ADD_imm8s_r32( -2, R_EAX );
  1256     check_walign16( R_EAX );
  1257     MMU_TRANSLATE_WRITE( R_EAX );
  1258     load_reg( R_EDX, Rm );
  1259     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
  1260     MEM_WRITE_WORD( R_EAX, R_EDX );
  1261     sh4_x86.tstate = TSTATE_NONE;
  1262 :}
  1263 MOV.W Rm, @(R0, Rn) {:  
           /* Store the 16-bit value of Rm at address R0 + Rn (alignment check,
            * then MMU_TRANSLATE_WRITE). tstate is reset to TSTATE_NONE. */
  1264     load_reg( R_EAX, 0 );
  1265     load_reg( R_ECX, Rn );
  1266     ADD_r32_r32( R_ECX, R_EAX );
  1267     check_walign16( R_EAX );
  1268     MMU_TRANSLATE_WRITE( R_EAX );
  1269     load_reg( R_EDX, Rm );
  1270     MEM_WRITE_WORD( R_EAX, R_EDX );
  1271     sh4_x86.tstate = TSTATE_NONE;
  1272 :}
  1273 MOV.W R0, @(disp, GBR) {:  
           /* Store the 16-bit value of R0 at address GBR + disp (alignment check,
            * then MMU_TRANSLATE_WRITE). tstate is reset to TSTATE_NONE. */
  1274     load_spreg( R_EAX, R_GBR );
  1275     ADD_imm32_r32( disp, R_EAX );
  1276     check_walign16( R_EAX );
  1277     MMU_TRANSLATE_WRITE( R_EAX );
  1278     load_reg( R_EDX, 0 );
  1279     MEM_WRITE_WORD( R_EAX, R_EDX );
  1280     sh4_x86.tstate = TSTATE_NONE;
  1281 :}
  1282 MOV.W R0, @(disp, Rn) {:  
           /* Store the 16-bit value of R0 at address Rn + disp (alignment check,
            * then MMU_TRANSLATE_WRITE). tstate is reset to TSTATE_NONE. */
  1283     load_reg( R_EAX, Rn );
  1284     ADD_imm32_r32( disp, R_EAX );
  1285     check_walign16( R_EAX );
  1286     MMU_TRANSLATE_WRITE( R_EAX );
  1287     load_reg( R_EDX, 0 );
  1288     MEM_WRITE_WORD( R_EAX, R_EDX );
  1289     sh4_x86.tstate = TSTATE_NONE;
  1290 :}
  1291 MOV.W @Rm, Rn {:  
           /* Load the 16-bit value at address Rm into Rn (check_ralign16, then
            * MMU_TRANSLATE_READ). The MEM_READ_WORD result is stored as-is;
            * NOTE(review): any sign extension would be done inside
            * MEM_READ_WORD — not visible here. tstate is reset to TSTATE_NONE. */
  1292     load_reg( R_EAX, Rm );
  1293     check_ralign16( R_EAX );
  1294     MMU_TRANSLATE_READ( R_EAX );
  1295     MEM_READ_WORD( R_EAX, R_EAX );
  1296     store_reg( R_EAX, Rn );
  1297     sh4_x86.tstate = TSTATE_NONE;
  1298 :}
  1299 MOV.W @Rm+, Rn {:  
           /* Post-increment word load: the address in Rm is alignment-checked and
            * translated, Rm is incremented by 2 in the register file, then the
            * value is read and stored to Rn (last, so the loaded value wins when
            * Rm and Rn are the same register). tstate is reset to TSTATE_NONE. */
  1300     load_reg( R_EAX, Rm );
  1301     check_ralign16( R_EAX );
  1302     MMU_TRANSLATE_READ( R_EAX );
  1303     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
  1304     MEM_READ_WORD( R_EAX, R_EAX );
  1305     store_reg( R_EAX, Rn );
  1306     sh4_x86.tstate = TSTATE_NONE;
  1307 :}
  1308 MOV.W @(R0, Rm), Rn {:  
           /* Load the 16-bit value at address R0 + Rm into Rn (alignment check,
            * then MMU_TRANSLATE_READ). tstate is reset to TSTATE_NONE. */
  1309     load_reg( R_EAX, 0 );
  1310     load_reg( R_ECX, Rm );
  1311     ADD_r32_r32( R_ECX, R_EAX );
  1312     check_ralign16( R_EAX );
  1313     MMU_TRANSLATE_READ( R_EAX );
  1314     MEM_READ_WORD( R_EAX, R_EAX );
  1315     store_reg( R_EAX, Rn );
  1316     sh4_x86.tstate = TSTATE_NONE;
  1317 :}
  1318 MOV.W @(disp, GBR), R0 {:  
           /* Load the 16-bit value at address GBR + disp into R0 (alignment
            * check, then MMU_TRANSLATE_READ). tstate is reset to TSTATE_NONE. */
  1319     load_spreg( R_EAX, R_GBR );
  1320     ADD_imm32_r32( disp, R_EAX );
  1321     check_ralign16( R_EAX );
  1322     MMU_TRANSLATE_READ( R_EAX );
  1323     MEM_READ_WORD( R_EAX, R_EAX );
  1324     store_reg( R_EAX, 0 );
  1325     sh4_x86.tstate = TSTATE_NONE;
  1326 :}
  1327 MOV.W @(disp, PC), Rn {:  
           /* PC-relative word load into Rn; illegal in a delay slot.
            * The target is pc + disp + 4 (no longword masking, unlike the MOV.L
            * form). When the target passes IS_IN_ICACHE the value is read
            * directly through a host pointer and sign-extended from 16 bits with
            * MOVSX; otherwise the address is rebuilt at run time from sh4r.pc and
            * read via the translate + MEM_READ_WORD path. */
  1328     if( sh4_x86.in_delay_slot ) {
  1329 	SLOTILLEGAL();
  1330     } else {
  1331 	// See comments for MOV.L @(disp, PC), Rn
  1332 	uint32_t target = pc + disp + 4;
  1333 	if( IS_IN_ICACHE(target) ) {
  1334 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1335 	    MOV_moff32_EAX( ptr );
  1336 	    MOVSX_r16_r32( R_EAX, R_EAX );
  1337 	} else {
  1338 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1339 	    ADD_sh4r_r32( R_PC, R_EAX );
  1340 	    MMU_TRANSLATE_READ( R_EAX );
  1341 	    MEM_READ_WORD( R_EAX, R_EAX );
  1342 	    sh4_x86.tstate = TSTATE_NONE;
  1343 	}
  1344 	store_reg( R_EAX, Rn );
  1345     }
  1346 :}
  1347 MOV.W @(disp, Rm), R0 {:  
           /* Load the 16-bit value at address Rm + disp into R0 (alignment check,
            * then MMU_TRANSLATE_READ). tstate is reset to TSTATE_NONE. */
  1348     load_reg( R_EAX, Rm );
  1349     ADD_imm32_r32( disp, R_EAX );
  1350     check_ralign16( R_EAX );
  1351     MMU_TRANSLATE_READ( R_EAX );
  1352     MEM_READ_WORD( R_EAX, R_EAX );
  1353     store_reg( R_EAX, 0 );
  1354     sh4_x86.tstate = TSTATE_NONE;
  1355 :}
  1356 MOVA @(disp, PC), R0 {:  
           /* R0 = longword-aligned PC + disp + 4; illegal in a delay slot.
            * The address is rebuilt at run time: a block-relative constant
            * ((pc - block_start_pc) + disp + 4 - (pc & 3)) is added to the
            * sh4r.pc value. No memory access is made. The ADD disturbs the
            * flags, so tstate is reset to TSTATE_NONE. */
  1357     if( sh4_x86.in_delay_slot ) {
  1358 	SLOTILLEGAL();
  1359     } else {
  1360 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1361 	ADD_sh4r_r32( R_PC, R_ECX );
  1362 	store_reg( R_ECX, 0 );
  1363 	sh4_x86.tstate = TSTATE_NONE;
  1364     }
  1365 :}
  1366 MOVCA.L R0, @Rn {:  
           /* Translated exactly like a plain 32-bit store of R0 to address Rn
            * (alignment check, MMU_TRANSLATE_WRITE, MEM_WRITE_LONG); no
            * cache-specific handling is emitted here. tstate is reset to
            * TSTATE_NONE. */
  1367     load_reg( R_EAX, Rn );
  1368     check_walign32( R_EAX );
  1369     MMU_TRANSLATE_WRITE( R_EAX );
  1370     load_reg( R_EDX, 0 );
  1371     MEM_WRITE_LONG( R_EAX, R_EDX );
  1372     sh4_x86.tstate = TSTATE_NONE;
  1373 :}
  1375 /* Control transfer instructions */
  1376 BF disp {:
           /* Conditional branch without delay slot; illegal inside a delay slot.
            * JT_rel8 hops over the block-exit sequence to the 'nottaken' label
            * (presumably when T is set), so the exit to 'target' is taken
            * otherwise. Returns 2 (presumably the number of bytes consumed). */
  1377     if( sh4_x86.in_delay_slot ) {
  1378 	SLOTILLEGAL();
  1379     } else {
  1380 	sh4vma_t target = disp + pc + 4;
  1381 	JT_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
  1382 	exit_block_rel(target, pc+2 );
  1383 	JMP_TARGET(nottaken);
  1384 	return 2;
  1385     }
  1386 :}
  1387 BF/S disp {:
           /* Conditional branch with delay slot; illegal inside a delay slot.
            * If no x86 flag is currently recorded as holding T, T is compared
            * with 1 and tstate becomes TSTATE_E. A Jcc rel32 with opcode
            * 0x0F, 0x80+tstate is emitted with a zero displacement that is
            * back-patched once the taken path has been generated. The delay-slot
            * instruction at pc+2 is translated twice: once on the taken path
            * (followed by the block exit to 'target') and once on the
            * fall-through path. Returns 4 (presumably bytes consumed).
            * NOTE(review): the trailing comment below says JNE, but with
            * tstate == TSTATE_E the opcode 0x80+tstate looks like the
            * "equal" form — confirm against the TSTATE_* values. */
  1388     if( sh4_x86.in_delay_slot ) {
  1389 	SLOTILLEGAL();
  1390     } else {
  1391 	sh4vma_t target = disp + pc + 4;
  1392 	sh4_x86.in_delay_slot = TRUE;
  1393 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1394 	    CMP_imm8s_sh4r( 1, R_T );
  1395 	    sh4_x86.tstate = TSTATE_E;
  1396 	}
  1397 	OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JNE rel32
  1398 	sh4_translate_instruction(pc+2);
  1399 	exit_block_rel( target, pc+4 );
  1400 	// not taken
  1401 	*patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1402 	sh4_translate_instruction(pc+2);
  1403 	return 4;
  1404     }
  1405 :}
  1406 BRA disp {:  
           /* Unconditional PC-relative branch with delay slot; illegal inside a
            * delay slot. The delay-slot instruction at pc+2 is translated first,
            * then the block exits to disp + pc + 4. branch_taken is set and 4 is
            * returned (presumably bytes consumed). */
  1407     if( sh4_x86.in_delay_slot ) {
  1408 	SLOTILLEGAL();
  1409     } else {
  1410 	sh4_x86.in_delay_slot = TRUE;
  1411 	sh4_translate_instruction( pc + 2 );
  1412 	exit_block_rel( disp + pc + 4, pc+4 );
  1413 	sh4_x86.branch_taken = TRUE;
  1414 	return 4;
  1415     }
  1416 :}
  1417 BRAF Rn {:  
           /* Register-relative branch with delay slot; illegal inside a delay
            * slot. The new PC (Rn + pc + 4) is written to sh4r.pc before the
            * delay-slot instruction is translated, so the slot cannot change the
            * branch target. The ADD disturbs the flags, hence tstate is reset to
            * TSTATE_NONE. The block then exits via exit_block_pcset. */
  1418     if( sh4_x86.in_delay_slot ) {
  1419 	SLOTILLEGAL();
  1420     } else {
  1421 	load_reg( R_EAX, Rn );
  1422 	ADD_imm32_r32( pc + 4, R_EAX );
  1423 	store_spreg( R_EAX, REG_OFFSET(pc) );
  1424 	sh4_x86.in_delay_slot = TRUE;
  1425 	sh4_x86.tstate = TSTATE_NONE;
  1426 	sh4_translate_instruction( pc + 2 );
  1427 	exit_block_pcset(pc+2);
  1428 	sh4_x86.branch_taken = TRUE;
  1429 	return 4;
  1430     }
  1431 :}
  1432 BSR disp {:  
           /* PC-relative subroutine call with delay slot; illegal inside a delay
            * slot. PR is set to pc + 4 (the return address) before the
            * delay-slot instruction at pc+2 is translated; the block then exits
            * to disp + pc + 4. */
  1433     if( sh4_x86.in_delay_slot ) {
  1434 	SLOTILLEGAL();
  1435     } else {
  1436 	load_imm32( R_EAX, pc + 4 );
  1437 	store_spreg( R_EAX, R_PR );
  1438 	sh4_x86.in_delay_slot = TRUE;
  1439 	sh4_translate_instruction( pc + 2 );
  1440 	exit_block_rel( disp + pc + 4, pc+4 );
  1441 	sh4_x86.branch_taken = TRUE;
  1442 	return 4;
  1443     }
  1444 :}
  1445 BSRF Rn {:  
           /* Register-relative subroutine call with delay slot; illegal inside a
            * delay slot. ECX first holds pc + 4, which is saved to PR; Rn is then
            * added to the same register to form the new PC, written to sh4r.pc
            * before the delay slot is translated. The ADD disturbs the flags,
            * hence tstate is reset to TSTATE_NONE. */
  1446     if( sh4_x86.in_delay_slot ) {
  1447 	SLOTILLEGAL();
  1448     } else {
  1449 	load_imm32( R_ECX, pc + 4 );
  1450 	store_spreg( R_ECX, R_PR );
  1451 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_ECX );
  1452 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1453 	sh4_x86.in_delay_slot = TRUE;
  1454 	sh4_x86.tstate = TSTATE_NONE;
  1455 	sh4_translate_instruction( pc + 2 );
  1456 	exit_block_pcset(pc+2);
  1457 	sh4_x86.branch_taken = TRUE;
  1458 	return 4;
  1459     }
  1460 :}
  1461 BT disp {:
           /* Conditional branch without delay slot; illegal inside a delay slot.
            * Mirror image of BF: JF_rel8 hops over the block-exit sequence to the
            * 'nottaken' label (presumably when T is clear), so the exit to
            * 'target' is taken otherwise. Returns 2 (presumably bytes consumed). */
  1462     if( sh4_x86.in_delay_slot ) {
  1463 	SLOTILLEGAL();
  1464     } else {
  1465 	sh4vma_t target = disp + pc + 4;
  1466 	JF_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
  1467 	exit_block_rel(target, pc+2 );
  1468 	JMP_TARGET(nottaken);
  1469 	return 2;
  1470     }
  1471 :}
  1472 BT/S disp {:
           /* Conditional branch with delay slot; illegal inside a delay slot.
            * Same shape as BF/S, but the emitted Jcc uses the inverted condition
            * (0x80 + (tstate ^ 1)). The zero rel32 displacement is back-patched
            * after the taken path (delay slot + block exit) has been generated;
            * the delay slot is then translated a second time for the
            * fall-through path. Returns 4 (presumably bytes consumed).
            * NOTE(review): the trailing comment below says JE, but with
            * tstate == TSTATE_E the opcode 0x80+(tstate^1) looks like the
            * "not equal" form — confirm against the TSTATE_* values. */
  1473     if( sh4_x86.in_delay_slot ) {
  1474 	SLOTILLEGAL();
  1475     } else {
  1476 	sh4_x86.in_delay_slot = TRUE;
  1477 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1478 	    CMP_imm8s_sh4r( 1, R_T );
  1479 	    sh4_x86.tstate = TSTATE_E;
  1480 	}
  1481 	OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JE rel32
  1482 	sh4_translate_instruction(pc+2);
  1483 	exit_block_rel( disp + pc + 4, pc+4 );
  1484 	// not taken
  1485 	*patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1486 	sh4_translate_instruction(pc+2);
  1487 	return 4;
  1488     }
  1489 :}
  1490 JMP @Rn {:  
           /* Indirect jump with delay slot; illegal inside a delay slot.
            * Rn is copied to sh4r.pc before the delay-slot instruction is
            * translated, so the slot cannot change the target. The block then
            * exits via exit_block_pcset. */
  1491     if( sh4_x86.in_delay_slot ) {
  1492 	SLOTILLEGAL();
  1493     } else {
  1494 	load_reg( R_ECX, Rn );
  1495 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1496 	sh4_x86.in_delay_slot = TRUE;
  1497 	sh4_translate_instruction(pc+2);
  1498 	exit_block_pcset(pc+2);
  1499 	sh4_x86.branch_taken = TRUE;
  1500 	return 4;
  1501     }
  1502 :}
  1503 JSR @Rn {:  
           /* Indirect subroutine call with delay slot; illegal inside a delay
            * slot. PR is set to pc + 4 and Rn is copied to sh4r.pc, both before
            * the delay-slot instruction is translated. The block then exits via
            * exit_block_pcset. */
  1504     if( sh4_x86.in_delay_slot ) {
  1505 	SLOTILLEGAL();
  1506     } else {
  1507 	load_imm32( R_EAX, pc + 4 );
  1508 	store_spreg( R_EAX, R_PR );
  1509 	load_reg( R_ECX, Rn );
  1510 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1511 	sh4_x86.in_delay_slot = TRUE;
  1512 	sh4_translate_instruction(pc+2);
  1513 	exit_block_pcset(pc+2);
  1514 	sh4_x86.branch_taken = TRUE;
  1515 	return 4;
  1516     }
  1517 :}
  1518 RTE {:  
           /* Return from exception (with delay slot); illegal inside a delay
            * slot. After check_priv(), SPC is copied to sh4r.pc and SSR is passed
            * to sh4_write_sr(). Because SR has just been rewritten, the cached
            * priv_checked / fpuen_checked flags are cleared so that the
            * delay-slot instruction and anything after it re-check them. The
            * call disturbs the x86 flags, hence tstate is reset to TSTATE_NONE. */
  1519     if( sh4_x86.in_delay_slot ) {
  1520 	SLOTILLEGAL();
  1521     } else {
  1522 	check_priv();
  1523 	load_spreg( R_ECX, R_SPC );
  1524 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1525 	load_spreg( R_EAX, R_SSR );
  1526 	call_func1( sh4_write_sr, R_EAX );
  1527 	sh4_x86.in_delay_slot = TRUE;
  1528 	sh4_x86.priv_checked = FALSE;
  1529 	sh4_x86.fpuen_checked = FALSE;
  1530 	sh4_x86.tstate = TSTATE_NONE;
  1531 	sh4_translate_instruction(pc+2);
  1532 	exit_block_pcset(pc+2);
  1533 	sh4_x86.branch_taken = TRUE;
  1534 	return 4;
  1535     }
  1536 :}
  1537 RTS {:  
           /* Return from subroutine (with delay slot); illegal inside a delay
            * slot. PR is copied to sh4r.pc before the delay-slot instruction is
            * translated; the block then exits via exit_block_pcset. */
  1538     if( sh4_x86.in_delay_slot ) {
  1539 	SLOTILLEGAL();
  1540     } else {
  1541 	load_spreg( R_ECX, R_PR );
  1542 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1543 	sh4_x86.in_delay_slot = TRUE;
  1544 	sh4_translate_instruction(pc+2);
  1545 	exit_block_pcset(pc+2);
  1546 	sh4_x86.branch_taken = TRUE;
  1547 	return 4;
  1548     }
  1549 :}
  1550 TRAPA #imm {:  
           /* Software trap; illegal inside a delay slot. sh4r.pc is set to pc + 2
            * (the instruction after the TRAPA) before sh4_raise_trap(imm) is
            * called, and the block then exits via exit_block_pcset. There is no
            * delay slot, so 2 is returned. The call disturbs the x86 flags,
            * hence tstate is reset to TSTATE_NONE. */
  1551     if( sh4_x86.in_delay_slot ) {
  1552 	SLOTILLEGAL();
  1553     } else {
  1554 	load_imm32( R_ECX, pc+2 );
  1555 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1556 	load_imm32( R_EAX, imm );
  1557 	call_func1( sh4_raise_trap, R_EAX );
  1558 	sh4_x86.tstate = TSTATE_NONE;
  1559 	exit_block_pcset(pc);
  1560 	sh4_x86.branch_taken = TRUE;
  1561 	return 2;
  1562     }
  1563 :}
  1564 UNDEF {:  
           /* Undefined opcode: emits SLOTILLEGAL() when found in a delay slot,
            * otherwise a jump to the EXC_ILLEGAL exception path, and returns 2. */
  1565     if( sh4_x86.in_delay_slot ) {
  1566 	SLOTILLEGAL();
  1567     } else {
  1568 	JMP_exc(EXC_ILLEGAL);
  1569 	return 2;
  1570     }
  1571 :}
  1573 CLRMAC {:  
           /* Zero both MACL and MACH by storing a cleared EAX. The XOR used to
            * clear EAX disturbs the flags, so tstate is reset to TSTATE_NONE. */
  1574     XOR_r32_r32(R_EAX, R_EAX);
  1575     store_spreg( R_EAX, R_MACL );
  1576     store_spreg( R_EAX, R_MACH );
  1577     sh4_x86.tstate = TSTATE_NONE;
  1578 :}
  1579 CLRS {:
           /* Clear the S bit: the x86 carry is cleared and written to R_S with
            * SETC_sh4r. The carry flag now reflects S, not T, so no x86 flag may
            * be recorded as mirroring T afterwards: tstate is reset to
            * TSTATE_NONE (recording TSTATE_C here would make a following
            * T-dependent instruction read the stale carry instead of T). */
  1580     CLC();
  1581     SETC_sh4r(R_S);
  1582     sh4_x86.tstate = TSTATE_NONE;
  1583 :}
  1584 CLRT {:  
           /* Clear T: the x86 carry is cleared with CLC and SETC_t() follows
            * (presumably copying the carry into T). tstate is recorded as
            * TSTATE_C. */
  1585     CLC();
  1586     SETC_t();
  1587     sh4_x86.tstate = TSTATE_C;
  1588 :}
  1589 SETS {:  
           /* Set the S bit: the x86 carry is set and written to R_S with
            * SETC_sh4r. The carry flag now reflects S, not T, so no x86 flag may
            * be recorded as mirroring T afterwards: tstate is reset to
            * TSTATE_NONE (recording TSTATE_C here would make a following
            * T-dependent instruction read the stale carry instead of T). */
  1590     STC();
  1591     SETC_sh4r(R_S);
  1592     sh4_x86.tstate = TSTATE_NONE;
  1593 :}
  1594 SETT {:  
           /* Set T: the x86 carry is set with STC and SETC_t() follows
            * (presumably copying the carry into T). tstate is recorded as
            * TSTATE_C. */
  1595     STC();
  1596     SETC_t();
  1597     sh4_x86.tstate = TSTATE_C;
  1598 :}
  1600 /* Floating point moves */
  1601 FMOV FRm, FRn {:  
  1602     /* As horrible as this looks, it's actually covering 5 separate cases,
  1603      * selected at run time by the FPSCR_SZ bit and at translation time by
           * the low bits of FRm / FRn:
           * 1. 32-bit fr-to-fr (SZ=0)
  1604      * 2. 64-bit dr-to-dr (SZ=1, FRm&1 == 0, FRn&1 == 0 )
  1605      * 3. 64-bit dr-to-xd (SZ=1, FRm&1 == 0, FRn&1 == 1 )
  1606      * 4. 64-bit xd-to-dr (SZ=1, FRm&1 == 1, FRn&1 == 0 )
  1607      * 5. 64-bit xd-to-xd (SZ=1, FRm&1 == 1, FRn&1 == 1 )
           *
           * The rel8 jump distances are hand-counted byte sizes of the emitted
           * x86 code; JMP_TARGET() marks where each jump is expected to land.
  1608      */
  1609     check_fpuen();
  1610     load_spreg( R_ECX, R_FPSCR );
  1611     load_fr_bank( R_EDX );
  1612     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1613     JNE_rel8(8, doublesize);
  1614     load_fr( R_EDX, R_EAX, FRm ); // SZ=0 branch
  1615     store_fr( R_EDX, R_EAX, FRn );
  1616     if( FRm&1 ) {
  1617 	JMP_rel8(24, end);
  1618 	JMP_TARGET(doublesize);
  1619 	load_xf_bank( R_ECX ); 
  1620 	load_fr( R_ECX, R_EAX, FRm-1 );
  1621 	if( FRn&1 ) {
  1622 	    load_fr( R_ECX, R_EDX, FRm );
  1623 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1624 	    store_fr( R_ECX, R_EDX, FRn );
  1625 	} else /* FRn&1 == 0 */ {
  1626 	    load_fr( R_ECX, R_ECX, FRm );
  1627 	    store_fr( R_EDX, R_EAX, FRn );
  1628 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1629 	}
  1630 	JMP_TARGET(end);
  1631     } else /* FRm&1 == 0 */ {
  1632 	if( FRn&1 ) {
  1633 	    JMP_rel8(24, end);
       	    JMP_TARGET(doublesize);
  1634 	    load_xf_bank( R_ECX );
  1635 	    load_fr( R_EDX, R_EAX, FRm );
  1636 	    load_fr( R_EDX, R_EDX, FRm+1 );
  1637 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1638 	    store_fr( R_ECX, R_EDX, FRn );
  1639 	    JMP_TARGET(end);
  1640 	} else /* FRn&1 == 0 */ {
  1641 	    JMP_rel8(12, end);
       	    JMP_TARGET(doublesize);
  1642 	    load_fr( R_EDX, R_EAX, FRm );
  1643 	    load_fr( R_EDX, R_ECX, FRm+1 );
  1644 	    store_fr( R_EDX, R_EAX, FRn );
  1645 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1646 	    JMP_TARGET(end);
  1647 	}
  1648     }
  1649     sh4_x86.tstate = TSTATE_NONE;
  1650 :}
  1651 FMOV FRm, @Rn {: 
           /* FP store to address Rn. The address is alignment-checked (32-bit)
            * and translated once, then FPSCR_SZ selects at run time between a
            * 32-bit write of FRm and a 64-bit write of the FRm&0x0E / FRm|0x01
            * pair. For odd FRm the pair is taken from the bank loaded by
            * load_xf_bank, otherwise from load_fr_bank. The rel8 distances are
            * hand-counted sizes of the emitted code. */
  1652     check_fpuen();
  1653     load_reg( R_EAX, Rn );
  1654     check_walign32( R_EAX );
  1655     MMU_TRANSLATE_WRITE( R_EAX );
  1656     load_spreg( R_EDX, R_FPSCR );
  1657     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1658     JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
  1659     load_fr_bank( R_EDX );
  1660     load_fr( R_EDX, R_ECX, FRm );
  1661     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1662     if( FRm&1 ) {
  1663 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1664 	JMP_TARGET(doublesize);
  1665 	load_xf_bank( R_EDX );
  1666 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1667 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1668 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1669 	JMP_TARGET(end);
  1670     } else {
  1671 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1672 	JMP_TARGET(doublesize);
  1673 	load_fr_bank( R_EDX );
  1674 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1675 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1676 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1677 	JMP_TARGET(end);
  1678     }
  1679     sh4_x86.tstate = TSTATE_NONE;
  1680 :}
  1681 FMOV @Rm, FRn {:  
           /* FP load from address Rm. The address is alignment-checked (32-bit)
            * and translated once, then FPSCR_SZ selects at run time between a
            * 32-bit read into FRn and a 64-bit read into the FRn&0x0E / FRn|0x01
            * pair. For odd FRn the pair is written to the bank loaded by
            * load_xf_bank (FPSCR is reloaded first because the memory read may
            * have clobbered EDX), otherwise to load_fr_bank. The rel8 distances
            * are hand-counted sizes of the emitted code. */
  1682     check_fpuen();
  1683     load_reg( R_EAX, Rm );
  1684     check_ralign32( R_EAX );
  1685     MMU_TRANSLATE_READ( R_EAX );
  1686     load_spreg( R_EDX, R_FPSCR );
  1687     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1688     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1689     MEM_READ_LONG( R_EAX, R_EAX );
  1690     load_fr_bank( R_EDX );
  1691     store_fr( R_EDX, R_EAX, FRn );
  1692     if( FRn&1 ) {
  1693 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1694 	JMP_TARGET(doublesize);
  1695 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1696 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1697 	load_xf_bank( R_EDX );
  1698 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1699 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1700 	JMP_TARGET(end);
  1701     } else {
  1702 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1703 	JMP_TARGET(doublesize);
  1704 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1705 	load_fr_bank( R_EDX );
  1706 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1707 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1708 	JMP_TARGET(end);
  1709     }
  1710     sh4_x86.tstate = TSTATE_NONE;
  1711 :}
  1712 FMOV FRm, @-Rn {:  
           /* Pre-decrement FP store. FPSCR_SZ selects at run time between a
            * 32-bit store (address Rn-4) and a 64-bit store (address Rn-8). In
            * both paths the decremented address is translated first and Rn is
            * only updated in the register file immediately before the write.
            * The alignment check is the 32-bit one and is applied to the
            * undecremented Rn for both sizes. For odd FRm the 64-bit pair comes
            * from the bank loaded by load_xf_bank, otherwise from load_fr_bank.
            * The rel8 distances are hand-counted sizes of the emitted code. */
  1713     check_fpuen();
  1714     load_reg( R_EAX, Rn );
  1715     check_walign32( R_EAX );
  1716     load_spreg( R_EDX, R_FPSCR );
  1717     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1718     JNE_rel8(15 + MEM_WRITE_SIZE + MMU_TRANSLATE_SIZE, doublesize);
  1719     ADD_imm8s_r32( -4, R_EAX );
  1720     MMU_TRANSLATE_WRITE( R_EAX );
  1721     load_fr_bank( R_EDX );
  1722     load_fr( R_EDX, R_ECX, FRm );
  1723     ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1724     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1725     if( FRm&1 ) {
  1726 	JMP_rel8( 25 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
  1727 	JMP_TARGET(doublesize);
  1728 	ADD_imm8s_r32(-8,R_EAX);
  1729 	MMU_TRANSLATE_WRITE( R_EAX );
  1730 	load_xf_bank( R_EDX );
  1731 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1732 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1733 	ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1734 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1735 	JMP_TARGET(end);
  1736     } else {
  1737 	JMP_rel8( 16 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
  1738 	JMP_TARGET(doublesize);
  1739 	ADD_imm8s_r32(-8,R_EAX);
  1740 	MMU_TRANSLATE_WRITE( R_EAX );
  1741 	load_fr_bank( R_EDX );
  1742 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1743 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1744 	ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1745 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1746 	JMP_TARGET(end);
  1747     }
  1748     sh4_x86.tstate = TSTATE_NONE;
  1749 :}
  1750 FMOV @Rm+, FRn {:
           /* Post-increment FP load. The address in Rm is alignment-checked
            * (32-bit) and translated once; FPSCR_SZ then selects at run time
            * between a 32-bit read (Rm += 4) and a 64-bit read (Rm += 8). For odd
            * FRn the 64-bit pair is written to the bank loaded by load_xf_bank
            * (FPSCR is reloaded first), otherwise to load_fr_bank. The rel8
            * distances are hand-counted sizes of the emitted code.
            * JMP_TARGET(doublesize) is placed in both branches, matching the
            * non-incrementing FMOV @Rm, FRn rule, so the landing point of the
            * size test is marked in either case. */
  1751     check_fpuen();
  1752     load_reg( R_EAX, Rm );
  1753     check_ralign32( R_EAX );
  1754     MMU_TRANSLATE_READ( R_EAX );
  1755     load_spreg( R_EDX, R_FPSCR );
  1756     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1757     JNE_rel8(12 + MEM_READ_SIZE, doublesize);
  1758     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1759     MEM_READ_LONG( R_EAX, R_EAX );
  1760     load_fr_bank( R_EDX );
  1761     store_fr( R_EDX, R_EAX, FRn );
  1762     if( FRn&1 ) {
  1763 	JMP_rel8(25 + MEM_READ_DOUBLE_SIZE, end);
  1764 	JMP_TARGET(doublesize);
  1765 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1766 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1767 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1768 	load_xf_bank( R_EDX );
  1769 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1770 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1771 	JMP_TARGET(end);
  1772     } else {
  1773 	JMP_rel8(13 + MEM_READ_DOUBLE_SIZE, end);
       	JMP_TARGET(doublesize);
  1774 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1775 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1776 	load_fr_bank( R_EDX );
  1777 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1778 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1779 	JMP_TARGET(end);
  1780     }
  1781     sh4_x86.tstate = TSTATE_NONE;
  1782 :}
  1783 FMOV FRm, @(R0, Rn) {:  
           /* FP store to address Rn + R0. Apart from the address calculation this
            * follows the same pattern as FMOV FRm, @Rn: one 32-bit alignment
            * check and translation, then FPSCR_SZ selects at run time between a
            * 32-bit write of FRm and a 64-bit write of the FRm&0x0E / FRm|0x01
            * pair (from load_xf_bank for odd FRm, load_fr_bank otherwise). */
  1784     check_fpuen();
  1785     load_reg( R_EAX, Rn );
  1786     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1787     check_walign32( R_EAX );
  1788     MMU_TRANSLATE_WRITE( R_EAX );
  1789     load_spreg( R_EDX, R_FPSCR );
  1790     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1791     JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
  1792     load_fr_bank( R_EDX );
  1793     load_fr( R_EDX, R_ECX, FRm );
  1794     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1795     if( FRm&1 ) {
  1796 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1797 	JMP_TARGET(doublesize);
  1798 	load_xf_bank( R_EDX );
  1799 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1800 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1801 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1802 	JMP_TARGET(end);
  1803     } else {
  1804 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1805 	JMP_TARGET(doublesize);
  1806 	load_fr_bank( R_EDX );
  1807 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1808 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1809 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1810 	JMP_TARGET(end);
  1811     }
  1812     sh4_x86.tstate = TSTATE_NONE;
  1813 :}
  1814 FMOV @(R0, Rm), FRn {:  
           /* FP load from address Rm + R0. Apart from the address calculation
            * this follows the same pattern as FMOV @Rm, FRn: one 32-bit alignment
            * check and translation, then FPSCR_SZ selects at run time between a
            * 32-bit read into FRn and a 64-bit read into the FRn&0x0E / FRn|0x01
            * pair (bank from load_xf_bank for odd FRn, load_fr_bank otherwise). */
  1815     check_fpuen();
  1816     load_reg( R_EAX, Rm );
  1817     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1818     check_ralign32( R_EAX );
  1819     MMU_TRANSLATE_READ( R_EAX );
  1820     load_spreg( R_EDX, R_FPSCR );
  1821     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1822     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1823     MEM_READ_LONG( R_EAX, R_EAX );
  1824     load_fr_bank( R_EDX );
  1825     store_fr( R_EDX, R_EAX, FRn );
  1826     if( FRn&1 ) {
  1827 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1828 	JMP_TARGET(doublesize);
  1829 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1830 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1831 	load_xf_bank( R_EDX );
  1832 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1833 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1834 	JMP_TARGET(end);
  1835     } else {
  1836 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1837 	JMP_TARGET(doublesize);
  1838 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1839 	load_fr_bank( R_EDX );
  1840 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1841 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1842 	JMP_TARGET(end);
  1843     }
  1844     sh4_x86.tstate = TSTATE_NONE;
  1845 :}
  1846 FLDI0 FRn {:  /* IFF PR=0 */
           /* Store an all-zero word into FRn. The emitted code tests FPSCR_PR at
            * run time and skips the store (jump of 8 bytes to 'end') when the bit
            * is set, so the instruction has no effect in that mode. tstate is
            * reset to TSTATE_NONE. */
  1847     check_fpuen();
  1848     load_spreg( R_ECX, R_FPSCR );
  1849     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1850     JNE_rel8(8, end);
  1851     XOR_r32_r32( R_EAX, R_EAX );
  1852     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1853     store_fr( R_ECX, R_EAX, FRn );
  1854     JMP_TARGET(end);
  1855     sh4_x86.tstate = TSTATE_NONE;
  1856 :}
  1857 FLDI1 FRn {:  /* IFF PR=0 */
           /* Store the bit pattern 0x3F800000 (IEEE-754 single-precision 1.0)
            * into FRn. The emitted code tests FPSCR_PR at run time and skips the
            * store (jump of 11 bytes to 'end') when the bit is set. tstate is
            * reset to TSTATE_NONE. */
  1858     check_fpuen();
  1859     load_spreg( R_ECX, R_FPSCR );
  1860     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1861     JNE_rel8(11, end);
  1862     load_imm32(R_EAX, 0x3F800000);
  1863     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1864     store_fr( R_ECX, R_EAX, FRn );
  1865     JMP_TARGET(end);
  1866     sh4_x86.tstate = TSTATE_NONE;
  1867 :}
  1869 FLOAT FPUL, FRn {:  
           /* Integer-to-float conversion of FPUL into FRn. FPUL is pushed onto
            * the x87 stack with FILD, then FPSCR_PR selects at run time whether
            * the value is popped with pop_fr (PR clear) or pop_dr (PR set) —
            * presumably single vs. double precision stores. tstate is reset to
            * TSTATE_NONE. */
  1870     check_fpuen();
  1871     load_spreg( R_ECX, R_FPSCR );
  1872     load_spreg(R_EDX, REG_OFFSET(fr_bank));
  1873     FILD_sh4r(R_FPUL);
  1874     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1875     JNE_rel8(5, doubleprec);
  1876     pop_fr( R_EDX, FRn );
  1877     JMP_rel8(3, end);
  1878     JMP_TARGET(doubleprec);
  1879     pop_dr( R_EDX, FRn );
  1880     JMP_TARGET(end);
  1881     sh4_x86.tstate = TSTATE_NONE;
  1882 :}
  1883 FTRC FRm, FPUL {:  
           /* Float-to-integer conversion of FRm into FPUL with saturation.
            * 1. FRm is pushed onto the x87 stack with push_fr or push_dr,
            *    selected at run time by FPSCR_PR.
            * 2. The value is compared against max_int and then min_int (both
            *    loaded through ECX); if either comparison trips, control goes to
            *    the sat/sat2 label, where the bound ECX still points at is
            *    copied into FPUL and the x87 stack is popped.
            * 3. Otherwise the x87 control word is saved to save_fcw, replaced by
            *    trunc_fcw (presumably selecting round-toward-zero) for the
            *    FISTP into FPUL, and restored afterwards.
            * The rel8 distances are hand-counted sizes of the emitted code (see
            * the per-line byte counts). tstate is reset to TSTATE_NONE. */
  1884     check_fpuen();
  1885     load_spreg( R_ECX, R_FPSCR );
  1886     load_fr_bank( R_EDX );
  1887     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1888     JNE_rel8(5, doubleprec);
  1889     push_fr( R_EDX, FRm );
  1890     JMP_rel8(3, doop);
  1891     JMP_TARGET(doubleprec);
  1892     push_dr( R_EDX, FRm );
  1893     JMP_TARGET( doop );
  1894     load_imm32( R_ECX, (uint32_t)&max_int );
  1895     FILD_r32ind( R_ECX );
  1896     FCOMIP_st(1);
  1897     JNA_rel8( 32, sat );
  1898     load_imm32( R_ECX, (uint32_t)&min_int );  // 5
  1899     FILD_r32ind( R_ECX );           // 2
  1900     FCOMIP_st(1);                   // 2
  1901     JAE_rel8( 21, sat2 );            // 2
  1902     load_imm32( R_EAX, (uint32_t)&save_fcw );
  1903     FNSTCW_r32ind( R_EAX );
  1904     load_imm32( R_EDX, (uint32_t)&trunc_fcw );
  1905     FLDCW_r32ind( R_EDX );
  1906     FISTP_sh4r(R_FPUL);             // 3
  1907     FLDCW_r32ind( R_EAX );
  1908     JMP_rel8( 9, end );             // 2
  1910     JMP_TARGET(sat);
  1911     JMP_TARGET(sat2);
  1912     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  1913     store_spreg( R_ECX, R_FPUL );
  1914     FPOP_st();
  1915     JMP_TARGET(end);
  1916     sh4_x86.tstate = TSTATE_NONE;
  1917 :}
  1918 FLDS FRm, FPUL {:  /* Copy the 32-bit contents of FRm into FPUL through EAX. */
  1919     check_fpuen();
  1920     load_fr_bank( R_ECX );
  1921     load_fr( R_ECX, R_EAX, FRm );
  1922     store_spreg( R_EAX, R_FPUL );
  1923     sh4_x86.tstate = TSTATE_NONE;
  1924 :}
  1925 FSTS FPUL, FRn {:  /* Copy the 32-bit contents of FPUL into FRn through EAX. */
  1926     check_fpuen();
  1927     load_fr_bank( R_ECX );
  1928     load_spreg( R_EAX, R_FPUL );
  1929     store_fr( R_ECX, R_EAX, FRn );
  1930     sh4_x86.tstate = TSTATE_NONE;
  1931 :}
  1932 FCNVDS FRm, FPUL {:  /* Push the double at FRm onto the x87 stack and pop it into FPUL; emitted body runs only when PR=1. */
  1933     check_fpuen();
  1934     load_spreg( R_ECX, R_FPSCR );
  1935     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1936     JE_rel8(9, end); // only when PR=1 (PR clear: skip the 9 bytes below)
  1937     load_fr_bank( R_ECX );
  1938     push_dr( R_ECX, FRm );
  1939     pop_fpul();
  1940     JMP_TARGET(end);
  1941     sh4_x86.tstate = TSTATE_NONE;
  1942 :}
  1943 FCNVSD FPUL, FRn {:  /* Push FPUL onto the x87 stack and pop it as a double at FRn; emitted body runs only when PR=1. */
  1944     check_fpuen();
  1945     load_spreg( R_ECX, R_FPSCR );
  1946     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1947     JE_rel8(9, end); // only when PR=1 (PR clear: skip the 9 bytes below)
  1948     load_fr_bank( R_ECX );
  1949     push_fpul();
  1950     pop_dr( R_ECX, FRn );
  1951     JMP_TARGET(end);
  1952     sh4_x86.tstate = TSTATE_NONE;
  1953 :}
  1955 /* Floating point instructions */
  1956 FABS FRn {:  /* FRn = |FRn| via the x87 FABS; single- or double-precision path chosen at runtime from FPSCR.PR. */
  1957     check_fpuen();
  1958     load_spreg( R_ECX, R_FPSCR );
  1959     load_fr_bank( R_EDX );
  1960     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1961     JNE_rel8(10, doubleprec); // 10 = 3 + 2 + 3 + 2, the single-precision path below
  1962     push_fr(R_EDX, FRn); // 3
  1963     FABS_st0(); // 2
  1964     pop_fr( R_EDX, FRn); //3
  1965     JMP_rel8(8,end); // 2
  1966     JMP_TARGET(doubleprec);
  1967     push_dr(R_EDX, FRn);
  1968     FABS_st0();
  1969     pop_dr(R_EDX, FRn);
  1970     JMP_TARGET(end);
  1971     sh4_x86.tstate = TSTATE_NONE;
  1972 :}
  1973 FADD FRm, FRn {:  /* FRn = FRm + FRn on the x87 stack; single- or double-precision path chosen at runtime from FPSCR.PR. */
  1974     check_fpuen();
  1975     load_spreg( R_ECX, R_FPSCR );
  1976     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1977     load_fr_bank( R_EDX ); // emitted between TEST and JNE, so it must leave the x86 flags intact
  1978     JNE_rel8(13,doubleprec); // 13 = byte length of the single-precision path incl. its JMP
  1979     push_fr(R_EDX, FRm);
  1980     push_fr(R_EDX, FRn);
  1981     FADDP_st(1);
  1982     pop_fr(R_EDX, FRn);
  1983     JMP_rel8(11,end);
  1984     JMP_TARGET(doubleprec);
  1985     push_dr(R_EDX, FRm);
  1986     push_dr(R_EDX, FRn);
  1987     FADDP_st(1);
  1988     pop_dr(R_EDX, FRn);
  1989     JMP_TARGET(end);
  1990     sh4_x86.tstate = TSTATE_NONE;
  1991 :}
  1992 FDIV FRm, FRn {:  /* FRn = FRn / FRm on the x87 stack; single- or double-precision path chosen at runtime from FPSCR.PR. */
  1993     check_fpuen();
  1994     load_spreg( R_ECX, R_FPSCR );
  1995     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1996     load_fr_bank( R_EDX ); // emitted between TEST and JNE, so it must leave the x86 flags intact
  1997     JNE_rel8(13, doubleprec);
  1998     push_fr(R_EDX, FRn); // dividend is pushed first (operand order differs from FADD/FMUL)
  1999     push_fr(R_EDX, FRm);
  2000     FDIVP_st(1);
  2001     pop_fr(R_EDX, FRn);
  2002     JMP_rel8(11, end);
  2003     JMP_TARGET(doubleprec);
  2004     push_dr(R_EDX, FRn);
  2005     push_dr(R_EDX, FRm);
  2006     FDIVP_st(1);
  2007     pop_dr(R_EDX, FRn);
  2008     JMP_TARGET(end);
  2009     sh4_x86.tstate = TSTATE_NONE;
  2010 :}
  2011 FMAC FR0, FRm, FRn {:  /* FRn = FR0 * FRm + FRn, computed as separate x87 multiply and add; PR selects single/double path. */
  2012     check_fpuen();
  2013     load_spreg( R_ECX, R_FPSCR );
  2014     load_spreg( R_EDX, REG_OFFSET(fr_bank));
  2015     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2016     JNE_rel8(18, doubleprec); // 18 = byte length of the single-precision path incl. its JMP
  2017     push_fr( R_EDX, 0 ); // register index 0 = FR0
  2018     push_fr( R_EDX, FRm );
  2019     FMULP_st(1);
  2020     push_fr( R_EDX, FRn );
  2021     FADDP_st(1);
  2022     pop_fr( R_EDX, FRn );
  2023     JMP_rel8(16, end);
  2024     JMP_TARGET(doubleprec);
  2025     push_dr( R_EDX, 0 );
  2026     push_dr( R_EDX, FRm );
  2027     FMULP_st(1);
  2028     push_dr( R_EDX, FRn );
  2029     FADDP_st(1);
  2030     pop_dr( R_EDX, FRn );
  2031     JMP_TARGET(end);
  2032     sh4_x86.tstate = TSTATE_NONE;
  2033 :}
  2035 FMUL FRm, FRn {:  /* FRn = FRm * FRn on the x87 stack; single- or double-precision path chosen at runtime from FPSCR.PR. */
  2036     check_fpuen();
  2037     load_spreg( R_ECX, R_FPSCR );
  2038     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2039     load_fr_bank( R_EDX ); // emitted between TEST and JNE, so it must leave the x86 flags intact
  2040     JNE_rel8(13, doubleprec);
  2041     push_fr(R_EDX, FRm);
  2042     push_fr(R_EDX, FRn);
  2043     FMULP_st(1);
  2044     pop_fr(R_EDX, FRn);
  2045     JMP_rel8(11, end);
  2046     JMP_TARGET(doubleprec);
  2047     push_dr(R_EDX, FRm);
  2048     push_dr(R_EDX, FRn);
  2049     FMULP_st(1);
  2050     pop_dr(R_EDX, FRn);
  2051     JMP_TARGET(end);
  2052     sh4_x86.tstate = TSTATE_NONE;
  2053 :}
  2054 FNEG FRn {:  /* FRn = -FRn via the x87 FCHS; single- or double-precision path chosen at runtime from FPSCR.PR. */
  2055     check_fpuen();
  2056     load_spreg( R_ECX, R_FPSCR );
  2057     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2058     load_fr_bank( R_EDX ); // emitted between TEST and JNE, so it must leave the x86 flags intact
  2059     JNE_rel8(10, doubleprec);
  2060     push_fr(R_EDX, FRn);
  2061     FCHS_st0();
  2062     pop_fr(R_EDX, FRn);
  2063     JMP_rel8(8, end);
  2064     JMP_TARGET(doubleprec);
  2065     push_dr(R_EDX, FRn);
  2066     FCHS_st0();
  2067     pop_dr(R_EDX, FRn);
  2068     JMP_TARGET(end);
  2069     sh4_x86.tstate = TSTATE_NONE;
  2070 :}
  2071 FSRRA FRn {:  /* FRn = 1.0 / sqrt(FRn) using x87 FLD1, FSQRT and FDIVP; no code path exists for PR=1. */
  2072     check_fpuen();
  2073     load_spreg( R_ECX, R_FPSCR );
  2074     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2075     load_fr_bank( R_EDX );
  2076     JNE_rel8(12, end); // PR=0 only
  2077     FLD1_st0(); // numerator 1.0
  2078     push_fr(R_EDX, FRn);
  2079     FSQRT_st0();
  2080     FDIVP_st(1);
  2081     pop_fr(R_EDX, FRn);
  2082     JMP_TARGET(end);
  2083     sh4_x86.tstate = TSTATE_NONE;
  2084 :}
  2085 FSQRT FRn {:  /* FRn = sqrt(FRn) via the x87 FSQRT; single- or double-precision path chosen at runtime from FPSCR.PR. */
  2086     check_fpuen();
  2087     load_spreg( R_ECX, R_FPSCR );
  2088     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2089     load_fr_bank( R_EDX ); // emitted between TEST and JNE, so it must leave the x86 flags intact
  2090     JNE_rel8(10, doubleprec);
  2091     push_fr(R_EDX, FRn);
  2092     FSQRT_st0();
  2093     pop_fr(R_EDX, FRn);
  2094     JMP_rel8(8, end);
  2095     JMP_TARGET(doubleprec);
  2096     push_dr(R_EDX, FRn);
  2097     FSQRT_st0();
  2098     pop_dr(R_EDX, FRn);
  2099     JMP_TARGET(end);
  2100     sh4_x86.tstate = TSTATE_NONE;
  2101 :}
  2102 FSUB FRm, FRn {:  /* FRn = FRn - FRm on the x87 stack; single- or double-precision path chosen at runtime from FPSCR.PR. */
  2103     check_fpuen();
  2104     load_spreg( R_ECX, R_FPSCR );
  2105     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2106     load_fr_bank( R_EDX ); // emitted between TEST and JNE, so it must leave the x86 flags intact
  2107     JNE_rel8(13, doubleprec);
  2108     push_fr(R_EDX, FRn); // minuend is pushed first (operand order differs from FADD/FMUL)
  2109     push_fr(R_EDX, FRm);
  2110     FSUBP_st(1);
  2111     pop_fr(R_EDX, FRn);
  2112     JMP_rel8(11, end);
  2113     JMP_TARGET(doubleprec);
  2114     push_dr(R_EDX, FRn);
  2115     push_dr(R_EDX, FRm);
  2116     FSUBP_st(1);
  2117     pop_dr(R_EDX, FRn);
  2118     JMP_TARGET(end);
  2119     sh4_x86.tstate = TSTATE_NONE;
  2120 :}
  2122 FCMP/EQ FRm, FRn {:  /* Compare FRn with FRm on the x87 stack and record the "equal" outcome through SETE_t(). */
  2123     check_fpuen();
  2124     load_spreg( R_ECX, R_FPSCR );
  2125     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2126     load_fr_bank( R_EDX );
  2127     JNE_rel8(8, doubleprec);
  2128     push_fr(R_EDX, FRm);
  2129     push_fr(R_EDX, FRn);
  2130     JMP_rel8(6, end);
  2131     JMP_TARGET(doubleprec);
  2132     push_dr(R_EDX, FRm);
  2133     push_dr(R_EDX, FRn);
  2134     JMP_TARGET(end); // both precisions share the compare sequence below
  2135     FCOMIP_st(1); // compares ST0 (FRn) with ST1 (FRm), sets the x86 flags and pops FRn
  2136     SETE_t();
  2137     FPOP_st(); // remove FRm, leaving the x87 stack balanced
  2138     sh4_x86.tstate = TSTATE_NONE;
  2139 :}
  2140 FCMP/GT FRm, FRn {:  /* Compare FRn with FRm on the x87 stack and record the "above" (FRn > FRm) outcome through SETA_t(). */
  2141     check_fpuen();
  2142     load_spreg( R_ECX, R_FPSCR );
  2143     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2144     load_fr_bank( R_EDX );
  2145     JNE_rel8(8, doubleprec);
  2146     push_fr(R_EDX, FRm);
  2147     push_fr(R_EDX, FRn);
  2148     JMP_rel8(6, end);
  2149     JMP_TARGET(doubleprec);
  2150     push_dr(R_EDX, FRm);
  2151     push_dr(R_EDX, FRn);
  2152     JMP_TARGET(end); // both precisions share the compare sequence below
  2153     FCOMIP_st(1); // compares ST0 (FRn) with ST1 (FRm), sets the x86 flags and pops FRn
  2154     SETA_t();
  2155     FPOP_st(); // remove FRm, leaving the x87 stack balanced
  2156     sh4_x86.tstate = TSTATE_NONE;
  2157 :}
  2159 FSCA FPUL, FRn {:  /* Call sh4_fsca with FPUL and the address of the even-numbered register of the FRn pair; skipped when PR=1. */
  2160     check_fpuen();
  2161     load_spreg( R_ECX, R_FPSCR );
  2162     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2163     JNE_rel8( CALL_FUNC2_SIZE + 9, doubleprec ); // 9 = the three 3-byte instructions before the call
  2164     load_fr_bank( R_ECX );
  2165     ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX ); // byte offset of the even register (4 bytes per register)
  2166     load_spreg( R_EDX, R_FPUL );
  2167     call_func2( sh4_fsca, R_EDX, R_ECX );
  2168     JMP_TARGET(doubleprec); // despite the label name there is no double path: PR=1 emits nothing
  2169     sh4_x86.tstate = TSTATE_NONE;
  2170 :}
  2171 FIPR FVm, FVn {:  /* Four-element dot product of vectors FVm and FVn, written to the last element of FVn; skipped when PR=1. */
  2172     check_fpuen();
  2173     load_spreg( R_ECX, R_FPSCR );
  2174     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2175     JNE_rel8(44, doubleprec); // 44 = total size of the code emitted up to the label
  2177     load_fr_bank( R_ECX ); // ECX is reused: now the fr_bank value, no longer FPSCR
  2178     push_fr( R_ECX, FVm<<2 ); // vector k occupies registers 4k .. 4k+3
  2179     push_fr( R_ECX, FVn<<2 );
  2180     FMULP_st(1);
  2181     push_fr( R_ECX, (FVm<<2)+1);
  2182     push_fr( R_ECX, (FVn<<2)+1);
  2183     FMULP_st(1);
  2184     FADDP_st(1); // accumulate the running sum on the x87 stack
  2185     push_fr( R_ECX, (FVm<<2)+2);
  2186     push_fr( R_ECX, (FVn<<2)+2);
  2187     FMULP_st(1);
  2188     FADDP_st(1);
  2189     push_fr( R_ECX, (FVm<<2)+3);
  2190     push_fr( R_ECX, (FVn<<2)+3);
  2191     FMULP_st(1);
  2192     FADDP_st(1);
  2193     pop_fr( R_ECX, (FVn<<2)+3); // result replaces element 3 of FVn
  2194     JMP_TARGET(doubleprec); // despite the label name there is no double path: PR=1 emits nothing
  2195     sh4_x86.tstate = TSTATE_NONE;
  2196 :}
  2197 FTRV XMTRX, FVn {:  /* Call sh4_ftrv with the address of vector FVn and the value from load_xf_bank; skipped when PR=1. */
  2198     check_fpuen();
  2199     load_spreg( R_ECX, R_FPSCR );
  2200     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2201     JNE_rel8( 18 + CALL_FUNC2_SIZE, doubleprec ); // 18 = 3 + 3 + 12 from the annotated lines below
  2202     load_fr_bank( R_EDX );                 // 3
  2203     ADD_imm8s_r32( FVn<<4, R_EDX );        // 3  (16 bytes per four-register vector)
  2204     load_xf_bank( R_ECX );                 // 12
  2205     call_func2( sh4_ftrv, R_EDX, R_ECX );  // 12
  2206     JMP_TARGET(doubleprec); // despite the label name there is no double path: PR=1 emits nothing
  2207     sh4_x86.tstate = TSTATE_NONE;
  2208 :}
  2210 FRCHG {:  /* Toggle the FPSCR_FR bit in FPSCR and refresh the bank pointer through update_fr_bank(). */
  2211     check_fpuen();
  2212     load_spreg( R_ECX, R_FPSCR );
  2213     XOR_imm32_r32( FPSCR_FR, R_ECX ); // flip the bit
  2214     store_spreg( R_ECX, R_FPSCR );
  2215     update_fr_bank( R_ECX ); // passed the new FPSCR value in ECX
  2216     sh4_x86.tstate = TSTATE_NONE; // XOR changed the x86 flags
  2217 :}
  2218 FSCHG {:  /* Toggle the FPSCR_SZ bit in FPSCR. */
  2219     check_fpuen();
  2220     load_spreg( R_ECX, R_FPSCR );
  2221     XOR_imm32_r32( FPSCR_SZ, R_ECX ); // flip the bit
  2222     store_spreg( R_ECX, R_FPSCR );
  2223     sh4_x86.tstate = TSTATE_NONE; // XOR changed the x86 flags
  2224 :}
  2226 /* Processor control instructions */
  2227 LDC Rm, SR {: /* Write Rm to SR through sh4_write_sr(); rejected with SLOTILLEGAL() inside a delay slot. */
  2228     if( sh4_x86.in_delay_slot ) {
  2229 	SLOTILLEGAL();
  2230     } else {
  2231 	check_priv();
  2232 	load_reg( R_EAX, Rm );
  2233 	call_func1( sh4_write_sr, R_EAX );
  2234 	sh4_x86.priv_checked = FALSE; // SR was rewritten, so the cached privilege check is dropped
  2235 	sh4_x86.fpuen_checked = FALSE; // likewise the cached FPU-enabled check
  2236 	sh4_x86.tstate = TSTATE_NONE; // the call clobbers the x86 flags
  2237     } /* restored: closes the else branch, which was left unterminated */
  2238 :}
  2239 LDC Rm, GBR {: /* Copy Rm into GBR through EAX; no privilege check is emitted for this target. */
  2240     load_reg( R_EAX, Rm );
  2241     store_spreg( R_EAX, R_GBR );
  2242 :}
  2243 LDC Rm, VBR {:  /* Copy Rm into VBR through EAX, after check_priv(). */
  2244     check_priv();
  2245     load_reg( R_EAX, Rm );
  2246     store_spreg( R_EAX, R_VBR );
  2247     sh4_x86.tstate = TSTATE_NONE;
  2248 :}
  2249 LDC Rm, SSR {:  /* Copy Rm into SSR through EAX, after check_priv(). */
  2250     check_priv();
  2251     load_reg( R_EAX, Rm );
  2252     store_spreg( R_EAX, R_SSR );
  2253     sh4_x86.tstate = TSTATE_NONE;
  2254 :}
  2255 LDC Rm, SGR {:  /* Copy Rm into SGR through EAX, after check_priv(). */
  2256     check_priv();
  2257     load_reg( R_EAX, Rm );
  2258     store_spreg( R_EAX, R_SGR );
  2259     sh4_x86.tstate = TSTATE_NONE;
  2260 :}
  2261 LDC Rm, SPC {:  /* Copy Rm into SPC through EAX, after check_priv(). */
  2262     check_priv();
  2263     load_reg( R_EAX, Rm );
  2264     store_spreg( R_EAX, R_SPC );
  2265     sh4_x86.tstate = TSTATE_NONE;
  2266 :}
  2267 LDC Rm, DBR {:  /* Copy Rm into DBR through EAX, after check_priv(). */
  2268     check_priv();
  2269     load_reg( R_EAX, Rm );
  2270     store_spreg( R_EAX, R_DBR );
  2271     sh4_x86.tstate = TSTATE_NONE;
  2272 :}
  2273 LDC Rm, Rn_BANK {:  /* Copy Rm into the r_bank[Rn_BANK] slot through EAX, after check_priv(). */
  2274     check_priv();
  2275     load_reg( R_EAX, Rm );
  2276     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2277     sh4_x86.tstate = TSTATE_NONE;
  2278 :}
  2279 LDC.L @Rm+, GBR {:  /* Read a long from the address in Rm into GBR and add 4 to Rm; no privilege check for this target. */
  2280     load_reg( R_EAX, Rm );
  2281     check_ralign32( R_EAX );
  2282     MMU_TRANSLATE_READ( R_EAX );
  2283     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) ); // increment is emitted after the alignment check and translation, before the read
  2284     MEM_READ_LONG( R_EAX, R_EAX );
  2285     store_spreg( R_EAX, R_GBR );
  2286     sh4_x86.tstate = TSTATE_NONE;
  2287 :}
  2288 LDC.L @Rm+, SR {: /* Read a long from the address in Rm, write it to SR through sh4_write_sr(), and add 4 to Rm; rejected with SLOTILLEGAL() inside a delay slot. */
  2289     if( sh4_x86.in_delay_slot ) {
  2290 	SLOTILLEGAL();
  2291     } else {
  2292 	check_priv();
  2293 	load_reg( R_EAX, Rm );
  2294 	check_ralign32( R_EAX );
  2295 	MMU_TRANSLATE_READ( R_EAX );
  2296 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) ); // increment is emitted after the alignment check and translation, before the read
  2297 	MEM_READ_LONG( R_EAX, R_EAX );
  2298 	call_func1( sh4_write_sr, R_EAX );
  2299 	sh4_x86.priv_checked = FALSE; // SR was rewritten, so the cached privilege check is dropped
  2300 	sh4_x86.fpuen_checked = FALSE; // likewise the cached FPU-enabled check
  2301 	sh4_x86.tstate = TSTATE_NONE; // the call clobbers the x86 flags
  2302     } /* restored: closes the else branch, which was left unterminated */
  2303 :}
  2304 LDC.L @Rm+, VBR {:  /* After check_priv(): read a long from the address in Rm into VBR and add 4 to Rm. */
  2305     check_priv();
  2306     load_reg( R_EAX, Rm );
  2307     check_ralign32( R_EAX );
  2308     MMU_TRANSLATE_READ( R_EAX );
  2309     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) ); // increment is emitted after the alignment check and translation, before the read
  2310     MEM_READ_LONG( R_EAX, R_EAX );
  2311     store_spreg( R_EAX, R_VBR );
  2312     sh4_x86.tstate = TSTATE_NONE;
  2313 :}
  2314 LDC.L @Rm+, SSR {: /* After check_priv(): read a long from the address in Rm into SSR and add 4 to Rm. */
  2315     check_priv();
  2316     load_reg( R_EAX, Rm );
  2317     check_ralign32( R_EAX );
  2318     MMU_TRANSLATE_READ( R_EAX );
  2319     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) ); // increment is emitted after the alignment check and translation, before the read
  2320     MEM_READ_LONG( R_EAX, R_EAX );
  2321     store_spreg( R_EAX, R_SSR );
  2322     sh4_x86.tstate = TSTATE_NONE;
  2323 :}
  2324 LDC.L @Rm+, SGR {:  /* After check_priv(): read a long from the address in Rm into SGR and add 4 to Rm. */
  2325     check_priv();
  2326     load_reg( R_EAX, Rm );
  2327     check_ralign32( R_EAX );
  2328     MMU_TRANSLATE_READ( R_EAX );
  2329     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) ); // increment is emitted after the alignment check and translation, before the read
  2330     MEM_READ_LONG( R_EAX, R_EAX );
  2331     store_spreg( R_EAX, R_SGR );
  2332     sh4_x86.tstate = TSTATE_NONE;
  2333 :}
  2334 LDC.L @Rm+, SPC {:  /* After check_priv(): read a long from the address in Rm into SPC and add 4 to Rm. */
  2335     check_priv();
  2336     load_reg( R_EAX, Rm );
  2337     check_ralign32( R_EAX );
  2338     MMU_TRANSLATE_READ( R_EAX );
  2339     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) ); // increment is emitted after the alignment check and translation, before the read
  2340     MEM_READ_LONG( R_EAX, R_EAX );
  2341     store_spreg( R_EAX, R_SPC );
  2342     sh4_x86.tstate = TSTATE_NONE;
  2343 :}
  2344 LDC.L @Rm+, DBR {:  /* After check_priv(): read a long from the address in Rm into DBR and add 4 to Rm. */
  2345     check_priv();
  2346     load_reg( R_EAX, Rm );
  2347     check_ralign32( R_EAX );
  2348     MMU_TRANSLATE_READ( R_EAX );
  2349     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) ); // increment is emitted after the alignment check and translation, before the read
  2350     MEM_READ_LONG( R_EAX, R_EAX );
  2351     store_spreg( R_EAX, R_DBR );
  2352     sh4_x86.tstate = TSTATE_NONE;
  2353 :}
  2354 LDC.L @Rm+, Rn_BANK {:  /* After check_priv(): read a long from the address in Rm into r_bank[Rn_BANK] and add 4 to Rm. */
  2355     check_priv();
  2356     load_reg( R_EAX, Rm );
  2357     check_ralign32( R_EAX );
  2358     MMU_TRANSLATE_READ( R_EAX );
  2359     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) ); // increment is emitted after the alignment check and translation, before the read
  2360     MEM_READ_LONG( R_EAX, R_EAX );
  2361     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2362     sh4_x86.tstate = TSTATE_NONE;
  2363 :}
  2364 LDS Rm, FPSCR {:  /* Copy Rm into FPSCR and refresh the bank pointer through update_fr_bank(). */
  2365     load_reg( R_EAX, Rm );
  2366     store_spreg( R_EAX, R_FPSCR );
  2367     update_fr_bank( R_EAX ); // passed the new FPSCR value in EAX
  2368     sh4_x86.tstate = TSTATE_NONE;
  2369 :}
  2370 LDS.L @Rm+, FPSCR {:  /* Read a long from the address in Rm into FPSCR, add 4 to Rm, then refresh the bank pointer. */
  2371     load_reg( R_EAX, Rm );
  2372     check_ralign32( R_EAX );
  2373     MMU_TRANSLATE_READ( R_EAX );
  2374     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) ); // increment is emitted after the alignment check and translation, before the read
  2375     MEM_READ_LONG( R_EAX, R_EAX );
  2376     store_spreg( R_EAX, R_FPSCR );
  2377     update_fr_bank( R_EAX ); // passed the new FPSCR value in EAX
  2378     sh4_x86.tstate = TSTATE_NONE;
  2379 :}
  2380 LDS Rm, FPUL {:  /* Copy Rm into FPUL through EAX. */
  2381     load_reg( R_EAX, Rm );
  2382     store_spreg( R_EAX, R_FPUL );
  2383 :}
  2384 LDS.L @Rm+, FPUL {:  /* Read a long from the address in Rm into FPUL and add 4 to Rm. */
  2385     load_reg( R_EAX, Rm );
  2386     check_ralign32( R_EAX );
  2387     MMU_TRANSLATE_READ( R_EAX );
  2388     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) ); // increment is emitted after the alignment check and translation, before the read
  2389     MEM_READ_LONG( R_EAX, R_EAX );
  2390     store_spreg( R_EAX, R_FPUL );
  2391     sh4_x86.tstate = TSTATE_NONE;
  2392 :}
  2393 LDS Rm, MACH {: /* Copy Rm into MACH through EAX. */
  2394     load_reg( R_EAX, Rm );
  2395     store_spreg( R_EAX, R_MACH );
  2396 :}
  2397 LDS.L @Rm+, MACH {:  /* Read a long from the address in Rm into MACH and add 4 to Rm. */
  2398     load_reg( R_EAX, Rm );
  2399     check_ralign32( R_EAX );
  2400     MMU_TRANSLATE_READ( R_EAX );
  2401     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) ); // increment is emitted after the alignment check and translation, before the read
  2402     MEM_READ_LONG( R_EAX, R_EAX );
  2403     store_spreg( R_EAX, R_MACH );
  2404     sh4_x86.tstate = TSTATE_NONE;
  2405 :}
  2406 LDS Rm, MACL {:  /* Copy Rm into MACL through EAX. */
  2407     load_reg( R_EAX, Rm );
  2408     store_spreg( R_EAX, R_MACL );
  2409 :}
  2410 LDS.L @Rm+, MACL {:  /* Read a long from the address in Rm into MACL and add 4 to Rm. */
  2411     load_reg( R_EAX, Rm );
  2412     check_ralign32( R_EAX );
  2413     MMU_TRANSLATE_READ( R_EAX );
  2414     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) ); // increment is emitted after the alignment check and translation, before the read
  2415     MEM_READ_LONG( R_EAX, R_EAX );
  2416     store_spreg( R_EAX, R_MACL );
  2417     sh4_x86.tstate = TSTATE_NONE;
  2418 :}
  2419 LDS Rm, PR {:  /* Copy Rm into PR through EAX. */
  2420     load_reg( R_EAX, Rm );
  2421     store_spreg( R_EAX, R_PR );
  2422 :}
  2423 LDS.L @Rm+, PR {:  /* Read a long from the address in Rm into PR and add 4 to Rm. */
  2424     load_reg( R_EAX, Rm );
  2425     check_ralign32( R_EAX );
  2426     MMU_TRANSLATE_READ( R_EAX );
  2427     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) ); // increment is emitted after the alignment check and translation, before the read
  2428     MEM_READ_LONG( R_EAX, R_EAX );
  2429     store_spreg( R_EAX, R_PR );
  2430     sh4_x86.tstate = TSTATE_NONE;
  2431 :}
  2432 LDTLB {:  /* Emit a call to MMU_ldtlb(). */
  2433     call_func0( MMU_ldtlb );
           sh4_x86.tstate = TSTATE_NONE; /* the called function may clobber the x86 flags; reset as every other call-emitting template here does */
  2434 :}
  2435 OCBI @Rn {: /* empty action: no code is emitted for this instruction */ :}
  2436 OCBP @Rn {: /* empty action: no code is emitted for this instruction */ :}
  2437 OCBWB @Rn {: /* empty action: no code is emitted for this instruction */ :}
  2438 PREF @Rn {: /* If the top six bits of Rn match 0xE0000000, call sh4_flush_store_queue(Rn); otherwise emit nothing at runtime. */
  2439     load_reg( R_EAX, Rn );
  2440     MOV_r32_r32( R_EAX, R_ECX ); // keep the unmasked address in ECX for the call
  2441     AND_imm32_r32( 0xFC000000, R_EAX );
  2442     CMP_imm32_r32( 0xE0000000, R_EAX );
  2443     JNE_rel8(8+CALL_FUNC1_SIZE, end); // address outside that region: skip the call and its result check
  2444     call_func1( sh4_flush_store_queue, R_ECX );
  2445     TEST_r32_r32( R_EAX, R_EAX ); // EAX tested as the call's result
  2446     JE_exc(-1); // zero result takes the exception exit
  2447     JMP_TARGET(end);
  2448     sh4_x86.tstate = TSTATE_NONE;
  2449 :}
  2450 SLEEP {: /* After check_priv(): emit a call to sh4_sleep(), then return 2 from the enclosing translation routine. */
  2451     check_priv();
  2452     call_func0( sh4_sleep );
  2453     sh4_x86.tstate = TSTATE_NONE;
  2454     sh4_x86.in_delay_slot = FALSE;
  2455     return 2; // NOTE(review): the meaning of the value 2 is defined by the caller, outside this excerpt
  2456 :}
  2457 STC SR, Rn {: /* After check_priv(): call sh4_read_sr() and store EAX (taken as its result) into Rn. */
  2458     check_priv();
  2459     call_func0(sh4_read_sr);
  2460     store_reg( R_EAX, Rn );
  2461     sh4_x86.tstate = TSTATE_NONE; // the call clobbers the x86 flags
  2462 :}
  2463 STC GBR, Rn {:  /* Copy GBR into Rn through EAX; no privilege check is emitted for this source. */
  2464     load_spreg( R_EAX, R_GBR );
  2465     store_reg( R_EAX, Rn );
  2466 :}
  2467 STC VBR, Rn {:  /* Copy VBR into Rn through EAX, after check_priv(). */
  2468     check_priv();
  2469     load_spreg( R_EAX, R_VBR );
  2470     store_reg( R_EAX, Rn );
  2471     sh4_x86.tstate = TSTATE_NONE;
  2472 :}
  2473 STC SSR, Rn {:  /* Copy SSR into Rn through EAX, after check_priv(). */
  2474     check_priv();
  2475     load_spreg( R_EAX, R_SSR );
  2476     store_reg( R_EAX, Rn );
  2477     sh4_x86.tstate = TSTATE_NONE;
  2478 :}
  2479 STC SPC, Rn {:  /* Copy SPC into Rn through EAX, after check_priv(). */
  2480     check_priv();
  2481     load_spreg( R_EAX, R_SPC );
  2482     store_reg( R_EAX, Rn );
  2483     sh4_x86.tstate = TSTATE_NONE;
  2484 :}
  2485 STC SGR, Rn {:  /* Copy SGR into Rn through EAX, after check_priv(). */
  2486     check_priv();
  2487     load_spreg( R_EAX, R_SGR );
  2488     store_reg( R_EAX, Rn );
  2489     sh4_x86.tstate = TSTATE_NONE;
  2490 :}
  2491 STC DBR, Rn {:  /* Copy DBR into Rn through EAX, after check_priv(). */
  2492     check_priv();
  2493     load_spreg( R_EAX, R_DBR );
  2494     store_reg( R_EAX, Rn );
  2495     sh4_x86.tstate = TSTATE_NONE;
  2496 :}
  2497 STC Rm_BANK, Rn {: /* Copy the r_bank[Rm_BANK] slot into Rn through EAX, after check_priv(). */
  2498     check_priv();
  2499     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2500     store_reg( R_EAX, Rn );
  2501     sh4_x86.tstate = TSTATE_NONE;
  2502 :}
  2503 STC.L SR, @-Rn {: /* After check_priv(): write the value returned by sh4_read_sr() to address Rn-4 and subtract 4 from Rn. */
  2504     check_priv();
  2505     load_reg( R_EAX, Rn );
  2506     check_walign32( R_EAX );
  2507     ADD_imm8s_r32( -4, R_EAX ); // EAX = Rn - 4; the SH4 register itself is not yet changed
  2508     MMU_TRANSLATE_WRITE( R_EAX );
  2509     PUSH_realigned_r32( R_EAX ); // keep the translated address across the call below
  2510     call_func0( sh4_read_sr );
  2511     POP_realigned_r32( R_ECX ); // address comes back in ECX; EAX is then written as the value
  2512     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) ); // decrement is emitted after translation, before the write
  2513     MEM_WRITE_LONG( R_ECX, R_EAX );
  2514     sh4_x86.tstate = TSTATE_NONE;
  2515 :}
  2516 STC.L VBR, @-Rn {:  /* After check_priv(): write VBR to address Rn-4 and subtract 4 from Rn. */
  2517     check_priv();
  2518     load_reg( R_EAX, Rn );
  2519     check_walign32( R_EAX );
  2520     ADD_imm8s_r32( -4, R_EAX ); // EAX = Rn - 4; the SH4 register itself is not yet changed
  2521     MMU_TRANSLATE_WRITE( R_EAX );
  2522     load_spreg( R_EDX, R_VBR );
  2523     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) ); // decrement is emitted after translation, before the write
  2524     MEM_WRITE_LONG( R_EAX, R_EDX );
  2525     sh4_x86.tstate = TSTATE_NONE;
  2526 :}
  2527 STC.L SSR, @-Rn {:  /* After check_priv(): write SSR to address Rn-4 and subtract 4 from Rn. */
  2528     check_priv();
  2529     load_reg( R_EAX, Rn );
  2530     check_walign32( R_EAX );
  2531     ADD_imm8s_r32( -4, R_EAX ); // EAX = Rn - 4; the SH4 register itself is not yet changed
  2532     MMU_TRANSLATE_WRITE( R_EAX );
  2533     load_spreg( R_EDX, R_SSR );
  2534     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) ); // decrement is emitted after translation, before the write
  2535     MEM_WRITE_LONG( R_EAX, R_EDX );
  2536     sh4_x86.tstate = TSTATE_NONE;
  2537 :}
  2538 STC.L SPC, @-Rn {: /* After check_priv(): write SPC to address Rn-4 and subtract 4 from Rn. */
  2539     check_priv();
  2540     load_reg( R_EAX, Rn );
  2541     check_walign32( R_EAX );
  2542     ADD_imm8s_r32( -4, R_EAX ); // EAX = Rn - 4; the SH4 register itself is not yet changed
  2543     MMU_TRANSLATE_WRITE( R_EAX );
  2544     load_spreg( R_EDX, R_SPC );
  2545     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) ); // decrement is emitted after translation, before the write
  2546     MEM_WRITE_LONG( R_EAX, R_EDX );
  2547     sh4_x86.tstate = TSTATE_NONE;
  2548 :}
  2549 STC.L SGR, @-Rn {:  /* After check_priv(): write SGR to address Rn-4 and subtract 4 from Rn. */
  2550     check_priv();
  2551     load_reg( R_EAX, Rn );
  2552     check_walign32( R_EAX );
  2553     ADD_imm8s_r32( -4, R_EAX ); // EAX = Rn - 4; the SH4 register itself is not yet changed
  2554     MMU_TRANSLATE_WRITE( R_EAX );
  2555     load_spreg( R_EDX, R_SGR );
  2556     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) ); // decrement is emitted after translation, before the write
  2557     MEM_WRITE_LONG( R_EAX, R_EDX );
  2558     sh4_x86.tstate = TSTATE_NONE;
  2559 :}
  2560 STC.L DBR, @-Rn {:  /* After check_priv(): write DBR to address Rn-4 and subtract 4 from Rn. */
  2561     check_priv();
  2562     load_reg( R_EAX, Rn );
  2563     check_walign32( R_EAX );
  2564     ADD_imm8s_r32( -4, R_EAX ); // EAX = Rn - 4; the SH4 register itself is not yet changed
  2565     MMU_TRANSLATE_WRITE( R_EAX );
  2566     load_spreg( R_EDX, R_DBR );
  2567     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) ); // decrement is emitted after translation, before the write
  2568     MEM_WRITE_LONG( R_EAX, R_EDX );
  2569     sh4_x86.tstate = TSTATE_NONE;
  2570 :}
  2571 STC.L Rm_BANK, @-Rn {:  /* After check_priv(): write r_bank[Rm_BANK] to address Rn-4 and subtract 4 from Rn. */
  2572     check_priv();
  2573     load_reg( R_EAX, Rn );
  2574     check_walign32( R_EAX );
  2575     ADD_imm8s_r32( -4, R_EAX ); // EAX = Rn - 4; the SH4 register itself is not yet changed
  2576     MMU_TRANSLATE_WRITE( R_EAX );
  2577     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2578     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) ); // decrement is emitted after translation, before the write
  2579     MEM_WRITE_LONG( R_EAX, R_EDX );
  2580     sh4_x86.tstate = TSTATE_NONE;
  2581 :}
  2582 STC.L GBR, @-Rn {:  /* Write GBR to address Rn-4 and subtract 4 from Rn; no privilege check for this source. */
  2583     load_reg( R_EAX, Rn );
  2584     check_walign32( R_EAX );
  2585     ADD_imm8s_r32( -4, R_EAX ); // EAX = Rn - 4; the SH4 register itself is not yet changed
  2586     MMU_TRANSLATE_WRITE( R_EAX );
  2587     load_spreg( R_EDX, R_GBR );
  2588     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) ); // decrement is emitted after translation, before the write
  2589     MEM_WRITE_LONG( R_EAX, R_EDX );
  2590     sh4_x86.tstate = TSTATE_NONE;
  2591 :}
  2592 STS FPSCR, Rn {:  /* Copy FPSCR into Rn through EAX. */
  2593     load_spreg( R_EAX, R_FPSCR );
  2594     store_reg( R_EAX, Rn );
  2595 :}
  2596 STS.L FPSCR, @-Rn {:  /* Write FPSCR to address Rn-4 and subtract 4 from Rn. */
  2597     load_reg( R_EAX, Rn );
  2598     check_walign32( R_EAX );
  2599     ADD_imm8s_r32( -4, R_EAX ); // EAX = Rn - 4; the SH4 register itself is not yet changed
  2600     MMU_TRANSLATE_WRITE( R_EAX );
  2601     load_spreg( R_EDX, R_FPSCR );
  2602     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) ); // decrement is emitted after translation, before the write
  2603     MEM_WRITE_LONG( R_EAX, R_EDX );
  2604     sh4_x86.tstate = TSTATE_NONE;
  2605 :}
  2606 STS FPUL, Rn {:  /* Copy FPUL into Rn through EAX. */
  2607     load_spreg( R_EAX, R_FPUL );
  2608     store_reg( R_EAX, Rn );
  2609 :}
  2610 STS.L FPUL, @-Rn {:  /* Write FPUL to address Rn-4 and subtract 4 from Rn. */
  2611     load_reg( R_EAX, Rn );
  2612     check_walign32( R_EAX );
  2613     ADD_imm8s_r32( -4, R_EAX ); // EAX = Rn - 4; the SH4 register itself is not yet changed
  2614     MMU_TRANSLATE_WRITE( R_EAX );
  2615     load_spreg( R_EDX, R_FPUL );
  2616     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) ); // decrement is emitted after translation, before the write
  2617     MEM_WRITE_LONG( R_EAX, R_EDX );
  2618     sh4_x86.tstate = TSTATE_NONE;
  2619 :}
  2620 STS MACH, Rn {:  /* Copy MACH into Rn through EAX. */
  2621     load_spreg( R_EAX, R_MACH );
  2622     store_reg( R_EAX, Rn );
  2623 :}
  2624 STS.L MACH, @-Rn {:  /* Write MACH to address Rn-4 and subtract 4 from Rn. */
  2625     load_reg( R_EAX, Rn );
  2626     check_walign32( R_EAX );
  2627     ADD_imm8s_r32( -4, R_EAX ); // EAX = Rn - 4; the SH4 register itself is not yet changed
  2628     MMU_TRANSLATE_WRITE( R_EAX );
  2629     load_spreg( R_EDX, R_MACH );
  2630     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) ); // decrement is emitted after translation, before the write
  2631     MEM_WRITE_LONG( R_EAX, R_EDX );
  2632     sh4_x86.tstate = TSTATE_NONE;
  2633 :}
  2634 STS MACL, Rn {:  /* Copy MACL into Rn through EAX. */
  2635     load_spreg( R_EAX, R_MACL );
  2636     store_reg( R_EAX, Rn );
  2637 :}
  2638 STS.L MACL, @-Rn {:  /* Write MACL to address Rn-4 and subtract 4 from Rn. */
  2639     load_reg( R_EAX, Rn );
  2640     check_walign32( R_EAX );
  2641     ADD_imm8s_r32( -4, R_EAX ); // EAX = Rn - 4; the SH4 register itself is not yet changed
  2642     MMU_TRANSLATE_WRITE( R_EAX );
  2643     load_spreg( R_EDX, R_MACL );
  2644     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) ); // decrement is emitted after translation, before the write
  2645     MEM_WRITE_LONG( R_EAX, R_EDX );
  2646     sh4_x86.tstate = TSTATE_NONE;
  2647 :}
  2648 STS PR, Rn {:  /* Copy PR into Rn through EAX. */
  2649     load_spreg( R_EAX, R_PR );
  2650     store_reg( R_EAX, Rn );
  2651 :}
  2652 STS.L PR, @-Rn {:  /* Write PR to address Rn-4 and subtract 4 from Rn. */
  2653     load_reg( R_EAX, Rn );
  2654     check_walign32( R_EAX );
  2655     ADD_imm8s_r32( -4, R_EAX ); // EAX = Rn - 4; the SH4 register itself is not yet changed
  2656     MMU_TRANSLATE_WRITE( R_EAX );
  2657     load_spreg( R_EDX, R_PR );
  2658     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) ); // decrement is emitted after translation, before the write
  2659     MEM_WRITE_LONG( R_EAX, R_EDX );
  2660     sh4_x86.tstate = TSTATE_NONE;
  2661 :}
  2663 NOP {: /* Deliberately emits no code. An x86 NOP (0x90) could be emitted here, but it would serve no purpose. */ :}
  2664 %%
  2665     sh4_x86.in_delay_slot = FALSE;
  2666     return 0;
.