filename src/sh4/sh4x86.in
changeset 571:9bc09948d0f2
prev 570:d2893980fbf5
next 577:a181aeacd6e8
author nkeynes
date Mon Jan 14 09:08:58 2008 +0000
branch lxdream-mmu
permissions -rw-r--r--
last change Fix TRAPA in emulator core
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/xltcache.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4trans.h"
    31 #include "sh4/sh4mmio.h"
    32 #include "sh4/x86op.h"
    33 #include "clock.h"
    35 #define DEFAULT_BACKPATCH_SIZE 4096
    37 struct backpatch_record {
    38     uint32_t *fixup_addr;
    39     uint32_t fixup_icount;
    40     uint32_t exc_code;
    41 };
    43 #define MAX_RECOVERY_SIZE 2048
    45 /** 
    46  * Struct to manage internal translation state. This state is not saved -
    47  * it is only valid between calls to sh4_translate_begin_block() and
    48  * sh4_translate_end_block()
    49  */
    50 struct sh4_x86_state {
    51     gboolean in_delay_slot;
    52     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    53     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    54     gboolean branch_taken; /* true if we branched unconditionally */
    55     uint32_t block_start_pc;
    56     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    57     int tstate;
    59     /* mode flags */
    60     gboolean tlb_on; /* True if tlb translation is active */
    62     /* Allocated memory for the (block-wide) back-patch list */
    63     struct backpatch_record *backpatch_list;
    64     uint32_t backpatch_posn;
    65     uint32_t backpatch_size;
    66     struct xlat_recovery_record recovery_list[MAX_RECOVERY_SIZE];
    67     uint32_t recovery_posn;
    68 };
    70 #define TSTATE_NONE -1
    71 #define TSTATE_O    0
    72 #define TSTATE_C    2
    73 #define TSTATE_E    4
    74 #define TSTATE_NE   5
    75 #define TSTATE_G    0xF
    76 #define TSTATE_GE   0xD
    77 #define TSTATE_A    7
    78 #define TSTATE_AE   3
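
/* These TSTATE values are x86 condition-code nibbles (0x70+cc == Jcc rel8):
 * O=0, C/B=2, E=4, NE=5, A=7, AE=3, GE=0xD, G=0xF. Toggling bit 0 of a
 * condition code negates it, which JF_rel8 below relies on. */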
    80 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    81 #define JT_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    82 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    83     OP(0x70+sh4_x86.tstate); OP(rel8); \
    84     MARK_JMP(rel8,label)
    85 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    86 #define JF_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    87 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    88     OP(0x70+ (sh4_x86.tstate^1)); OP(rel8); \
    89     MARK_JMP(rel8, label)
    91 static struct sh4_x86_state sh4_x86;
    93 static uint32_t max_int = 0x7FFFFFFF;
    94 static uint32_t min_int = 0x80000000;
    95 static uint32_t save_fcw; /* save value for fpu control word */
    96 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
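
/**
 * Illustrative sketch only (not part of the original translator): the
 * JT_rel8/JF_rel8 macros above written out as a function, omitting the
 * MARK_JMP debug bookkeeping. It relies on the x86 rule that toggling
 * bit 0 of a condition code negates it (0x74 JE <-> 0x75 JNE), so one
 * cached tstate serves both polarities.
 */
static inline void emit_jcc_on_T( int t_value, uint8_t rel8 )
{
    if( sh4_x86.tstate == TSTATE_NONE ) {
        CMP_imm8s_sh4r( 1, R_T );       /* materialise T in the flags */
        sh4_x86.tstate = TSTATE_E;
    }
    OP( 0x70 + (sh4_x86.tstate ^ (t_value ? 0 : 1)) );
    OP( rel8 );
}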
    98 void sh4_x86_init()
    99 {
   100     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   101     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   102 }
   105 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   106 {
   107     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   108 	sh4_x86.backpatch_size <<= 1;
   109 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   110 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   111 	assert( sh4_x86.backpatch_list != NULL );
   112     }
   113     if( sh4_x86.in_delay_slot ) {
   114 	fixup_pc -= 2;
   115     }
   116     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_addr = (uint32_t *)fixup_addr;
   117     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   118     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   119     sh4_x86.backpatch_posn++;
   120 }
   122 void sh4_x86_add_recovery( uint32_t pc )
   123 {
   124     xlat_recovery[xlat_recovery_posn].xlat_pc = (uintptr_t)xlat_output;
   125     xlat_recovery[xlat_recovery_posn].sh4_icount = (pc - sh4_x86.block_start_pc)>>1;
   126     xlat_recovery_posn++;
   127 }
   129 /**
   130  * Emit an instruction to load an SH4 reg into a real register
   131  */
   132 static inline void load_reg( int x86reg, int sh4reg ) 
   133 {
   134     /* mov [bp+n], reg */
   135     OP(0x8B);
   136     OP(0x45 + (x86reg<<3));
   137     OP(REG_OFFSET(r[sh4reg]));
   138 }
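
/* Eg (illustrative, assuming R_EAX encodes as register 0): load_reg( R_EAX, 1 )
 * emits 8B 45 xx, ie "mov eax, [ebp+xx]" with xx = REG_OFFSET(r[1]),
 * assuming the offset fits in a signed 8-bit displacement. */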
   140 static inline void load_reg16s( int x86reg, int sh4reg )
   141 {
   142     OP(0x0F);
   143     OP(0xBF);
   144     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   145 }
   147 static inline void load_reg16u( int x86reg, int sh4reg )
   148 {
   149     OP(0x0F);
   150     OP(0xB7);
   151     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   153 }
   155 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   156 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   157 /**
   158  * Emit an instruction to load an immediate value into a register
   159  */
   160 static inline void load_imm32( int x86reg, uint32_t value ) {
   161     /* mov #value, reg */
   162     OP(0xB8 + x86reg);
   163     OP32(value);
   164 }
   166 /**
   167  * Load an immediate 64-bit quantity (note: x86-64 only)
   168  */
    169 static inline void load_imm64( int x86reg, uint64_t value ) {
   170     /* mov #value, reg */
   171     REXW();
   172     OP(0xB8 + x86reg);
   173     OP64(value);
   174 }
   177 /**
   178  * Emit an instruction to store an SH4 reg (RN)
   179  */
    180 static inline void store_reg( int x86reg, int sh4reg ) {
   181     /* mov reg, [bp+n] */
   182     OP(0x89);
   183     OP(0x45 + (x86reg<<3));
   184     OP(REG_OFFSET(r[sh4reg]));
   185 }
   187 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
   189 /**
   190  * Load an FR register (single-precision floating point) into an integer x86
   191  * register (eg for register-to-register moves)
   192  */
    193 static inline void load_fr( int bankreg, int x86reg, int frm )
   194 {
   195     OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
   196 }
   198 /**
    199  * Store an integer x86 register into an FR register (single-precision
    200  * floating point) (eg for register-to-register moves)
   201  */
    202 static inline void store_fr( int bankreg, int x86reg, int frn )
   203 {
   204     OP(0x89);  OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
   205 }
   208 /**
    209  * Load a pointer to the back fp bank into the specified x86 register. The
   210  * bankreg must have been previously loaded with FPSCR.
   211  * NB: 12 bytes
   212  */
   213 static inline void load_xf_bank( int bankreg )
   214 {
   215     NOT_r32( bankreg );
   216     SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
   217     AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
   218     OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
   219 }
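
/**
 * Reference model of the computation above (illustrative only, not used by
 * the translator): assuming the two fp banks are laid out as consecutive
 * 64-byte blocks, the offset of the back bank is 64 bytes iff FPSCR.FR
 * (bit 21) is clear.
 */
static inline uint32_t xf_bank_offset( uint32_t fpscr )
{
    return (~fpscr >> (21 - 6)) & 0x40; /* 0 or 64 */
}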
   221 /**
   222  * Update the fr_bank pointer based on the current fpscr value.
   223  */
   224 static inline void update_fr_bank( int fpscrreg )
   225 {
   226     SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
   227     AND_imm8s_r32( 0x40, fpscrreg );    // Complete extraction
   228     OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
   229     store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
   230 }
   231 /**
   232  * Push FPUL (as a 32-bit float) onto the FPU stack
   233  */
   234 static inline void push_fpul( )
   235 {
   236     OP(0xD9); OP(0x45); OP(R_FPUL);
   237 }
   239 /**
   240  * Pop FPUL (as a 32-bit float) from the FPU stack
   241  */
   242 static inline void pop_fpul( )
   243 {
   244     OP(0xD9); OP(0x5D); OP(R_FPUL);
   245 }
   247 /**
   248  * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
   249  * with the location of the current fp bank.
   250  */
   251 static inline void push_fr( int bankreg, int frm ) 
   252 {
   253     OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
   254 }
   256 /**
   257  * Pop a 32-bit float from the FPU stack and store it back into the fp bank, 
   258  * with bankreg previously loaded with the location of the current fp bank.
   259  */
   260 static inline void pop_fr( int bankreg, int frm )
   261 {
   262     OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
   263 }
   265 /**
   266  * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
   267  * with the location of the current fp bank.
   268  */
   269 static inline void push_dr( int bankreg, int frm )
   270 {
   271     OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
   272 }
   274 static inline void pop_dr( int bankreg, int frm )
   275 {
   276     OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
   277 }
   279 /* Exception checks - Note that all exception checks will clobber EAX */
   281 #define check_priv( ) \
   282     if( !sh4_x86.priv_checked ) { \
   283 	sh4_x86.priv_checked = TRUE;\
   284 	load_spreg( R_EAX, R_SR );\
   285 	AND_imm32_r32( SR_MD, R_EAX );\
   286 	if( sh4_x86.in_delay_slot ) {\
   287 	    JE_exc( EXC_SLOT_ILLEGAL );\
   288 	} else {\
   289 	    JE_exc( EXC_ILLEGAL );\
   290 	}\
   291     }\
   293 #define check_fpuen( ) \
   294     if( !sh4_x86.fpuen_checked ) {\
   295 	sh4_x86.fpuen_checked = TRUE;\
   296 	load_spreg( R_EAX, R_SR );\
   297 	AND_imm32_r32( SR_FD, R_EAX );\
   298 	if( sh4_x86.in_delay_slot ) {\
   299 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   300 	} else {\
   301 	    JNE_exc(EXC_FPU_DISABLED);\
   302 	}\
   303     }
   305 #define check_ralign16( x86reg ) \
   306     TEST_imm32_r32( 0x00000001, x86reg ); \
   307     JNE_exc(EXC_DATA_ADDR_READ)
   309 #define check_walign16( x86reg ) \
   310     TEST_imm32_r32( 0x00000001, x86reg ); \
   311     JNE_exc(EXC_DATA_ADDR_WRITE);
   313 #define check_ralign32( x86reg ) \
   314     TEST_imm32_r32( 0x00000003, x86reg ); \
   315     JNE_exc(EXC_DATA_ADDR_READ)
   317 #define check_walign32( x86reg ) \
   318     TEST_imm32_r32( 0x00000003, x86reg ); \
   319     JNE_exc(EXC_DATA_ADDR_WRITE);
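
/* Eg a longword access to 0x8C000002 has a low address bit set, so the TEST
 * above clears ZF and JNE_exc raises EXC_DATA_ADDR_READ/WRITE before any
 * memory access is attempted. */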
   321 #define UNDEF()
   322 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   323 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   324 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   325 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   326 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   327 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   328 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   330 /**
   331  * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned 
   332  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   333  */
   334 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   335 /**
   336  * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned 
   337  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   338  */
   339 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
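
/* Assumed contract of the helpers (a sketch - see the sh4 mmu sources for
 * the real declarations): mmu_vma_to_phys_read/_write return the translated
 * address in EAX on success, or MMU_VMA_ERROR after queuing an exception,
 * in which case JE_exc(-1) branches to the exception exit. */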
   341 #define MEM_READ_SIZE (CALL_FUNC1_SIZE)
   342 #define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
   343 #define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )
   345 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
   347 /****** Import appropriate calling conventions ******/
   348 #if SH4_TRANSLATOR == TARGET_X86_64
   349 #include "sh4/ia64abi.h"
   350 #else /* SH4_TRANSLATOR == TARGET_X86 */
   351 #ifdef APPLE_BUILD
   352 #include "sh4/ia32mac.h"
   353 #else
   354 #include "sh4/ia32abi.h"
   355 #endif
   356 #endif
    359 /**
    360  * Translate a single instruction. Delayed branches are handled specially
    361  * by translating both branch and delayed instruction as a single unit (as
    362  * the delay-slot instruction logically executes before the branch takes effect).
    363  *
    364  * @return true if the instruction marks the end of a basic block
    365  * (eg a branch or an instruction that modifies the PC).
    366  */
   367 uint32_t sh4_translate_instruction( sh4addr_t pc )
   368 {
   369     uint32_t ir;
   370     /* Read instruction */
   371     if( IS_IN_ICACHE(pc) ) {
   372 	ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   373     } else {
   374 	ir = sh4_read_word(pc);
   375     }
   376     if( !sh4_x86.in_delay_slot ) {
   377 	sh4_x86_add_recovery(pc);
   378     }
   379 %%
   380 /* ALU operations */
   381 ADD Rm, Rn {:
   382     load_reg( R_EAX, Rm );
   383     load_reg( R_ECX, Rn );
   384     ADD_r32_r32( R_EAX, R_ECX );
   385     store_reg( R_ECX, Rn );
   386     sh4_x86.tstate = TSTATE_NONE;
   387 :}
   388 ADD #imm, Rn {:  
   389     load_reg( R_EAX, Rn );
   390     ADD_imm8s_r32( imm, R_EAX );
   391     store_reg( R_EAX, Rn );
   392     sh4_x86.tstate = TSTATE_NONE;
   393 :}
   394 ADDC Rm, Rn {:
   395     if( sh4_x86.tstate != TSTATE_C ) {
   396 	LDC_t();
   397     }
   398     load_reg( R_EAX, Rm );
   399     load_reg( R_ECX, Rn );
   400     ADC_r32_r32( R_EAX, R_ECX );
   401     store_reg( R_ECX, Rn );
   402     SETC_t();
   403     sh4_x86.tstate = TSTATE_C;
   404 :}
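
/* Reference model (illustrative): ADDC computes
 *     uint64_t sum = (uint64_t)Rn + Rm + T;
 *     Rn = (uint32_t)sum;  T = sum >> 32;
 * LDC_t() materialises T in the x86 carry flag for the ADC, and SETC_t()
 * writes the carry-out back to sh4r.t. */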
   405 ADDV Rm, Rn {:
   406     load_reg( R_EAX, Rm );
   407     load_reg( R_ECX, Rn );
   408     ADD_r32_r32( R_EAX, R_ECX );
   409     store_reg( R_ECX, Rn );
   410     SETO_t();
   411     sh4_x86.tstate = TSTATE_O;
   412 :}
   413 AND Rm, Rn {:
   414     load_reg( R_EAX, Rm );
   415     load_reg( R_ECX, Rn );
   416     AND_r32_r32( R_EAX, R_ECX );
   417     store_reg( R_ECX, Rn );
   418     sh4_x86.tstate = TSTATE_NONE;
   419 :}
   420 AND #imm, R0 {:  
   421     load_reg( R_EAX, 0 );
   422     AND_imm32_r32(imm, R_EAX); 
   423     store_reg( R_EAX, 0 );
   424     sh4_x86.tstate = TSTATE_NONE;
   425 :}
   426 AND.B #imm, @(R0, GBR) {: 
   427     load_reg( R_EAX, 0 );
   428     load_spreg( R_ECX, R_GBR );
   429     ADD_r32_r32( R_ECX, R_EAX );
   430     MMU_TRANSLATE_WRITE( R_EAX );
   431     PUSH_realigned_r32(R_EAX);
   432     MEM_READ_BYTE( R_EAX, R_EAX );
   433     POP_realigned_r32(R_ECX);
   434     AND_imm32_r32(imm, R_EAX );
   435     MEM_WRITE_BYTE( R_ECX, R_EAX );
   436     sh4_x86.tstate = TSTATE_NONE;
   437 :}
   438 CMP/EQ Rm, Rn {:  
   439     load_reg( R_EAX, Rm );
   440     load_reg( R_ECX, Rn );
   441     CMP_r32_r32( R_EAX, R_ECX );
   442     SETE_t();
   443     sh4_x86.tstate = TSTATE_E;
   444 :}
   445 CMP/EQ #imm, R0 {:  
   446     load_reg( R_EAX, 0 );
   447     CMP_imm8s_r32(imm, R_EAX);
   448     SETE_t();
   449     sh4_x86.tstate = TSTATE_E;
   450 :}
   451 CMP/GE Rm, Rn {:  
   452     load_reg( R_EAX, Rm );
   453     load_reg( R_ECX, Rn );
   454     CMP_r32_r32( R_EAX, R_ECX );
   455     SETGE_t();
   456     sh4_x86.tstate = TSTATE_GE;
   457 :}
   458 CMP/GT Rm, Rn {: 
   459     load_reg( R_EAX, Rm );
   460     load_reg( R_ECX, Rn );
   461     CMP_r32_r32( R_EAX, R_ECX );
   462     SETG_t();
   463     sh4_x86.tstate = TSTATE_G;
   464 :}
   465 CMP/HI Rm, Rn {:  
   466     load_reg( R_EAX, Rm );
   467     load_reg( R_ECX, Rn );
   468     CMP_r32_r32( R_EAX, R_ECX );
   469     SETA_t();
   470     sh4_x86.tstate = TSTATE_A;
   471 :}
   472 CMP/HS Rm, Rn {: 
   473     load_reg( R_EAX, Rm );
   474     load_reg( R_ECX, Rn );
   475     CMP_r32_r32( R_EAX, R_ECX );
   476     SETAE_t();
   477     sh4_x86.tstate = TSTATE_AE;
   478  :}
   479 CMP/PL Rn {: 
   480     load_reg( R_EAX, Rn );
   481     CMP_imm8s_r32( 0, R_EAX );
   482     SETG_t();
   483     sh4_x86.tstate = TSTATE_G;
   484 :}
   485 CMP/PZ Rn {:  
   486     load_reg( R_EAX, Rn );
   487     CMP_imm8s_r32( 0, R_EAX );
   488     SETGE_t();
   489     sh4_x86.tstate = TSTATE_GE;
   490 :}
   491 CMP/STR Rm, Rn {:  
   492     load_reg( R_EAX, Rm );
   493     load_reg( R_ECX, Rn );
   494     XOR_r32_r32( R_ECX, R_EAX );
   495     TEST_r8_r8( R_AL, R_AL );
   496     JE_rel8(13, target1);
   497     TEST_r8_r8( R_AH, R_AH ); // 2
   498     JE_rel8(9, target2);
   499     SHR_imm8_r32( 16, R_EAX ); // 3
   500     TEST_r8_r8( R_AL, R_AL ); // 2
   501     JE_rel8(2, target3);
   502     TEST_r8_r8( R_AH, R_AH ); // 2
   503     JMP_TARGET(target1);
   504     JMP_TARGET(target2);
   505     JMP_TARGET(target3);
   506     SETE_t();
   507     sh4_x86.tstate = TSTATE_E;
   508 :}
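
/* CMP/STR sets T when any byte of Rm equals the corresponding byte of Rn.
 * Reference model (illustrative):
 *     uint32_t t = Rm ^ Rn;
 *     T = ((t & 0xFF) == 0) || ((t & 0xFF00) == 0) ||
 *         ((t & 0xFF0000) == 0) || ((t & 0xFF000000) == 0);
 * The XOR makes equal bytes zero, so the byte-test chain above falls
 * through to SETE on the first zero byte found. */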
   509 DIV0S Rm, Rn {:
   510     load_reg( R_EAX, Rm );
   511     load_reg( R_ECX, Rn );
   512     SHR_imm8_r32( 31, R_EAX );
   513     SHR_imm8_r32( 31, R_ECX );
   514     store_spreg( R_EAX, R_M );
   515     store_spreg( R_ECX, R_Q );
   516     CMP_r32_r32( R_EAX, R_ECX );
   517     SETNE_t();
   518     sh4_x86.tstate = TSTATE_NE;
   519 :}
   520 DIV0U {:  
   521     XOR_r32_r32( R_EAX, R_EAX );
   522     store_spreg( R_EAX, R_Q );
   523     store_spreg( R_EAX, R_M );
   524     store_spreg( R_EAX, R_T );
   525     sh4_x86.tstate = TSTATE_C; // works for DIV1
   526 :}
   527 DIV1 Rm, Rn {:
   528     load_spreg( R_ECX, R_M );
   529     load_reg( R_EAX, Rn );
   530     if( sh4_x86.tstate != TSTATE_C ) {
   531 	LDC_t();
   532     }
   533     RCL1_r32( R_EAX );
   534     SETC_r8( R_DL ); // Q'
   535     CMP_sh4r_r32( R_Q, R_ECX );
   536     JE_rel8(5, mqequal);
   537     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   538     JMP_rel8(3, end);
   539     JMP_TARGET(mqequal);
   540     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   541     JMP_TARGET(end);
   542     store_reg( R_EAX, Rn ); // Done with Rn now
   543     SETC_r8(R_AL); // tmp1
   544     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   545     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   546     store_spreg( R_ECX, R_Q );
   547     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   548     MOVZX_r8_r32( R_AL, R_EAX );
   549     store_spreg( R_EAX, R_T );
   550     sh4_x86.tstate = TSTATE_NONE;
   551 :}
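
/* DIV1 is one step of a non-restoring 1-bit division (see the SH4
 * programming manual). Roughly (illustrative):
 *     old_q = Q;  msb = Rn >> 31;  Rn = (Rn << 1) | T;
 *     if( old_q == M ) Rn -= Rm; else Rn += Rm;    // carry/borrow -> c
 *     Q = msb ^ c ^ M;  T = (Q == M);
 * The sequence above keeps msb in DL and c in AL, combining them with M
 * (in CL) to produce the new Q and T. */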
   552 DMULS.L Rm, Rn {:  
   553     load_reg( R_EAX, Rm );
   554     load_reg( R_ECX, Rn );
   555     IMUL_r32(R_ECX);
   556     store_spreg( R_EDX, R_MACH );
   557     store_spreg( R_EAX, R_MACL );
   558     sh4_x86.tstate = TSTATE_NONE;
   559 :}
   560 DMULU.L Rm, Rn {:  
   561     load_reg( R_EAX, Rm );
   562     load_reg( R_ECX, Rn );
   563     MUL_r32(R_ECX);
   564     store_spreg( R_EDX, R_MACH );
   565     store_spreg( R_EAX, R_MACL );    
   566     sh4_x86.tstate = TSTATE_NONE;
   567 :}
   568 DT Rn {:  
   569     load_reg( R_EAX, Rn );
   570     ADD_imm8s_r32( -1, R_EAX );
   571     store_reg( R_EAX, Rn );
   572     SETE_t();
   573     sh4_x86.tstate = TSTATE_E;
   574 :}
   575 EXTS.B Rm, Rn {:  
   576     load_reg( R_EAX, Rm );
   577     MOVSX_r8_r32( R_EAX, R_EAX );
   578     store_reg( R_EAX, Rn );
   579 :}
   580 EXTS.W Rm, Rn {:  
   581     load_reg( R_EAX, Rm );
   582     MOVSX_r16_r32( R_EAX, R_EAX );
   583     store_reg( R_EAX, Rn );
   584 :}
   585 EXTU.B Rm, Rn {:  
   586     load_reg( R_EAX, Rm );
   587     MOVZX_r8_r32( R_EAX, R_EAX );
   588     store_reg( R_EAX, Rn );
   589 :}
   590 EXTU.W Rm, Rn {:  
   591     load_reg( R_EAX, Rm );
   592     MOVZX_r16_r32( R_EAX, R_EAX );
   593     store_reg( R_EAX, Rn );
   594 :}
   595 MAC.L @Rm+, @Rn+ {:
   596     if( Rm == Rn ) {
   597 	load_reg( R_EAX, Rm );
   598 	check_ralign32( R_EAX );
   599 	MMU_TRANSLATE_READ( R_EAX );
   600 	PUSH_realigned_r32( R_EAX );
   601 	load_reg( R_EAX, Rn );
   602 	ADD_imm8s_r32( 4, R_EAX );
   603 	MMU_TRANSLATE_READ( R_EAX );
   604 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
   605 	// Note translate twice in case of page boundaries. Maybe worth
   606 	// adding a page-boundary check to skip the second translation
   607     } else {
   608 	load_reg( R_EAX, Rm );
   609 	check_ralign32( R_EAX );
   610 	MMU_TRANSLATE_READ( R_EAX );
   611 	PUSH_realigned_r32( R_EAX );
   612 	load_reg( R_EAX, Rn );
   613 	check_ralign32( R_EAX );
   614 	MMU_TRANSLATE_READ( R_EAX );
   615 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   616 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   617     }
   618     MEM_READ_LONG( R_EAX, R_EAX );
   619     POP_r32( R_ECX );
   620     PUSH_r32( R_EAX );
   621     MEM_READ_LONG( R_ECX, R_EAX );
   622     POP_realigned_r32( R_ECX );
   624     IMUL_r32( R_ECX );
   625     ADD_r32_sh4r( R_EAX, R_MACL );
   626     ADC_r32_sh4r( R_EDX, R_MACH );
   628     load_spreg( R_ECX, R_S );
   629     TEST_r32_r32(R_ECX, R_ECX);
   630     JE_rel8( CALL_FUNC0_SIZE, nosat );
   631     call_func0( signsat48 );
   632     JMP_TARGET( nosat );
   633     sh4_x86.tstate = TSTATE_NONE;
   634 :}
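
/* Reference model (illustrative): MACH:MACL += (int64_t)mem[Rm] * (int64_t)mem[Rn],
 * with Rm and Rn post-incremented. When S is set, signsat48 is assumed to
 * clamp the accumulator to the signed 48-bit range
 * 0xFFFF800000000000 .. 0x00007FFFFFFFFFFF. */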
   635 MAC.W @Rm+, @Rn+ {:  
   636     if( Rm == Rn ) {
   637 	load_reg( R_EAX, Rm );
   638 	check_ralign16( R_EAX );
   639 	MMU_TRANSLATE_READ( R_EAX );
   640 	PUSH_realigned_r32( R_EAX );
   641 	load_reg( R_EAX, Rn );
   642 	ADD_imm8s_r32( 2, R_EAX );
   643 	MMU_TRANSLATE_READ( R_EAX );
   644 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   645 	// Note translate twice in case of page boundaries. Maybe worth
   646 	// adding a page-boundary check to skip the second translation
   647     } else {
   648 	load_reg( R_EAX, Rm );
   649 	check_ralign16( R_EAX );
   650 	MMU_TRANSLATE_READ( R_EAX );
   651 	PUSH_realigned_r32( R_EAX );
   652 	load_reg( R_EAX, Rn );
   653 	check_ralign16( R_EAX );
   654 	MMU_TRANSLATE_READ( R_EAX );
   655 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   656 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   657     }
   658     MEM_READ_WORD( R_EAX, R_EAX );
   659     POP_r32( R_ECX );
   660     PUSH_r32( R_EAX );
   661     MEM_READ_WORD( R_ECX, R_EAX );
   662     POP_realigned_r32( R_ECX );
   663     IMUL_r32( R_ECX );
   665     load_spreg( R_ECX, R_S );
   666     TEST_r32_r32( R_ECX, R_ECX );
   667     JE_rel8( 47, nosat );
   669     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   670     JNO_rel8( 51, end );            // 2
   671     load_imm32( R_EDX, 1 );         // 5
   672     store_spreg( R_EDX, R_MACH );   // 6
   673     JS_rel8( 13, positive );        // 2
   674     load_imm32( R_EAX, 0x80000000 );// 5
   675     store_spreg( R_EAX, R_MACL );   // 6
   676     JMP_rel8( 25, end2 );           // 2
   678     JMP_TARGET(positive);
   679     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   680     store_spreg( R_EAX, R_MACL );   // 6
   681     JMP_rel8( 12, end3);            // 2
   683     JMP_TARGET(nosat);
   684     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   685     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   686     JMP_TARGET(end);
   687     JMP_TARGET(end2);
   688     JMP_TARGET(end3);
   689     sh4_x86.tstate = TSTATE_NONE;
   690 :}
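
/* When S is set, MAC.W saturates to 32 bits instead: on overflow MACL is
 * clamped to 0x80000000/0x7FFFFFFF and MACH is set to 1 as a sticky
 * overflow flag. The byte counts in the comments are the lengths of the
 * emitted x86 instructions, used to compute the relative jump distances. */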
   691 MOVT Rn {:  
   692     load_spreg( R_EAX, R_T );
   693     store_reg( R_EAX, Rn );
   694 :}
   695 MUL.L Rm, Rn {:  
   696     load_reg( R_EAX, Rm );
   697     load_reg( R_ECX, Rn );
   698     MUL_r32( R_ECX );
   699     store_spreg( R_EAX, R_MACL );
   700     sh4_x86.tstate = TSTATE_NONE;
   701 :}
   702 MULS.W Rm, Rn {:
   703     load_reg16s( R_EAX, Rm );
   704     load_reg16s( R_ECX, Rn );
   705     MUL_r32( R_ECX );
   706     store_spreg( R_EAX, R_MACL );
   707     sh4_x86.tstate = TSTATE_NONE;
   708 :}
   709 MULU.W Rm, Rn {:  
   710     load_reg16u( R_EAX, Rm );
   711     load_reg16u( R_ECX, Rn );
   712     MUL_r32( R_ECX );
   713     store_spreg( R_EAX, R_MACL );
   714     sh4_x86.tstate = TSTATE_NONE;
   715 :}
   716 NEG Rm, Rn {:
   717     load_reg( R_EAX, Rm );
   718     NEG_r32( R_EAX );
   719     store_reg( R_EAX, Rn );
   720     sh4_x86.tstate = TSTATE_NONE;
   721 :}
   722 NEGC Rm, Rn {:  
   723     load_reg( R_EAX, Rm );
   724     XOR_r32_r32( R_ECX, R_ECX );
   725     LDC_t();
   726     SBB_r32_r32( R_EAX, R_ECX );
   727     store_reg( R_ECX, Rn );
   728     SETC_t();
   729     sh4_x86.tstate = TSTATE_C;
   730 :}
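
/* Reference model (illustrative): NEGC computes
 *     Rn = 0 - Rm - T;  T = (borrow occurred) ? 1 : 0;
 * which the SBB above evaluates with T loaded into the carry flag. */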
   731 NOT Rm, Rn {:  
   732     load_reg( R_EAX, Rm );
   733     NOT_r32( R_EAX );
   734     store_reg( R_EAX, Rn );
   735     sh4_x86.tstate = TSTATE_NONE;
   736 :}
   737 OR Rm, Rn {:  
   738     load_reg( R_EAX, Rm );
   739     load_reg( R_ECX, Rn );
   740     OR_r32_r32( R_EAX, R_ECX );
   741     store_reg( R_ECX, Rn );
   742     sh4_x86.tstate = TSTATE_NONE;
   743 :}
   744 OR #imm, R0 {:
   745     load_reg( R_EAX, 0 );
   746     OR_imm32_r32(imm, R_EAX);
   747     store_reg( R_EAX, 0 );
   748     sh4_x86.tstate = TSTATE_NONE;
   749 :}
   750 OR.B #imm, @(R0, GBR) {:  
   751     load_reg( R_EAX, 0 );
   752     load_spreg( R_ECX, R_GBR );
   753     ADD_r32_r32( R_ECX, R_EAX );
   754     MMU_TRANSLATE_WRITE( R_EAX );
   755     PUSH_realigned_r32(R_EAX);
   756     MEM_READ_BYTE( R_EAX, R_EAX );
   757     POP_realigned_r32(R_ECX);
   758     OR_imm32_r32(imm, R_EAX );
   759     MEM_WRITE_BYTE( R_ECX, R_EAX );
   760     sh4_x86.tstate = TSTATE_NONE;
   761 :}
   762 ROTCL Rn {:
   763     load_reg( R_EAX, Rn );
   764     if( sh4_x86.tstate != TSTATE_C ) {
   765 	LDC_t();
   766     }
   767     RCL1_r32( R_EAX );
   768     store_reg( R_EAX, Rn );
   769     SETC_t();
   770     sh4_x86.tstate = TSTATE_C;
   771 :}
   772 ROTCR Rn {:  
   773     load_reg( R_EAX, Rn );
   774     if( sh4_x86.tstate != TSTATE_C ) {
   775 	LDC_t();
   776     }
   777     RCR1_r32( R_EAX );
   778     store_reg( R_EAX, Rn );
   779     SETC_t();
   780     sh4_x86.tstate = TSTATE_C;
   781 :}
   782 ROTL Rn {:  
   783     load_reg( R_EAX, Rn );
   784     ROL1_r32( R_EAX );
   785     store_reg( R_EAX, Rn );
   786     SETC_t();
   787     sh4_x86.tstate = TSTATE_C;
   788 :}
   789 ROTR Rn {:  
   790     load_reg( R_EAX, Rn );
   791     ROR1_r32( R_EAX );
   792     store_reg( R_EAX, Rn );
   793     SETC_t();
   794     sh4_x86.tstate = TSTATE_C;
   795 :}
   796 SHAD Rm, Rn {:
   797     /* Annoyingly enough, not directly convertible */
   798     load_reg( R_EAX, Rn );
   799     load_reg( R_ECX, Rm );
   800     CMP_imm32_r32( 0, R_ECX );
   801     JGE_rel8(16, doshl);
   803     NEG_r32( R_ECX );      // 2
   804     AND_imm8_r8( 0x1F, R_CL ); // 3
   805     JE_rel8( 4, emptysar);     // 2
   806     SAR_r32_CL( R_EAX );       // 2
   807     JMP_rel8(10, end);          // 2
   809     JMP_TARGET(emptysar);
   810     SAR_imm8_r32(31, R_EAX );  // 3
   811     JMP_rel8(5, end2);
   813     JMP_TARGET(doshl);
   814     AND_imm8_r8( 0x1F, R_CL ); // 3
   815     SHL_r32_CL( R_EAX );       // 2
   816     JMP_TARGET(end);
   817     JMP_TARGET(end2);
   818     store_reg( R_EAX, Rn );
   819     sh4_x86.tstate = TSTATE_NONE;
   820 :}
   821 SHLD Rm, Rn {:  
   822     load_reg( R_EAX, Rn );
   823     load_reg( R_ECX, Rm );
   824     CMP_imm32_r32( 0, R_ECX );
   825     JGE_rel8(15, doshl);
   827     NEG_r32( R_ECX );      // 2
   828     AND_imm8_r8( 0x1F, R_CL ); // 3
   829     JE_rel8( 4, emptyshr );
   830     SHR_r32_CL( R_EAX );       // 2
   831     JMP_rel8(9, end);          // 2
   833     JMP_TARGET(emptyshr);
   834     XOR_r32_r32( R_EAX, R_EAX );
   835     JMP_rel8(5, end2);
   837     JMP_TARGET(doshl);
   838     AND_imm8_r8( 0x1F, R_CL ); // 3
   839     SHL_r32_CL( R_EAX );       // 2
   840     JMP_TARGET(end);
   841     JMP_TARGET(end2);
   842     store_reg( R_EAX, Rn );
   843     sh4_x86.tstate = TSTATE_NONE;
   844 :}
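
/* Reference model for both dynamic shifts above (illustrative):
 *     if( (int32_t)Rm >= 0 )    Rn <<= (Rm & 0x1F);
 *     else if( -Rm & 0x1F )     Rn >>= (-Rm & 0x1F);  // SHAD arithmetic, SHLD logical
 *     else                      Rn = SHAD ? ((int32_t)Rn >> 31) : 0;
 * The emptysar/emptyshr paths handle the shift-by-32 case, which x86 shifts
 * (masking the count to 5 bits) would otherwise get wrong. */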
   845 SHAL Rn {: 
   846     load_reg( R_EAX, Rn );
   847     SHL1_r32( R_EAX );
   848     SETC_t();
   849     store_reg( R_EAX, Rn );
   850     sh4_x86.tstate = TSTATE_C;
   851 :}
   852 SHAR Rn {:  
   853     load_reg( R_EAX, Rn );
   854     SAR1_r32( R_EAX );
   855     SETC_t();
   856     store_reg( R_EAX, Rn );
   857     sh4_x86.tstate = TSTATE_C;
   858 :}
   859 SHLL Rn {:  
   860     load_reg( R_EAX, Rn );
   861     SHL1_r32( R_EAX );
   862     SETC_t();
   863     store_reg( R_EAX, Rn );
   864     sh4_x86.tstate = TSTATE_C;
   865 :}
   866 SHLL2 Rn {:
   867     load_reg( R_EAX, Rn );
   868     SHL_imm8_r32( 2, R_EAX );
   869     store_reg( R_EAX, Rn );
   870     sh4_x86.tstate = TSTATE_NONE;
   871 :}
   872 SHLL8 Rn {:  
   873     load_reg( R_EAX, Rn );
   874     SHL_imm8_r32( 8, R_EAX );
   875     store_reg( R_EAX, Rn );
   876     sh4_x86.tstate = TSTATE_NONE;
   877 :}
   878 SHLL16 Rn {:  
   879     load_reg( R_EAX, Rn );
   880     SHL_imm8_r32( 16, R_EAX );
   881     store_reg( R_EAX, Rn );
   882     sh4_x86.tstate = TSTATE_NONE;
   883 :}
   884 SHLR Rn {:  
   885     load_reg( R_EAX, Rn );
   886     SHR1_r32( R_EAX );
   887     SETC_t();
   888     store_reg( R_EAX, Rn );
   889     sh4_x86.tstate = TSTATE_C;
   890 :}
   891 SHLR2 Rn {:  
   892     load_reg( R_EAX, Rn );
   893     SHR_imm8_r32( 2, R_EAX );
   894     store_reg( R_EAX, Rn );
   895     sh4_x86.tstate = TSTATE_NONE;
   896 :}
   897 SHLR8 Rn {:  
   898     load_reg( R_EAX, Rn );
   899     SHR_imm8_r32( 8, R_EAX );
   900     store_reg( R_EAX, Rn );
   901     sh4_x86.tstate = TSTATE_NONE;
   902 :}
   903 SHLR16 Rn {:  
   904     load_reg( R_EAX, Rn );
   905     SHR_imm8_r32( 16, R_EAX );
   906     store_reg( R_EAX, Rn );
   907     sh4_x86.tstate = TSTATE_NONE;
   908 :}
   909 SUB Rm, Rn {:  
   910     load_reg( R_EAX, Rm );
   911     load_reg( R_ECX, Rn );
   912     SUB_r32_r32( R_EAX, R_ECX );
   913     store_reg( R_ECX, Rn );
   914     sh4_x86.tstate = TSTATE_NONE;
   915 :}
   916 SUBC Rm, Rn {:  
   917     load_reg( R_EAX, Rm );
   918     load_reg( R_ECX, Rn );
   919     if( sh4_x86.tstate != TSTATE_C ) {
   920 	LDC_t();
   921     }
   922     SBB_r32_r32( R_EAX, R_ECX );
   923     store_reg( R_ECX, Rn );
   924     SETC_t();
   925     sh4_x86.tstate = TSTATE_C;
   926 :}
   927 SUBV Rm, Rn {:  
   928     load_reg( R_EAX, Rm );
   929     load_reg( R_ECX, Rn );
   930     SUB_r32_r32( R_EAX, R_ECX );
   931     store_reg( R_ECX, Rn );
   932     SETO_t();
   933     sh4_x86.tstate = TSTATE_O;
   934 :}
   935 SWAP.B Rm, Rn {:  
   936     load_reg( R_EAX, Rm );
   937     XCHG_r8_r8( R_AL, R_AH );
   938     store_reg( R_EAX, Rn );
   939 :}
   940 SWAP.W Rm, Rn {:  
   941     load_reg( R_EAX, Rm );
   942     MOV_r32_r32( R_EAX, R_ECX );
   943     SHL_imm8_r32( 16, R_ECX );
   944     SHR_imm8_r32( 16, R_EAX );
   945     OR_r32_r32( R_EAX, R_ECX );
   946     store_reg( R_ECX, Rn );
   947     sh4_x86.tstate = TSTATE_NONE;
   948 :}
   949 TAS.B @Rn {:  
   950     load_reg( R_EAX, Rn );
   951     MMU_TRANSLATE_WRITE( R_EAX );
   952     PUSH_realigned_r32( R_EAX );
   953     MEM_READ_BYTE( R_EAX, R_EAX );
   954     TEST_r8_r8( R_AL, R_AL );
   955     SETE_t();
   956     OR_imm8_r8( 0x80, R_AL );
   957     POP_realigned_r32( R_ECX );
   958     MEM_WRITE_BYTE( R_ECX, R_EAX );
   959     sh4_x86.tstate = TSTATE_NONE;
   960 :}
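
/* TAS.B: T = (byte == 0), then the byte is written back with bit 7 set.
 * On hardware this is an atomic read-modify-write; here the read and write
 * are separate calls, which suffices for a single-threaded emulated bus. */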
   961 TST Rm, Rn {:  
   962     load_reg( R_EAX, Rm );
   963     load_reg( R_ECX, Rn );
   964     TEST_r32_r32( R_EAX, R_ECX );
   965     SETE_t();
   966     sh4_x86.tstate = TSTATE_E;
   967 :}
   968 TST #imm, R0 {:  
   969     load_reg( R_EAX, 0 );
   970     TEST_imm32_r32( imm, R_EAX );
   971     SETE_t();
   972     sh4_x86.tstate = TSTATE_E;
   973 :}
   974 TST.B #imm, @(R0, GBR) {:  
   975     load_reg( R_EAX, 0);
    976     load_spreg( R_ECX, R_GBR );
   977     ADD_r32_r32( R_ECX, R_EAX );
   978     MMU_TRANSLATE_READ( R_EAX );
   979     MEM_READ_BYTE( R_EAX, R_EAX );
   980     TEST_imm8_r8( imm, R_AL );
   981     SETE_t();
   982     sh4_x86.tstate = TSTATE_E;
   983 :}
   984 XOR Rm, Rn {:  
   985     load_reg( R_EAX, Rm );
   986     load_reg( R_ECX, Rn );
   987     XOR_r32_r32( R_EAX, R_ECX );
   988     store_reg( R_ECX, Rn );
   989     sh4_x86.tstate = TSTATE_NONE;
   990 :}
   991 XOR #imm, R0 {:  
   992     load_reg( R_EAX, 0 );
   993     XOR_imm32_r32( imm, R_EAX );
   994     store_reg( R_EAX, 0 );
   995     sh4_x86.tstate = TSTATE_NONE;
   996 :}
   997 XOR.B #imm, @(R0, GBR) {:  
   998     load_reg( R_EAX, 0 );
   999     load_spreg( R_ECX, R_GBR );
  1000     ADD_r32_r32( R_ECX, R_EAX );
  1001     MMU_TRANSLATE_WRITE( R_EAX );
  1002     PUSH_realigned_r32(R_EAX);
  1003     MEM_READ_BYTE(R_EAX, R_EAX);
  1004     POP_realigned_r32(R_ECX);
  1005     XOR_imm32_r32( imm, R_EAX );
  1006     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1007     sh4_x86.tstate = TSTATE_NONE;
  1008 :}
  1009 XTRCT Rm, Rn {:
  1010     load_reg( R_EAX, Rm );
  1011     load_reg( R_ECX, Rn );
  1012     SHL_imm8_r32( 16, R_EAX );
  1013     SHR_imm8_r32( 16, R_ECX );
  1014     OR_r32_r32( R_EAX, R_ECX );
  1015     store_reg( R_ECX, Rn );
  1016     sh4_x86.tstate = TSTATE_NONE;
  1017 :}
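
/* XTRCT extracts the middle 32 bits of the 64-bit pair Rm:Rn, ie
 * Rn = (Rm << 16) | (Rn >> 16), which is exactly the shift/OR sequence above. */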
  1019 /* Data move instructions */
  1020 MOV Rm, Rn {:  
  1021     load_reg( R_EAX, Rm );
  1022     store_reg( R_EAX, Rn );
  1023 :}
  1024 MOV #imm, Rn {:  
  1025     load_imm32( R_EAX, imm );
  1026     store_reg( R_EAX, Rn );
  1027 :}
  1028 MOV.B Rm, @Rn {:  
  1029     load_reg( R_EAX, Rn );
  1030     MMU_TRANSLATE_WRITE( R_EAX );
  1031     load_reg( R_EDX, Rm );
  1032     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1033     sh4_x86.tstate = TSTATE_NONE;
  1034 :}
  1035 MOV.B Rm, @-Rn {:  
  1036     load_reg( R_EAX, Rn );
  1037     ADD_imm8s_r32( -1, R_EAX );
  1038     MMU_TRANSLATE_WRITE( R_EAX );
  1039     load_reg( R_EDX, Rm );
  1040     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
  1041     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1042     sh4_x86.tstate = TSTATE_NONE;
  1043 :}
  1044 MOV.B Rm, @(R0, Rn) {:  
  1045     load_reg( R_EAX, 0 );
  1046     load_reg( R_ECX, Rn );
  1047     ADD_r32_r32( R_ECX, R_EAX );
  1048     MMU_TRANSLATE_WRITE( R_EAX );
  1049     load_reg( R_EDX, Rm );
  1050     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1051     sh4_x86.tstate = TSTATE_NONE;
  1052 :}
  1053 MOV.B R0, @(disp, GBR) {:  
  1054     load_spreg( R_EAX, R_GBR );
  1055     ADD_imm32_r32( disp, R_EAX );
  1056     MMU_TRANSLATE_WRITE( R_EAX );
  1057     load_reg( R_EDX, 0 );
  1058     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1059     sh4_x86.tstate = TSTATE_NONE;
  1060 :}
  1061 MOV.B R0, @(disp, Rn) {:  
  1062     load_reg( R_EAX, Rn );
  1063     ADD_imm32_r32( disp, R_EAX );
  1064     MMU_TRANSLATE_WRITE( R_EAX );
  1065     load_reg( R_EDX, 0 );
  1066     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1067     sh4_x86.tstate = TSTATE_NONE;
  1068 :}
  1069 MOV.B @Rm, Rn {:  
  1070     load_reg( R_EAX, Rm );
  1071     MMU_TRANSLATE_READ( R_EAX );
  1072     MEM_READ_BYTE( R_EAX, R_EAX );
  1073     store_reg( R_EAX, Rn );
  1074     sh4_x86.tstate = TSTATE_NONE;
  1075 :}
  1076 MOV.B @Rm+, Rn {:  
  1077     load_reg( R_EAX, Rm );
  1078     MMU_TRANSLATE_READ( R_EAX );
  1079     ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
  1080     MEM_READ_BYTE( R_EAX, R_EAX );
  1081     store_reg( R_EAX, Rn );
  1082     sh4_x86.tstate = TSTATE_NONE;
  1083 :}
  1084 MOV.B @(R0, Rm), Rn {:  
  1085     load_reg( R_EAX, 0 );
  1086     load_reg( R_ECX, Rm );
  1087     ADD_r32_r32( R_ECX, R_EAX );
   1088     MMU_TRANSLATE_READ( R_EAX );
  1089     MEM_READ_BYTE( R_EAX, R_EAX );
  1090     store_reg( R_EAX, Rn );
  1091     sh4_x86.tstate = TSTATE_NONE;
  1092 :}
  1093 MOV.B @(disp, GBR), R0 {:  
  1094     load_spreg( R_EAX, R_GBR );
  1095     ADD_imm32_r32( disp, R_EAX );
  1096     MMU_TRANSLATE_READ( R_EAX );
  1097     MEM_READ_BYTE( R_EAX, R_EAX );
  1098     store_reg( R_EAX, 0 );
  1099     sh4_x86.tstate = TSTATE_NONE;
  1100 :}
  1101 MOV.B @(disp, Rm), R0 {:  
  1102     load_reg( R_EAX, Rm );
  1103     ADD_imm32_r32( disp, R_EAX );
  1104     MMU_TRANSLATE_READ( R_EAX );
  1105     MEM_READ_BYTE( R_EAX, R_EAX );
  1106     store_reg( R_EAX, 0 );
  1107     sh4_x86.tstate = TSTATE_NONE;
  1108 :}
  1109 MOV.L Rm, @Rn {:
  1110     load_reg( R_EAX, Rn );
  1111     check_walign32(R_EAX);
  1112     MMU_TRANSLATE_WRITE( R_EAX );
  1113     load_reg( R_EDX, Rm );
  1114     MEM_WRITE_LONG( R_EAX, R_EDX );
  1115     sh4_x86.tstate = TSTATE_NONE;
  1116 :}
  1117 MOV.L Rm, @-Rn {:  
  1118     load_reg( R_EAX, Rn );
  1119     ADD_imm8s_r32( -4, R_EAX );
  1120     check_walign32( R_EAX );
  1121     MMU_TRANSLATE_WRITE( R_EAX );
  1122     load_reg( R_EDX, Rm );
  1123     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  1124     MEM_WRITE_LONG( R_EAX, R_EDX );
  1125     sh4_x86.tstate = TSTATE_NONE;
  1126 :}
  1127 MOV.L Rm, @(R0, Rn) {:  
  1128     load_reg( R_EAX, 0 );
  1129     load_reg( R_ECX, Rn );
  1130     ADD_r32_r32( R_ECX, R_EAX );
  1131     check_walign32( R_EAX );
  1132     MMU_TRANSLATE_WRITE( R_EAX );
  1133     load_reg( R_EDX, Rm );
  1134     MEM_WRITE_LONG( R_EAX, R_EDX );
  1135     sh4_x86.tstate = TSTATE_NONE;
  1136 :}
  1137 MOV.L R0, @(disp, GBR) {:  
  1138     load_spreg( R_EAX, R_GBR );
  1139     ADD_imm32_r32( disp, R_EAX );
  1140     check_walign32( R_EAX );
  1141     MMU_TRANSLATE_WRITE( R_EAX );
  1142     load_reg( R_EDX, 0 );
  1143     MEM_WRITE_LONG( R_EAX, R_EDX );
  1144     sh4_x86.tstate = TSTATE_NONE;
  1145 :}
  1146 MOV.L Rm, @(disp, Rn) {:  
  1147     load_reg( R_EAX, Rn );
  1148     ADD_imm32_r32( disp, R_EAX );
  1149     check_walign32( R_EAX );
  1150     MMU_TRANSLATE_WRITE( R_EAX );
  1151     load_reg( R_EDX, Rm );
  1152     MEM_WRITE_LONG( R_EAX, R_EDX );
  1153     sh4_x86.tstate = TSTATE_NONE;
  1154 :}
  1155 MOV.L @Rm, Rn {:  
  1156     load_reg( R_EAX, Rm );
  1157     check_ralign32( R_EAX );
  1158     MMU_TRANSLATE_READ( R_EAX );
  1159     MEM_READ_LONG( R_EAX, R_EAX );
  1160     store_reg( R_EAX, Rn );
  1161     sh4_x86.tstate = TSTATE_NONE;
  1162 :}
  1163 MOV.L @Rm+, Rn {:  
  1164     load_reg( R_EAX, Rm );
  1165     check_ralign32( R_EAX );
  1166     MMU_TRANSLATE_READ( R_EAX );
  1167     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1168     MEM_READ_LONG( R_EAX, R_EAX );
  1169     store_reg( R_EAX, Rn );
  1170     sh4_x86.tstate = TSTATE_NONE;
  1171 :}
  1172 MOV.L @(R0, Rm), Rn {:  
  1173     load_reg( R_EAX, 0 );
  1174     load_reg( R_ECX, Rm );
  1175     ADD_r32_r32( R_ECX, R_EAX );
  1176     check_ralign32( R_EAX );
  1177     MMU_TRANSLATE_READ( R_EAX );
  1178     MEM_READ_LONG( R_EAX, R_EAX );
  1179     store_reg( R_EAX, Rn );
  1180     sh4_x86.tstate = TSTATE_NONE;
  1181 :}
  1182 MOV.L @(disp, GBR), R0 {:
  1183     load_spreg( R_EAX, R_GBR );
  1184     ADD_imm32_r32( disp, R_EAX );
  1185     check_ralign32( R_EAX );
  1186     MMU_TRANSLATE_READ( R_EAX );
  1187     MEM_READ_LONG( R_EAX, R_EAX );
  1188     store_reg( R_EAX, 0 );
  1189     sh4_x86.tstate = TSTATE_NONE;
  1190 :}
  1191 MOV.L @(disp, PC), Rn {:  
  1192     if( sh4_x86.in_delay_slot ) {
  1193 	SLOTILLEGAL();
  1194     } else {
  1195 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1196 	if( IS_IN_ICACHE(target) ) {
  1197 	    // If the target address is in the same page as the code, it's
  1198 	    // pretty safe to just ref it directly and circumvent the whole
  1199 	    // memory subsystem. (this is a big performance win)
  1201 	    // FIXME: There's a corner-case that's not handled here when
  1202 	    // the current code-page is in the ITLB but not in the UTLB.
  1203 	    // (should generate a TLB miss although need to test SH4 
  1204 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1205 	    // behaviour though.
  1206 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1207 	    MOV_moff32_EAX( ptr );
  1208 	} else {
  1209 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1210 	    // different virtual address than the translation was done with,
  1211 	    // but we can safely assume that the low bits are the same.
  1212 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1213 	    ADD_sh4r_r32( R_PC, R_EAX );
  1214 	    MMU_TRANSLATE_READ( R_EAX );
  1215 	    MEM_READ_LONG( R_EAX, R_EAX );
  1216 	    sh4_x86.tstate = TSTATE_NONE;
   1217 	}
  1218 	store_reg( R_EAX, Rn );
   1219     }
  1220 :}
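
/* The immediate loaded in the slow path is the target's offset from the
 * runtime PC: target = (pc & ~3) + disp + 4, so relative to sh4r.pc it is
 * (pc - block_start_pc) + disp + 4 - (pc & 3). This keeps the generated
 * code position-independent if the block runs at a different VMA. */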
  1221 MOV.L @(disp, Rm), Rn {:  
  1222     load_reg( R_EAX, Rm );
  1223     ADD_imm8s_r32( disp, R_EAX );
  1224     check_ralign32( R_EAX );
  1225     MMU_TRANSLATE_READ( R_EAX );
  1226     MEM_READ_LONG( R_EAX, R_EAX );
  1227     store_reg( R_EAX, Rn );
  1228     sh4_x86.tstate = TSTATE_NONE;
  1229 :}
  1230 MOV.W Rm, @Rn {:  
  1231     load_reg( R_EAX, Rn );
  1232     check_walign16( R_EAX );
   1233     MMU_TRANSLATE_WRITE( R_EAX );
  1234     load_reg( R_EDX, Rm );
  1235     MEM_WRITE_WORD( R_EAX, R_EDX );
  1236     sh4_x86.tstate = TSTATE_NONE;
  1237 :}
  1238 MOV.W Rm, @-Rn {:  
  1239     load_reg( R_EAX, Rn );
  1240     ADD_imm8s_r32( -2, R_EAX );
  1241     check_walign16( R_EAX );
  1242     MMU_TRANSLATE_WRITE( R_EAX );
  1243     load_reg( R_EDX, Rm );
  1244     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
  1245     MEM_WRITE_WORD( R_EAX, R_EDX );
  1246     sh4_x86.tstate = TSTATE_NONE;
  1247 :}
  1248 MOV.W Rm, @(R0, Rn) {:  
  1249     load_reg( R_EAX, 0 );
  1250     load_reg( R_ECX, Rn );
  1251     ADD_r32_r32( R_ECX, R_EAX );
  1252     check_walign16( R_EAX );
  1253     MMU_TRANSLATE_WRITE( R_EAX );
  1254     load_reg( R_EDX, Rm );
  1255     MEM_WRITE_WORD( R_EAX, R_EDX );
  1256     sh4_x86.tstate = TSTATE_NONE;
  1257 :}
  1258 MOV.W R0, @(disp, GBR) {:  
  1259     load_spreg( R_EAX, R_GBR );
  1260     ADD_imm32_r32( disp, R_EAX );
  1261     check_walign16( R_EAX );
  1262     MMU_TRANSLATE_WRITE( R_EAX );
  1263     load_reg( R_EDX, 0 );
  1264     MEM_WRITE_WORD( R_EAX, R_EDX );
  1265     sh4_x86.tstate = TSTATE_NONE;
  1266 :}
  1267 MOV.W R0, @(disp, Rn) {:  
  1268     load_reg( R_EAX, Rn );
  1269     ADD_imm32_r32( disp, R_EAX );
  1270     check_walign16( R_EAX );
  1271     MMU_TRANSLATE_WRITE( R_EAX );
  1272     load_reg( R_EDX, 0 );
  1273     MEM_WRITE_WORD( R_EAX, R_EDX );
  1274     sh4_x86.tstate = TSTATE_NONE;
  1275 :}
  1276 MOV.W @Rm, Rn {:  
  1277     load_reg( R_EAX, Rm );
  1278     check_ralign16( R_EAX );
  1279     MMU_TRANSLATE_READ( R_EAX );
  1280     MEM_READ_WORD( R_EAX, R_EAX );
  1281     store_reg( R_EAX, Rn );
  1282     sh4_x86.tstate = TSTATE_NONE;
  1283 :}
  1284 MOV.W @Rm+, Rn {:  
  1285     load_reg( R_EAX, Rm );
  1286     check_ralign16( R_EAX );
  1287     MMU_TRANSLATE_READ( R_EAX );
  1288     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
  1289     MEM_READ_WORD( R_EAX, R_EAX );
  1290     store_reg( R_EAX, Rn );
  1291     sh4_x86.tstate = TSTATE_NONE;
  1292 :}
  1293 MOV.W @(R0, Rm), Rn {:  
  1294     load_reg( R_EAX, 0 );
  1295     load_reg( R_ECX, Rm );
  1296     ADD_r32_r32( R_ECX, R_EAX );
  1297     check_ralign16( R_EAX );
  1298     MMU_TRANSLATE_READ( R_EAX );
  1299     MEM_READ_WORD( R_EAX, R_EAX );
  1300     store_reg( R_EAX, Rn );
  1301     sh4_x86.tstate = TSTATE_NONE;
  1302 :}
  1303 MOV.W @(disp, GBR), R0 {:  
  1304     load_spreg( R_EAX, R_GBR );
  1305     ADD_imm32_r32( disp, R_EAX );
  1306     check_ralign16( R_EAX );
  1307     MMU_TRANSLATE_READ( R_EAX );
  1308     MEM_READ_WORD( R_EAX, R_EAX );
  1309     store_reg( R_EAX, 0 );
  1310     sh4_x86.tstate = TSTATE_NONE;
  1311 :}
  1312 MOV.W @(disp, PC), Rn {:  
  1313     if( sh4_x86.in_delay_slot ) {
  1314 	SLOTILLEGAL();
  1315     } else {
  1316 	// See comments for MOV.L @(disp, PC), Rn
  1317 	uint32_t target = pc + disp + 4;
  1318 	if( IS_IN_ICACHE(target) ) {
  1319 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1320 	    MOV_moff32_EAX( ptr );
  1321 	    MOVSX_r16_r32( R_EAX, R_EAX );
  1322 	} else {
  1323 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1324 	    ADD_sh4r_r32( R_PC, R_EAX );
  1325 	    MMU_TRANSLATE_READ( R_EAX );
  1326 	    MEM_READ_WORD( R_EAX, R_EAX );
  1327 	    sh4_x86.tstate = TSTATE_NONE;
   1328 	}
  1329 	store_reg( R_EAX, Rn );
   1330     }
  1331 :}
  1332 MOV.W @(disp, Rm), R0 {:  
  1333     load_reg( R_EAX, Rm );
  1334     ADD_imm32_r32( disp, R_EAX );
  1335     check_ralign16( R_EAX );
  1336     MMU_TRANSLATE_READ( R_EAX );
  1337     MEM_READ_WORD( R_EAX, R_EAX );
  1338     store_reg( R_EAX, 0 );
  1339     sh4_x86.tstate = TSTATE_NONE;
  1340 :}
  1341 MOVA @(disp, PC), R0 {:  
  1342     if( sh4_x86.in_delay_slot ) {
  1343 	SLOTILLEGAL();
  1344     } else {
  1345 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1346 	ADD_sh4r_r32( R_PC, R_ECX );
  1347 	store_reg( R_ECX, 0 );
  1348 	sh4_x86.tstate = TSTATE_NONE;
   1349     }
  1350 :}
  1351 MOVCA.L R0, @Rn {:  
  1352     load_reg( R_EAX, Rn );
  1353     check_walign32( R_EAX );
  1354     MMU_TRANSLATE_WRITE( R_EAX );
  1355     load_reg( R_EDX, 0 );
  1356     MEM_WRITE_LONG( R_EAX, R_EDX );
  1357     sh4_x86.tstate = TSTATE_NONE;
  1358 :}
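
/* MOVCA.L is translated as a plain 32-bit store; the cache-line allocation
 * hint has no direct equivalent here and is not modelled. */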
  1360 /* Control transfer instructions */
  1361 BF disp {:
  1362     if( sh4_x86.in_delay_slot ) {
  1363 	SLOTILLEGAL();
  1364     } else {
  1365 	sh4vma_t target = disp + pc + 4;
  1366 	JT_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
  1367 	exit_block_rel(target, pc+2 );
  1368 	JMP_TARGET(nottaken);
  1369 	return 2;
   1370     }
  1371 :}
  1372 BF/S disp {:
  1373     if( sh4_x86.in_delay_slot ) {
  1374 	SLOTILLEGAL();
  1375     } else {
  1376 	sh4vma_t target = disp + pc + 4;
  1377 	sh4_x86.in_delay_slot = TRUE;
  1378 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1379 	    CMP_imm8s_sh4r( 1, R_T );
  1380 	    sh4_x86.tstate = TSTATE_E;
   1381 	}
  1382 	OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JNE rel32
  1383 	sh4_translate_instruction(pc+2);
  1384 	exit_block_rel( target, pc+4 );
  1385 	// not taken
  1386 	*patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1387 	sh4_translate_instruction(pc+2);
  1388 	return 4;
   1389     }
  1390 :}
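
/* The rel32 placeholder emitted above is patched once the taken path has
 * been generated: *patch = xlat_output - (uint8_t *)patch - 4, because a
 * rel32 is relative to the end of its 4-byte field. Note the delay slot is
 * translated twice, once per path, since the two paths diverge before the
 * slot instruction. */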
  1391 BRA disp {:  
  1392     if( sh4_x86.in_delay_slot ) {
  1393 	SLOTILLEGAL();
  1394     } else {
  1395 	sh4_x86.in_delay_slot = TRUE;
  1396 	sh4_translate_instruction( pc + 2 );
  1397 	exit_block_rel( disp + pc + 4, pc+4 );
  1398 	sh4_x86.branch_taken = TRUE;
  1399 	return 4;
   1400     }
  1401 :}
  1402 BRAF Rn {:  
  1403     if( sh4_x86.in_delay_slot ) {
  1404 	SLOTILLEGAL();
  1405     } else {
  1406 	load_reg( R_EAX, Rn );
  1407 	ADD_imm32_r32( pc + 4, R_EAX );
  1408 	store_spreg( R_EAX, REG_OFFSET(pc) );
  1409 	sh4_x86.in_delay_slot = TRUE;
  1410 	sh4_x86.tstate = TSTATE_NONE;
  1411 	sh4_translate_instruction( pc + 2 );
  1412 	exit_block_pcset(pc+2);
  1413 	sh4_x86.branch_taken = TRUE;
  1414 	return 4;
   1415     }
  1416 :}
  1417 BSR disp {:  
  1418     if( sh4_x86.in_delay_slot ) {
  1419 	SLOTILLEGAL();
  1420     } else {
  1421 	load_imm32( R_EAX, pc + 4 );
  1422 	store_spreg( R_EAX, R_PR );
  1423 	sh4_x86.in_delay_slot = TRUE;
  1424 	sh4_translate_instruction( pc + 2 );
  1425 	exit_block_rel( disp + pc + 4, pc+4 );
  1426 	sh4_x86.branch_taken = TRUE;
  1427 	return 4;
   1428     }
  1429 :}
  1430 BSRF Rn {:  
  1431     if( sh4_x86.in_delay_slot ) {
  1432 	SLOTILLEGAL();
  1433     } else {
  1434 	load_imm32( R_ECX, pc + 4 );
  1435 	store_spreg( R_ECX, R_PR );
  1436 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_ECX );
  1437 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1438 	sh4_x86.in_delay_slot = TRUE;
  1439 	sh4_x86.tstate = TSTATE_NONE;
  1440 	sh4_translate_instruction( pc + 2 );
  1441 	exit_block_pcset(pc+2);
  1442 	sh4_x86.branch_taken = TRUE;
  1443 	return 4;
   1444     }
  1445 :}
  1446 BT disp {:
  1447     if( sh4_x86.in_delay_slot ) {
  1448 	SLOTILLEGAL();
  1449     } else {
  1450 	sh4vma_t target = disp + pc + 4;
  1451 	JF_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
  1452 	exit_block_rel(target, pc+2 );
  1453 	JMP_TARGET(nottaken);
  1454 	return 2;
   1455     }
  1456 :}
  1457 BT/S disp {:
  1458     if( sh4_x86.in_delay_slot ) {
  1459 	SLOTILLEGAL();
  1460     } else {
  1461 	sh4_x86.in_delay_slot = TRUE;
  1462 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1463 	    CMP_imm8s_sh4r( 1, R_T );
  1464 	    sh4_x86.tstate = TSTATE_E;
   1465 	}
  1466 	OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JE rel32
  1467 	sh4_translate_instruction(pc+2);
  1468 	exit_block_rel( disp + pc + 4, pc+4 );
  1469 	// not taken
  1470 	*patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1471 	sh4_translate_instruction(pc+2);
  1472 	return 4;
   1473     }
  1474 :}
  1475 JMP @Rn {:  
  1476     if( sh4_x86.in_delay_slot ) {
  1477 	SLOTILLEGAL();
  1478     } else {
  1479 	load_reg( R_ECX, Rn );
  1480 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1481 	sh4_x86.in_delay_slot = TRUE;
  1482 	sh4_translate_instruction(pc+2);
  1483 	exit_block_pcset(pc+2);
  1484 	sh4_x86.branch_taken = TRUE;
  1485 	return 4;
   1486     }
  1487 :}
  1488 JSR @Rn {:  
  1489     if( sh4_x86.in_delay_slot ) {
  1490 	SLOTILLEGAL();
  1491     } else {
  1492 	load_imm32( R_EAX, pc + 4 );
  1493 	store_spreg( R_EAX, R_PR );
  1494 	load_reg( R_ECX, Rn );
  1495 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1496 	sh4_x86.in_delay_slot = TRUE;
  1497 	sh4_translate_instruction(pc+2);
  1498 	exit_block_pcset(pc+2);
  1499 	sh4_x86.branch_taken = TRUE;
  1500 	return 4;
   1501     }
  1502 :}
  1503 RTE {:  
  1504     if( sh4_x86.in_delay_slot ) {
  1505 	SLOTILLEGAL();
  1506     } else {
  1507 	check_priv();
  1508 	load_spreg( R_ECX, R_SPC );
  1509 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1510 	load_spreg( R_EAX, R_SSR );
  1511 	call_func1( sh4_write_sr, R_EAX );
  1512 	sh4_x86.in_delay_slot = TRUE;
  1513 	sh4_x86.priv_checked = FALSE;
  1514 	sh4_x86.fpuen_checked = FALSE;
  1515 	sh4_x86.tstate = TSTATE_NONE;
  1516 	sh4_translate_instruction(pc+2);
  1517 	exit_block_pcset(pc+2);
  1518 	sh4_x86.branch_taken = TRUE;
  1519 	return 4;
   1520     }
  1521 :}
  1522 RTS {:  
  1523     if( sh4_x86.in_delay_slot ) {
  1524 	SLOTILLEGAL();
  1525     } else {
  1526 	load_spreg( R_ECX, R_PR );
  1527 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1528 	sh4_x86.in_delay_slot = TRUE;
  1529 	sh4_translate_instruction(pc+2);
  1530 	exit_block_pcset(pc+2);
  1531 	sh4_x86.branch_taken = TRUE;
  1532 	return 4;
   1533     }
  1534 :}
  1535 TRAPA #imm {:  
  1536     if( sh4_x86.in_delay_slot ) {
  1537 	SLOTILLEGAL();
  1538     } else {
  1539 	load_imm32( R_ECX, pc+2 );
  1540 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1541 	load_imm32( R_EAX, imm );
  1542 	call_func1( sh4_raise_trap, R_EAX );
  1543 	sh4_x86.tstate = TSTATE_NONE;
  1544 	exit_block_pcset(pc);
  1545 	sh4_x86.branch_taken = TRUE;
  1546 	return 2;
   1547     }
  1548 :}
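
/* TRAPA stores the return address (pc+2) to sh4r.pc before the call;
 * sh4_raise_trap(imm) is assumed to record the trap number (TRA = imm<<2
 * on SH4) and raise the trap exception from the updated PC. */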
  1549 UNDEF {:  
  1550     if( sh4_x86.in_delay_slot ) {
  1551 	SLOTILLEGAL();
  1552     } else {
  1553 	JMP_exc(EXC_ILLEGAL);
  1554 	return 2;
   1555     }
  1556 :}
  1558 CLRMAC {:  
  1559     XOR_r32_r32(R_EAX, R_EAX);
  1560     store_spreg( R_EAX, R_MACL );
  1561     store_spreg( R_EAX, R_MACH );
  1562     sh4_x86.tstate = TSTATE_NONE;
  1563 :}
  1564 CLRS {:
  1565     CLC();
  1566     SETC_sh4r(R_S);
  1567     sh4_x86.tstate = TSTATE_C;
  1568 :}
  1569 CLRT {:  
  1570     CLC();
  1571     SETC_t();
  1572     sh4_x86.tstate = TSTATE_C;
  1573 :}
  1574 SETS {:  
  1575     STC();
  1576     SETC_sh4r(R_S);
  1577     sh4_x86.tstate = TSTATE_C;
  1578 :}
  1579 SETT {:  
  1580     STC();
  1581     SETC_t();
  1582     sh4_x86.tstate = TSTATE_C;
  1583 :}
  1585 /* Floating point moves */
  1586 FMOV FRm, FRn {:  
  1587     /* As horrible as this looks, it's actually covering 5 separate cases:
  1588      * 1. 32-bit fr-to-fr (PR=0)
  1589      * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )
  1590      * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )
  1591      * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
  1592      * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
  1593      */
  1594     check_fpuen();
  1595     load_spreg( R_ECX, R_FPSCR );
  1596     load_fr_bank( R_EDX );
  1597     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1598     JNE_rel8(8, doublesize);
  1599     load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
  1600     store_fr( R_EDX, R_EAX, FRn );
  1601     if( FRm&1 ) {
  1602 	JMP_rel8(24, end);
  1603 	JMP_TARGET(doublesize);
  1604 	load_xf_bank( R_ECX ); 
  1605 	load_fr( R_ECX, R_EAX, FRm-1 );
  1606 	if( FRn&1 ) {
  1607 	    load_fr( R_ECX, R_EDX, FRm );
  1608 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1609 	    store_fr( R_ECX, R_EDX, FRn );
  1610 	} else /* FRn&1 == 0 */ {
  1611 	    load_fr( R_ECX, R_ECX, FRm );
  1612 	    store_fr( R_EDX, R_EAX, FRn );
  1613 	    store_fr( R_EDX, R_ECX, FRn+1 );
   1614 	}
  1615 	JMP_TARGET(end);
  1616     } else /* FRm&1 == 0 */ {
  1617 	if( FRn&1 ) {
  1618 	    JMP_rel8(24, end);
  1619 	    load_xf_bank( R_ECX );
  1620 	    load_fr( R_EDX, R_EAX, FRm );
  1621 	    load_fr( R_EDX, R_EDX, FRm+1 );
  1622 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1623 	    store_fr( R_ECX, R_EDX, FRn );
  1624 	    JMP_TARGET(end);
  1625 	} else /* FRn&1 == 0 */ {
  1626 	    JMP_rel8(12, end);
  1627 	    load_fr( R_EDX, R_EAX, FRm );
  1628 	    load_fr( R_EDX, R_ECX, FRm+1 );
  1629 	    store_fr( R_EDX, R_EAX, FRn );
  1630 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1631 	    JMP_TARGET(end);
   1632 	}
   1633     }
  1634     sh4_x86.tstate = TSTATE_NONE;
  1635 :}
  1636 FMOV FRm, @Rn {: 
  1637     check_fpuen();
  1638     load_reg( R_EAX, Rn );
  1639     check_walign32( R_EAX );
  1640     MMU_TRANSLATE_WRITE( R_EAX );
  1641     load_spreg( R_EDX, R_FPSCR );
  1642     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1643     JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
  1644     load_fr_bank( R_EDX );
  1645     load_fr( R_EDX, R_ECX, FRm );
  1646     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1647     if( FRm&1 ) {
  1648 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1649 	JMP_TARGET(doublesize);
  1650 	load_xf_bank( R_EDX );
  1651 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1652 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1653 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1654 	JMP_TARGET(end);
  1655     } else {
  1656 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1657 	JMP_TARGET(doublesize);
  1658 	load_fr_bank( R_EDX );
  1659 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1660 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1661 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1662 	JMP_TARGET(end);
   1663     }
  1664     sh4_x86.tstate = TSTATE_NONE;
  1665 :}
  1666 FMOV @Rm, FRn {:  
  1667     check_fpuen();
  1668     load_reg( R_EAX, Rm );
  1669     check_ralign32( R_EAX );
  1670     MMU_TRANSLATE_READ( R_EAX );
  1671     load_spreg( R_EDX, R_FPSCR );
  1672     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1673     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1674     MEM_READ_LONG( R_EAX, R_EAX );
  1675     load_fr_bank( R_EDX );
  1676     store_fr( R_EDX, R_EAX, FRn );
  1677     if( FRn&1 ) {
  1678 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1679 	JMP_TARGET(doublesize);
  1680 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1681 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1682 	load_xf_bank( R_EDX );
  1683 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1684 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1685 	JMP_TARGET(end);
  1686     } else {
  1687 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1688 	JMP_TARGET(doublesize);
  1689 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1690 	load_fr_bank( R_EDX );
  1691 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1692 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1693 	JMP_TARGET(end);
   1694     }
  1695     sh4_x86.tstate = TSTATE_NONE;
  1696 :}
  1697 FMOV FRm, @-Rn {:  
  1698     check_fpuen();
  1699     load_reg( R_EAX, Rn );
  1700     check_walign32( R_EAX );
  1701     load_spreg( R_EDX, R_FPSCR );
  1702     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1703     JNE_rel8(15 + MEM_WRITE_SIZE + MMU_TRANSLATE_SIZE, doublesize);
  1704     ADD_imm8s_r32( -4, R_EAX );
  1705     MMU_TRANSLATE_WRITE( R_EAX );
  1706     load_fr_bank( R_EDX );
  1707     load_fr( R_EDX, R_ECX, FRm );
  1708     ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1709     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1710     if( FRm&1 ) {
  1711 	JMP_rel8( 25 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
  1712 	JMP_TARGET(doublesize);
  1713 	ADD_imm8s_r32(-8,R_EAX);
  1714 	MMU_TRANSLATE_WRITE( R_EAX );
  1715 	load_xf_bank( R_EDX );
  1716 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1717 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1718 	ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1719 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1720 	JMP_TARGET(end);
  1721     } else {
  1722 	JMP_rel8( 16 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
  1723 	JMP_TARGET(doublesize);
  1724 	ADD_imm8s_r32(-8,R_EAX);
  1725 	MMU_TRANSLATE_WRITE( R_EAX );
  1726 	load_fr_bank( R_EDX );
  1727 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1728 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1729 	ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1730 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1731 	JMP_TARGET(end);
  1732     }
  1733     sh4_x86.tstate = TSTATE_NONE;
  1734 :}
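       /* Ordering note: the decremented address is translated by
        * MMU_TRANSLATE_WRITE before Rn is written back to sh4r, so a TLB
        * exception raised during translation leaves the register file
        * untouched and the instruction cleanly restartable.
        */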
  1735 FMOV @Rm+, FRn {:
  1736     check_fpuen();
  1737     load_reg( R_EAX, Rm );
  1738     check_ralign32( R_EAX );
  1739     MMU_TRANSLATE_READ( R_EAX );
  1740     load_spreg( R_EDX, R_FPSCR );
  1741     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1742     JNE_rel8(12 + MEM_READ_SIZE, doublesize);
  1743     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1744     MEM_READ_LONG( R_EAX, R_EAX );
  1745     load_fr_bank( R_EDX );
  1746     store_fr( R_EDX, R_EAX, FRn );
  1747     if( FRn&1 ) {
  1748 	JMP_rel8(25 + MEM_READ_DOUBLE_SIZE, end);
  1749 	JMP_TARGET(doublesize);
  1750 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1751 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1752 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1753 	load_xf_bank( R_EDX );
  1754 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1755 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1756 	JMP_TARGET(end);
  1757     } else {
  1758 	JMP_rel8(13 + MEM_READ_DOUBLE_SIZE, end);
  1759 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1760 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1761 	load_fr_bank( R_EDX );
  1762 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1763 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1764 	JMP_TARGET(end);
  1765     }
  1766     sh4_x86.tstate = TSTATE_NONE;
  1767 :}
  1768 FMOV FRm, @(R0, Rn) {:  
  1769     check_fpuen();
  1770     load_reg( R_EAX, Rn );
  1771     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1772     check_walign32( R_EAX );
  1773     MMU_TRANSLATE_WRITE( R_EAX );
  1774     load_spreg( R_EDX, R_FPSCR );
  1775     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1776     JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
  1777     load_fr_bank( R_EDX );
  1778     load_fr( R_EDX, R_ECX, FRm );
  1779     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1780     if( FRm&1 ) {
  1781 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1782 	JMP_TARGET(doublesize);
  1783 	load_xf_bank( R_EDX );
  1784 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1785 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1786 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1787 	JMP_TARGET(end);
  1788     } else {
  1789 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1790 	JMP_TARGET(doublesize);
  1791 	load_fr_bank( R_EDX );
  1792 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1793 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1794 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1795 	JMP_TARGET(end);
  1796     }
  1797     sh4_x86.tstate = TSTATE_NONE;
  1798 :}
  1799 FMOV @(R0, Rm), FRn {:  
  1800     check_fpuen();
  1801     load_reg( R_EAX, Rm );
  1802     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1803     check_ralign32( R_EAX );
  1804     MMU_TRANSLATE_READ( R_EAX );
  1805     load_spreg( R_EDX, R_FPSCR );
  1806     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1807     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1808     MEM_READ_LONG( R_EAX, R_EAX );
  1809     load_fr_bank( R_EDX );
  1810     store_fr( R_EDX, R_EAX, FRn );
  1811     if( FRn&1 ) {
  1812 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1813 	JMP_TARGET(doublesize);
  1814 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1815 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1816 	load_xf_bank( R_EDX );
  1817 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1818 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1819 	JMP_TARGET(end);
  1820     } else {
  1821 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1822 	JMP_TARGET(doublesize);
  1823 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1824 	load_fr_bank( R_EDX );
  1825 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1826 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1827 	JMP_TARGET(end);
  1828     }
  1829     sh4_x86.tstate = TSTATE_NONE;
  1830 :}
  1831 FLDI0 FRn {:  /* IFF PR=0 */
  1832     check_fpuen();
  1833     load_spreg( R_ECX, R_FPSCR );
  1834     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1835     JNE_rel8(8, end);
  1836     XOR_r32_r32( R_EAX, R_EAX );
  1837     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1838     store_fr( R_ECX, R_EAX, FRn );
  1839     JMP_TARGET(end);
  1840     sh4_x86.tstate = TSTATE_NONE;
  1841 :}
  1842 FLDI1 FRn {:  /* IFF PR=0 */
  1843     check_fpuen();
  1844     load_spreg( R_ECX, R_FPSCR );
  1845     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1846     JNE_rel8(11, end);
  1847     load_imm32(R_EAX, 0x3F800000);
  1848     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1849     store_fr( R_ECX, R_EAX, FRn );
  1850     JMP_TARGET(end);
  1851     sh4_x86.tstate = TSTATE_NONE;
  1852 :}
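       /* Both FLDI forms are defined only for PR=0, hence the test-and-skip.
        * XOR_r32_r32 produces 0x00000000, the encoding of +0.0f, and
        * 0x3F800000 is the IEEE754 single-precision encoding of 1.0f.
        */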
  1854 FLOAT FPUL, FRn {:  
  1855     check_fpuen();
  1856     load_spreg( R_ECX, R_FPSCR );
  1857     load_spreg(R_EDX, REG_OFFSET(fr_bank));
  1858     FILD_sh4r(R_FPUL);
  1859     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1860     JNE_rel8(5, doubleprec);
  1861     pop_fr( R_EDX, FRn );
  1862     JMP_rel8(3, end);
  1863     JMP_TARGET(doubleprec);
  1864     pop_dr( R_EDX, FRn );
  1865     JMP_TARGET(end);
  1866     sh4_x86.tstate = TSTATE_NONE;
  1867 :}
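       /* FILD_sh4r pushes FPUL onto the x87 stack as a signed 32-bit
        * integer; FPSCR.PR then selects whether it is popped back as a
        * single (pop_fr) or a double (pop_dr).
        */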
  1868 FTRC FRm, FPUL {:  
  1869     check_fpuen();
  1870     load_spreg( R_ECX, R_FPSCR );
  1871     load_fr_bank( R_EDX );
  1872     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1873     JNE_rel8(5, doubleprec);
  1874     push_fr( R_EDX, FRm );
  1875     JMP_rel8(3, doop);
  1876     JMP_TARGET(doubleprec);
  1877     push_dr( R_EDX, FRm );
  1878     JMP_TARGET( doop );
  1879     load_imm32( R_ECX, (uint32_t)&max_int );
  1880     FILD_r32ind( R_ECX );
  1881     FCOMIP_st(1);
  1882     JNA_rel8( 32, sat );
  1883     load_imm32( R_ECX, (uint32_t)&min_int );  // 5
  1884     FILD_r32ind( R_ECX );           // 2
  1885     FCOMIP_st(1);                   // 2
  1886     JAE_rel8( 21, sat2 );            // 2
  1887     load_imm32( R_EAX, (uint32_t)&save_fcw );
  1888     FNSTCW_r32ind( R_EAX );
  1889     load_imm32( R_EDX, (uint32_t)&trunc_fcw );
  1890     FLDCW_r32ind( R_EDX );
  1891     FISTP_sh4r(R_FPUL);             // 3
  1892     FLDCW_r32ind( R_EAX );
  1893     JMP_rel8( 9, end );             // 2
  1895     JMP_TARGET(sat);
  1896     JMP_TARGET(sat2);
  1897     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  1898     store_spreg( R_ECX, R_FPUL );
  1899     FPOP_st();
  1900     JMP_TARGET(end);
  1901     sh4_x86.tstate = TSTATE_NONE;
  1902 :}
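       /* Sketch of the saturation logic above, assuming max_int/min_int are
        * this module's INT32_MAX/INT32_MIN constants and trunc_fcw is an x87
        * control word with round-to-zero set (all defined elsewhere in this
        * file):
        *
        *   double v = (FPSCR & FPSCR_PR) ? DR(FRm) : FR(FRm);
        *   if( !(v < max_int) )      FPUL = max_int;   // JNA catches NaN too
        *   else if( v <= min_int )   FPUL = min_int;
        *   else                      FPUL = (int32_t)v; // truncated
        */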
  1903 FLDS FRm, FPUL {:  
  1904     check_fpuen();
  1905     load_fr_bank( R_ECX );
  1906     load_fr( R_ECX, R_EAX, FRm );
  1907     store_spreg( R_EAX, R_FPUL );
  1908     sh4_x86.tstate = TSTATE_NONE;
  1909 :}
  1910 FSTS FPUL, FRn {:  
  1911     check_fpuen();
  1912     load_fr_bank( R_ECX );
  1913     load_spreg( R_EAX, R_FPUL );
  1914     store_fr( R_ECX, R_EAX, FRn );
  1915     sh4_x86.tstate = TSTATE_NONE;
  1916 :}
  1917 FCNVDS FRm, FPUL {:  
  1918     check_fpuen();
  1919     load_spreg( R_ECX, R_FPSCR );
  1920     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1921     JE_rel8(9, end); // only when PR=1
  1922     load_fr_bank( R_ECX );
  1923     push_dr( R_ECX, FRm );
  1924     pop_fpul();
  1925     JMP_TARGET(end);
  1926     sh4_x86.tstate = TSTATE_NONE;
  1927 :}
  1928 FCNVSD FPUL, FRn {:  
  1929     check_fpuen();
  1930     load_spreg( R_ECX, R_FPSCR );
  1931     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1932     JE_rel8(9, end); // only when PR=1
  1933     load_fr_bank( R_ECX );
  1934     push_fpul();
  1935     pop_dr( R_ECX, FRn );
  1936     JMP_TARGET(end);
  1937     sh4_x86.tstate = TSTATE_NONE;
  1938 :}
  1940 /* Floating point instructions */
  1941 FABS FRn {:  
  1942     check_fpuen();
  1943     load_spreg( R_ECX, R_FPSCR );
  1944     load_fr_bank( R_EDX );
  1945     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1946     JNE_rel8(10, doubleprec);
  1947     push_fr(R_EDX, FRn); // 3
  1948     FABS_st0(); // 2
  1949     pop_fr( R_EDX, FRn); //3
  1950     JMP_rel8(8,end); // 2
  1951     JMP_TARGET(doubleprec);
  1952     push_dr(R_EDX, FRn);
  1953     FABS_st0();
  1954     pop_dr(R_EDX, FRn);
  1955     JMP_TARGET(end);
  1956     sh4_x86.tstate = TSTATE_NONE;
  1957 :}
  1958 FADD FRm, FRn {:  
  1959     check_fpuen();
  1960     load_spreg( R_ECX, R_FPSCR );
  1961     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1962     load_fr_bank( R_EDX );
  1963     JNE_rel8(13,doubleprec);
  1964     push_fr(R_EDX, FRm);
  1965     push_fr(R_EDX, FRn);
  1966     FADDP_st(1);
  1967     pop_fr(R_EDX, FRn);
  1968     JMP_rel8(11,end);
  1969     JMP_TARGET(doubleprec);
  1970     push_dr(R_EDX, FRm);
  1971     push_dr(R_EDX, FRn);
  1972     FADDP_st(1);
  1973     pop_dr(R_EDX, FRn);
  1974     JMP_TARGET(end);
  1975     sh4_x86.tstate = TSTATE_NONE;
  1976 :}
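       /* The same push/operate/pop x87 pattern implements FDIV, FMUL and
        * FSUB below. Operand order matters for the non-commutative ones:
        * FDIV and FSUB push FRn first, so FDIVP_st(1)/FSUBP_st(1) leave
        * FRn/FRm and FRn-FRm respectively in st(0).
        */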
  1977 FDIV FRm, FRn {:  
  1978     check_fpuen();
  1979     load_spreg( R_ECX, R_FPSCR );
  1980     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1981     load_fr_bank( R_EDX );
  1982     JNE_rel8(13, doubleprec);
  1983     push_fr(R_EDX, FRn);
  1984     push_fr(R_EDX, FRm);
  1985     FDIVP_st(1);
  1986     pop_fr(R_EDX, FRn);
  1987     JMP_rel8(11, end);
  1988     JMP_TARGET(doubleprec);
  1989     push_dr(R_EDX, FRn);
  1990     push_dr(R_EDX, FRm);
  1991     FDIVP_st(1);
  1992     pop_dr(R_EDX, FRn);
  1993     JMP_TARGET(end);
  1994     sh4_x86.tstate = TSTATE_NONE;
  1995 :}
  1996 FMAC FR0, FRm, FRn {:  
  1997     check_fpuen();
  1998     load_spreg( R_ECX, R_FPSCR );
  1999     load_spreg( R_EDX, REG_OFFSET(fr_bank));
  2000     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2001     JNE_rel8(18, doubleprec);
  2002     push_fr( R_EDX, 0 );
  2003     push_fr( R_EDX, FRm );
  2004     FMULP_st(1);
  2005     push_fr( R_EDX, FRn );
  2006     FADDP_st(1);
  2007     pop_fr( R_EDX, FRn );
  2008     JMP_rel8(16, end);
  2009     JMP_TARGET(doubleprec);
  2010     push_dr( R_EDX, 0 );
  2011     push_dr( R_EDX, FRm );
  2012     FMULP_st(1);
  2013     push_dr( R_EDX, FRn );
  2014     FADDP_st(1);
  2015     pop_dr( R_EDX, FRn );
  2016     JMP_TARGET(end);
  2017     sh4_x86.tstate = TSTATE_NONE;
  2018 :}
  2020 FMUL FRm, FRn {:  
  2021     check_fpuen();
  2022     load_spreg( R_ECX, R_FPSCR );
  2023     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2024     load_fr_bank( R_EDX );
  2025     JNE_rel8(13, doubleprec);
  2026     push_fr(R_EDX, FRm);
  2027     push_fr(R_EDX, FRn);
  2028     FMULP_st(1);
  2029     pop_fr(R_EDX, FRn);
  2030     JMP_rel8(11, end);
  2031     JMP_TARGET(doubleprec);
  2032     push_dr(R_EDX, FRm);
  2033     push_dr(R_EDX, FRn);
  2034     FMULP_st(1);
  2035     pop_dr(R_EDX, FRn);
  2036     JMP_TARGET(end);
  2037     sh4_x86.tstate = TSTATE_NONE;
  2038 :}
  2039 FNEG FRn {:  
  2040     check_fpuen();
  2041     load_spreg( R_ECX, R_FPSCR );
  2042     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2043     load_fr_bank( R_EDX );
  2044     JNE_rel8(10, doubleprec);
  2045     push_fr(R_EDX, FRn);
  2046     FCHS_st0();
  2047     pop_fr(R_EDX, FRn);
  2048     JMP_rel8(8, end);
  2049     JMP_TARGET(doubleprec);
  2050     push_dr(R_EDX, FRn);
  2051     FCHS_st0();
  2052     pop_dr(R_EDX, FRn);
  2053     JMP_TARGET(end);
  2054     sh4_x86.tstate = TSTATE_NONE;
  2055 :}
  2056 FSRRA FRn {:  
  2057     check_fpuen();
  2058     load_spreg( R_ECX, R_FPSCR );
  2059     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2060     load_fr_bank( R_EDX );
  2061     JNE_rel8(12, end); // PR=0 only
  2062     FLD1_st0();
  2063     push_fr(R_EDX, FRn);
  2064     FSQRT_st0();
  2065     FDIVP_st(1);
  2066     pop_fr(R_EDX, FRn);
  2067     JMP_TARGET(end);
  2068     sh4_x86.tstate = TSTATE_NONE;
  2069 :}
  2070 FSQRT FRn {:  
  2071     check_fpuen();
  2072     load_spreg( R_ECX, R_FPSCR );
  2073     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2074     load_fr_bank( R_EDX );
  2075     JNE_rel8(10, doubleprec);
  2076     push_fr(R_EDX, FRn);
  2077     FSQRT_st0();
  2078     pop_fr(R_EDX, FRn);
  2079     JMP_rel8(8, end);
  2080     JMP_TARGET(doubleprec);
  2081     push_dr(R_EDX, FRn);
  2082     FSQRT_st0();
  2083     pop_dr(R_EDX, FRn);
  2084     JMP_TARGET(end);
  2085     sh4_x86.tstate = TSTATE_NONE;
  2086 :}
  2087 FSUB FRm, FRn {:  
  2088     check_fpuen();
  2089     load_spreg( R_ECX, R_FPSCR );
  2090     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2091     load_fr_bank( R_EDX );
  2092     JNE_rel8(13, doubleprec);
  2093     push_fr(R_EDX, FRn);
  2094     push_fr(R_EDX, FRm);
  2095     FSUBP_st(1);
  2096     pop_fr(R_EDX, FRn);
  2097     JMP_rel8(11, end);
  2098     JMP_TARGET(doubleprec);
  2099     push_dr(R_EDX, FRn);
  2100     push_dr(R_EDX, FRm);
  2101     FSUBP_st(1);
  2102     pop_dr(R_EDX, FRn);
  2103     JMP_TARGET(end);
  2104     sh4_x86.tstate = TSTATE_NONE;
  2105 :}
  2107 FCMP/EQ FRm, FRn {:  
  2108     check_fpuen();
  2109     load_spreg( R_ECX, R_FPSCR );
  2110     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2111     load_fr_bank( R_EDX );
  2112     JNE_rel8(8, doubleprec);
  2113     push_fr(R_EDX, FRm);
  2114     push_fr(R_EDX, FRn);
  2115     JMP_rel8(6, end);
  2116     JMP_TARGET(doubleprec);
  2117     push_dr(R_EDX, FRm);
  2118     push_dr(R_EDX, FRn);
  2119     JMP_TARGET(end);
  2120     FCOMIP_st(1);
  2121     SETE_t();
  2122     FPOP_st();
  2123     sh4_x86.tstate = TSTATE_NONE;
  2124 :}
  2125 FCMP/GT FRm, FRn {:  
  2126     check_fpuen();
  2127     load_spreg( R_ECX, R_FPSCR );
  2128     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2129     load_fr_bank( R_EDX );
  2130     JNE_rel8(8, doubleprec);
  2131     push_fr(R_EDX, FRm);
  2132     push_fr(R_EDX, FRn);
  2133     JMP_rel8(6, end);
  2134     JMP_TARGET(doubleprec);
  2135     push_dr(R_EDX, FRm);
  2136     push_dr(R_EDX, FRn);
  2137     JMP_TARGET(end);
  2138     FCOMIP_st(1);
  2139     SETA_t();
  2140     FPOP_st();
  2141     sh4_x86.tstate = TSTATE_NONE;
  2142 :}
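       /* In both FCMP forms the single- and double-precision paths only
        * differ in how the operands are pushed; they converge at
        * JMP_TARGET(end) so one FCOMIP/SETcc/FPOP sequence serves both.
        * FCOMIP compares st(0)=FRn against st(1)=FRm, so SETE_t and SETA_t
        * set T to FRn==FRm and FRn>FRm respectively.
        */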
  2144 FSCA FPUL, FRn {:  
  2145     check_fpuen();
  2146     load_spreg( R_ECX, R_FPSCR );
  2147     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2148     JNE_rel8( CALL_FUNC2_SIZE + 9, doubleprec );
  2149     load_fr_bank( R_ECX );
  2150     ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
  2151     load_spreg( R_EDX, R_FPUL );
  2152     call_func2( sh4_fsca, R_EDX, R_ECX );
  2153     JMP_TARGET(doubleprec);
  2154     sh4_x86.tstate = TSTATE_NONE;
  2155 :}
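       /* FSCA writes the sine and cosine of the fixed-point angle in FPUL
        * into the even-aligned pair FR[n]/FR[n+1]; (FRn&0x0E)<<2 is that
        * pair's byte offset within the bank, and the angle conversion is
        * handled inside the sh4_fsca helper. The operation is undefined for
        * PR=1, so the call is simply skipped in that case.
        */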
  2156 FIPR FVm, FVn {:  
  2157     check_fpuen();
  2158     load_spreg( R_ECX, R_FPSCR );
  2159     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2160     JNE_rel8(44, doubleprec);
  2162     load_fr_bank( R_ECX );
  2163     push_fr( R_ECX, FVm<<2 );
  2164     push_fr( R_ECX, FVn<<2 );
  2165     FMULP_st(1);
  2166     push_fr( R_ECX, (FVm<<2)+1);
  2167     push_fr( R_ECX, (FVn<<2)+1);
  2168     FMULP_st(1);
  2169     FADDP_st(1);
  2170     push_fr( R_ECX, (FVm<<2)+2);
  2171     push_fr( R_ECX, (FVn<<2)+2);
  2172     FMULP_st(1);
  2173     FADDP_st(1);
  2174     push_fr( R_ECX, (FVm<<2)+3);
  2175     push_fr( R_ECX, (FVn<<2)+3);
  2176     FMULP_st(1);
  2177     FADDP_st(1);
  2178     pop_fr( R_ECX, (FVn<<2)+3);
  2179     JMP_TARGET(doubleprec);
  2180     sh4_x86.tstate = TSTATE_NONE;
  2181 :}
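       /* FIPR computes the 4-element inner product
        *     FR[4n+3] = sum(i=0..3) FR[4m+i] * FR[4n+i]
        * by accumulating on the x87 stack. Hardware FIPR is specified as an
        * approximate, reduced-precision operation, so evaluating it at full
        * x87 precision should only improve on the hardware result.
        */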
  2182 FTRV XMTRX, FVn {:  
  2183     check_fpuen();
  2184     load_spreg( R_ECX, R_FPSCR );
  2185     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2186     JNE_rel8( 18 + CALL_FUNC2_SIZE, doubleprec );
  2187     load_fr_bank( R_EDX );                 // 3
  2188     ADD_imm8s_r32( FVn<<4, R_EDX );        // 3
  2189     load_xf_bank( R_ECX );                 // 12
  2190     call_func2( sh4_ftrv, R_EDX, R_ECX );  // 12
  2191     JMP_TARGET(doubleprec);
  2192     sh4_x86.tstate = TSTATE_NONE;
  2193 :}
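       /* FTRV transforms the vector FVn (four floats at byte offset FVn<<4
        * in the front bank) by the XMTRX matrix held in the back bank; the
        * arithmetic itself lives in the sh4_ftrv helper. Like FSCA this is
        * undefined for PR=1 and skipped.
        */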
  2195 FRCHG {:  
  2196     check_fpuen();
  2197     load_spreg( R_ECX, R_FPSCR );
  2198     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2199     store_spreg( R_ECX, R_FPSCR );
  2200     update_fr_bank( R_ECX );
  2201     sh4_x86.tstate = TSTATE_NONE;
  2202 :}
  2203 FSCHG {:  
  2204     check_fpuen();
  2205     load_spreg( R_ECX, R_FPSCR );
  2206     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2207     store_spreg( R_ECX, R_FPSCR );
  2208     sh4_x86.tstate = TSTATE_NONE;
  2209 :}
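       /* FRCHG toggles the FR bank-select bit and refreshes the cached bank
        * pointer via update_fr_bank; FSCHG only toggles SZ, which the FMOV
        * implementations re-test on every use, so no cached state needs
        * updating there.
        */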
  2211 /* Processor control instructions */
  2212 LDC Rm, SR {:
  2213     if( sh4_x86.in_delay_slot ) {
  2214 	SLOTILLEGAL();
  2215     } else {
  2216 	check_priv();
  2217 	load_reg( R_EAX, Rm );
  2218 	call_func1( sh4_write_sr, R_EAX );
  2219 	sh4_x86.priv_checked = FALSE;
  2220 	sh4_x86.fpuen_checked = FALSE;
  2221 	sh4_x86.tstate = TSTATE_NONE;
  2222     }
  2223 :}
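       /* sh4_write_sr can change MD, RB and the interrupt mask, which
        * invalidates the translator's cached privilege and FPU-enable
        * checks; priv_checked and fpuen_checked are therefore reset. LDC to
        * SR is also illegal in a delay slot, per the SLOTILLEGAL case above.
        */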
  2224 LDC Rm, GBR {: 
  2225     load_reg( R_EAX, Rm );
  2226     store_spreg( R_EAX, R_GBR );
  2227 :}
  2228 LDC Rm, VBR {:  
  2229     check_priv();
  2230     load_reg( R_EAX, Rm );
  2231     store_spreg( R_EAX, R_VBR );
  2232     sh4_x86.tstate = TSTATE_NONE;
  2233 :}
  2234 LDC Rm, SSR {:  
  2235     check_priv();
  2236     load_reg( R_EAX, Rm );
  2237     store_spreg( R_EAX, R_SSR );
  2238     sh4_x86.tstate = TSTATE_NONE;
  2239 :}
  2240 LDC Rm, SGR {:  
  2241     check_priv();
  2242     load_reg( R_EAX, Rm );
  2243     store_spreg( R_EAX, R_SGR );
  2244     sh4_x86.tstate = TSTATE_NONE;
  2245 :}
  2246 LDC Rm, SPC {:  
  2247     check_priv();
  2248     load_reg( R_EAX, Rm );
  2249     store_spreg( R_EAX, R_SPC );
  2250     sh4_x86.tstate = TSTATE_NONE;
  2251 :}
  2252 LDC Rm, DBR {:  
  2253     check_priv();
  2254     load_reg( R_EAX, Rm );
  2255     store_spreg( R_EAX, R_DBR );
  2256     sh4_x86.tstate = TSTATE_NONE;
  2257 :}
  2258 LDC Rm, Rn_BANK {:  
  2259     check_priv();
  2260     load_reg( R_EAX, Rm );
  2261     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2262     sh4_x86.tstate = TSTATE_NONE;
  2263 :}
  2264 LDC.L @Rm+, GBR {:  
  2265     load_reg( R_EAX, Rm );
  2266     check_ralign32( R_EAX );
  2267     MMU_TRANSLATE_READ( R_EAX );
  2268     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2269     MEM_READ_LONG( R_EAX, R_EAX );
  2270     store_spreg( R_EAX, R_GBR );
  2271     sh4_x86.tstate = TSTATE_NONE;
  2272 :}
  2273 LDC.L @Rm+, SR {:
  2274     if( sh4_x86.in_delay_slot ) {
  2275 	SLOTILLEGAL();
  2276     } else {
  2277 	check_priv();
  2278 	load_reg( R_EAX, Rm );
  2279 	check_ralign32( R_EAX );
  2280 	MMU_TRANSLATE_READ( R_EAX );
  2281 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2282 	MEM_READ_LONG( R_EAX, R_EAX );
  2283 	call_func1( sh4_write_sr, R_EAX );
  2284 	sh4_x86.priv_checked = FALSE;
  2285 	sh4_x86.fpuen_checked = FALSE;
  2286 	sh4_x86.tstate = TSTATE_NONE;
  2287     }
  2288 :}
  2289 LDC.L @Rm+, VBR {:  
  2290     check_priv();
  2291     load_reg( R_EAX, Rm );
  2292     check_ralign32( R_EAX );
  2293     MMU_TRANSLATE_READ( R_EAX );
  2294     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2295     MEM_READ_LONG( R_EAX, R_EAX );
  2296     store_spreg( R_EAX, R_VBR );
  2297     sh4_x86.tstate = TSTATE_NONE;
  2298 :}
  2299 LDC.L @Rm+, SSR {:
  2300     check_priv();
  2301     load_reg( R_EAX, Rm );
  2302     check_ralign32( R_EAX );
  2303     MMU_TRANSLATE_READ( R_EAX );
  2304     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2305     MEM_READ_LONG( R_EAX, R_EAX );
  2306     store_spreg( R_EAX, R_SSR );
  2307     sh4_x86.tstate = TSTATE_NONE;
  2308 :}
  2309 LDC.L @Rm+, SGR {:  
  2310     check_priv();
  2311     load_reg( R_EAX, Rm );
  2312     check_ralign32( R_EAX );
  2313     MMU_TRANSLATE_READ( R_EAX );
  2314     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2315     MEM_READ_LONG( R_EAX, R_EAX );
  2316     store_spreg( R_EAX, R_SGR );
  2317     sh4_x86.tstate = TSTATE_NONE;
  2318 :}
  2319 LDC.L @Rm+, SPC {:  
  2320     check_priv();
  2321     load_reg( R_EAX, Rm );
  2322     check_ralign32( R_EAX );
  2323     MMU_TRANSLATE_READ( R_EAX );
  2324     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2325     MEM_READ_LONG( R_EAX, R_EAX );
  2326     store_spreg( R_EAX, R_SPC );
  2327     sh4_x86.tstate = TSTATE_NONE;
  2328 :}
  2329 LDC.L @Rm+, DBR {:  
  2330     check_priv();
  2331     load_reg( R_EAX, Rm );
  2332     check_ralign32( R_EAX );
  2333     MMU_TRANSLATE_READ( R_EAX );
  2334     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2335     MEM_READ_LONG( R_EAX, R_EAX );
  2336     store_spreg( R_EAX, R_DBR );
  2337     sh4_x86.tstate = TSTATE_NONE;
  2338 :}
  2339 LDC.L @Rm+, Rn_BANK {:  
  2340     check_priv();
  2341     load_reg( R_EAX, Rm );
  2342     check_ralign32( R_EAX );
  2343     MMU_TRANSLATE_READ( R_EAX );
  2344     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2345     MEM_READ_LONG( R_EAX, R_EAX );
  2346     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2347     sh4_x86.tstate = TSTATE_NONE;
  2348 :}
  2349 LDS Rm, FPSCR {:  
  2350     load_reg( R_EAX, Rm );
  2351     store_spreg( R_EAX, R_FPSCR );
  2352     update_fr_bank( R_EAX );
  2353     sh4_x86.tstate = TSTATE_NONE;
  2354 :}
  2355 LDS.L @Rm+, FPSCR {:  
  2356     load_reg( R_EAX, Rm );
  2357     check_ralign32( R_EAX );
  2358     MMU_TRANSLATE_READ( R_EAX );
  2359     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2360     MEM_READ_LONG( R_EAX, R_EAX );
  2361     store_spreg( R_EAX, R_FPSCR );
  2362     update_fr_bank( R_EAX );
  2363     sh4_x86.tstate = TSTATE_NONE;
  2364 :}
  2365 LDS Rm, FPUL {:  
  2366     load_reg( R_EAX, Rm );
  2367     store_spreg( R_EAX, R_FPUL );
  2368 :}
  2369 LDS.L @Rm+, FPUL {:  
  2370     load_reg( R_EAX, Rm );
  2371     check_ralign32( R_EAX );
  2372     MMU_TRANSLATE_READ( R_EAX );
  2373     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2374     MEM_READ_LONG( R_EAX, R_EAX );
  2375     store_spreg( R_EAX, R_FPUL );
  2376     sh4_x86.tstate = TSTATE_NONE;
  2377 :}
  2378 LDS Rm, MACH {: 
  2379     load_reg( R_EAX, Rm );
  2380     store_spreg( R_EAX, R_MACH );
  2381 :}
  2382 LDS.L @Rm+, MACH {:  
  2383     load_reg( R_EAX, Rm );
  2384     check_ralign32( R_EAX );
  2385     MMU_TRANSLATE_READ( R_EAX );
  2386     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2387     MEM_READ_LONG( R_EAX, R_EAX );
  2388     store_spreg( R_EAX, R_MACH );
  2389     sh4_x86.tstate = TSTATE_NONE;
  2390 :}
  2391 LDS Rm, MACL {:  
  2392     load_reg( R_EAX, Rm );
  2393     store_spreg( R_EAX, R_MACL );
  2394 :}
  2395 LDS.L @Rm+, MACL {:  
  2396     load_reg( R_EAX, Rm );
  2397     check_ralign32( R_EAX );
  2398     MMU_TRANSLATE_READ( R_EAX );
  2399     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2400     MEM_READ_LONG( R_EAX, R_EAX );
  2401     store_spreg( R_EAX, R_MACL );
  2402     sh4_x86.tstate = TSTATE_NONE;
  2403 :}
  2404 LDS Rm, PR {:  
  2405     load_reg( R_EAX, Rm );
  2406     store_spreg( R_EAX, R_PR );
  2407 :}
  2408 LDS.L @Rm+, PR {:  
  2409     load_reg( R_EAX, Rm );
  2410     check_ralign32( R_EAX );
  2411     MMU_TRANSLATE_READ( R_EAX );
  2412     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2413     MEM_READ_LONG( R_EAX, R_EAX );
  2414     store_spreg( R_EAX, R_PR );
  2415     sh4_x86.tstate = TSTATE_NONE;
  2416 :}
  2417 LDTLB {:  
  2418     call_func0( MMU_ldtlb );
  2419 :}
  2420 OCBI @Rn {:  :}
  2421 OCBP @Rn {:  :}
  2422 OCBWB @Rn {:  :}
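       /* OCBI/OCBP/OCBWB (operand cache-block invalidate/purge/write-back)
        * are emitted as no-ops, presumably because the operand cache itself
        * is not modelled by the emulator.
        */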
  2423 PREF @Rn {:
  2424     load_reg( R_EAX, Rn );
  2425     MOV_r32_r32( R_EAX, R_ECX );
  2426     AND_imm32_r32( 0xFC000000, R_EAX );
  2427     CMP_imm32_r32( 0xE0000000, R_EAX );
  2428     JNE_rel8(CALL_FUNC1_SIZE, end);
  2429     call_func1( sh4_flush_store_queue, R_ECX );
  2430     JMP_TARGET(end);
  2431     sh4_x86.tstate = TSTATE_NONE;
  2432 :}
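       /* Rough C equivalent of the PREF fast path above:
        *
        *   uint32_t addr = sh4r.r[Rn];
        *   if( (addr & 0xFC000000) == 0xE0000000 )   // store-queue region
        *       sh4_flush_store_queue( addr );
        *   // PREF to any other address is a cache hint and is ignored here.
        */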
  2433 SLEEP {: 
  2434     check_priv();
  2435     call_func0( sh4_sleep );
  2436     sh4_x86.tstate = TSTATE_NONE;
  2437     sh4_x86.in_delay_slot = FALSE;
  2438     return 2;
  2439 :}
  2440 STC SR, Rn {:
  2441     check_priv();
  2442     call_func0(sh4_read_sr);
  2443     store_reg( R_EAX, Rn );
  2444     sh4_x86.tstate = TSTATE_NONE;
  2445 :}
  2446 STC GBR, Rn {:  
  2447     load_spreg( R_EAX, R_GBR );
  2448     store_reg( R_EAX, Rn );
  2449 :}
  2450 STC VBR, Rn {:  
  2451     check_priv();
  2452     load_spreg( R_EAX, R_VBR );
  2453     store_reg( R_EAX, Rn );
  2454     sh4_x86.tstate = TSTATE_NONE;
  2455 :}
  2456 STC SSR, Rn {:  
  2457     check_priv();
  2458     load_spreg( R_EAX, R_SSR );
  2459     store_reg( R_EAX, Rn );
  2460     sh4_x86.tstate = TSTATE_NONE;
  2461 :}
  2462 STC SPC, Rn {:  
  2463     check_priv();
  2464     load_spreg( R_EAX, R_SPC );
  2465     store_reg( R_EAX, Rn );
  2466     sh4_x86.tstate = TSTATE_NONE;
  2467 :}
  2468 STC SGR, Rn {:  
  2469     check_priv();
  2470     load_spreg( R_EAX, R_SGR );
  2471     store_reg( R_EAX, Rn );
  2472     sh4_x86.tstate = TSTATE_NONE;
  2473 :}
  2474 STC DBR, Rn {:  
  2475     check_priv();
  2476     load_spreg( R_EAX, R_DBR );
  2477     store_reg( R_EAX, Rn );
  2478     sh4_x86.tstate = TSTATE_NONE;
  2479 :}
  2480 STC Rm_BANK, Rn {:
  2481     check_priv();
  2482     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2483     store_reg( R_EAX, Rn );
  2484     sh4_x86.tstate = TSTATE_NONE;
  2485 :}
  2486 STC.L SR, @-Rn {:
  2487     check_priv();
  2488     load_reg( R_EAX, Rn );
  2489     check_walign32( R_EAX );
  2490     ADD_imm8s_r32( -4, R_EAX );
  2491     MMU_TRANSLATE_WRITE( R_EAX );
  2492     PUSH_realigned_r32( R_EAX );
  2493     call_func0( sh4_read_sr );
  2494     POP_realigned_r32( R_ECX );
  2495     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2496     MEM_WRITE_LONG( R_ECX, R_EAX );
  2497     sh4_x86.tstate = TSTATE_NONE;
  2498 :}
  2499 STC.L VBR, @-Rn {:  
  2500     check_priv();
  2501     load_reg( R_EAX, Rn );
  2502     check_walign32( R_EAX );
  2503     ADD_imm8s_r32( -4, R_EAX );
  2504     MMU_TRANSLATE_WRITE( R_EAX );
  2505     load_spreg( R_EDX, R_VBR );
  2506     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2507     MEM_WRITE_LONG( R_EAX, R_EDX );
  2508     sh4_x86.tstate = TSTATE_NONE;
  2509 :}
  2510 STC.L SSR, @-Rn {:  
  2511     check_priv();
  2512     load_reg( R_EAX, Rn );
  2513     check_walign32( R_EAX );
  2514     ADD_imm8s_r32( -4, R_EAX );
  2515     MMU_TRANSLATE_WRITE( R_EAX );
  2516     load_spreg( R_EDX, R_SSR );
  2517     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2518     MEM_WRITE_LONG( R_EAX, R_EDX );
  2519     sh4_x86.tstate = TSTATE_NONE;
  2520 :}
  2521 STC.L SPC, @-Rn {:
  2522     check_priv();
  2523     load_reg( R_EAX, Rn );
  2524     check_walign32( R_EAX );
  2525     ADD_imm8s_r32( -4, R_EAX );
  2526     MMU_TRANSLATE_WRITE( R_EAX );
  2527     load_spreg( R_EDX, R_SPC );
  2528     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2529     MEM_WRITE_LONG( R_EAX, R_EDX );
  2530     sh4_x86.tstate = TSTATE_NONE;
  2531 :}
  2532 STC.L SGR, @-Rn {:  
  2533     check_priv();
  2534     load_reg( R_EAX, Rn );
  2535     check_walign32( R_EAX );
  2536     ADD_imm8s_r32( -4, R_EAX );
  2537     MMU_TRANSLATE_WRITE( R_EAX );
  2538     load_spreg( R_EDX, R_SGR );
  2539     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2540     MEM_WRITE_LONG( R_EAX, R_EDX );
  2541     sh4_x86.tstate = TSTATE_NONE;
  2542 :}
  2543 STC.L DBR, @-Rn {:  
  2544     check_priv();
  2545     load_reg( R_EAX, Rn );
  2546     check_walign32( R_EAX );
  2547     ADD_imm8s_r32( -4, R_EAX );
  2548     MMU_TRANSLATE_WRITE( R_EAX );
  2549     load_spreg( R_EDX, R_DBR );
  2550     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2551     MEM_WRITE_LONG( R_EAX, R_EDX );
  2552     sh4_x86.tstate = TSTATE_NONE;
  2553 :}
  2554 STC.L Rm_BANK, @-Rn {:  
  2555     check_priv();
  2556     load_reg( R_EAX, Rn );
  2557     check_walign32( R_EAX );
  2558     ADD_imm8s_r32( -4, R_EAX );
  2559     MMU_TRANSLATE_WRITE( R_EAX );
  2560     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2561     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2562     MEM_WRITE_LONG( R_EAX, R_EDX );
  2563     sh4_x86.tstate = TSTATE_NONE;
  2564 :}
  2565 STC.L GBR, @-Rn {:  
  2566     load_reg( R_EAX, Rn );
  2567     check_walign32( R_EAX );
  2568     ADD_imm8s_r32( -4, R_EAX );
  2569     MMU_TRANSLATE_WRITE( R_EAX );
  2570     load_spreg( R_EDX, R_GBR );
  2571     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2572     MEM_WRITE_LONG( R_EAX, R_EDX );
  2573     sh4_x86.tstate = TSTATE_NONE;
  2574 :}
  2575 STS FPSCR, Rn {:  
  2576     load_spreg( R_EAX, R_FPSCR );
  2577     store_reg( R_EAX, Rn );
  2578 :}
  2579 STS.L FPSCR, @-Rn {:  
  2580     load_reg( R_EAX, Rn );
  2581     check_walign32( R_EAX );
  2582     ADD_imm8s_r32( -4, R_EAX );
  2583     MMU_TRANSLATE_WRITE( R_EAX );
  2584     load_spreg( R_EDX, R_FPSCR );
  2585     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2586     MEM_WRITE_LONG( R_EAX, R_EDX );
  2587     sh4_x86.tstate = TSTATE_NONE;
  2588 :}
  2589 STS FPUL, Rn {:  
  2590     load_spreg( R_EAX, R_FPUL );
  2591     store_reg( R_EAX, Rn );
  2592 :}
  2593 STS.L FPUL, @-Rn {:  
  2594     load_reg( R_EAX, Rn );
  2595     check_walign32( R_EAX );
  2596     ADD_imm8s_r32( -4, R_EAX );
  2597     MMU_TRANSLATE_WRITE( R_EAX );
  2598     load_spreg( R_EDX, R_FPUL );
  2599     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2600     MEM_WRITE_LONG( R_EAX, R_EDX );
  2601     sh4_x86.tstate = TSTATE_NONE;
  2602 :}
  2603 STS MACH, Rn {:  
  2604     load_spreg( R_EAX, R_MACH );
  2605     store_reg( R_EAX, Rn );
  2606 :}
  2607 STS.L MACH, @-Rn {:  
  2608     load_reg( R_EAX, Rn );
  2609     check_walign32( R_EAX );
  2610     ADD_imm8s_r32( -4, R_EAX );
  2611     MMU_TRANSLATE_WRITE( R_EAX );
  2612     load_spreg( R_EDX, R_MACH );
  2613     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2614     MEM_WRITE_LONG( R_EAX, R_EDX );
  2615     sh4_x86.tstate = TSTATE_NONE;
  2616 :}
  2617 STS MACL, Rn {:  
  2618     load_spreg( R_EAX, R_MACL );
  2619     store_reg( R_EAX, Rn );
  2620 :}
  2621 STS.L MACL, @-Rn {:  
  2622     load_reg( R_EAX, Rn );
  2623     check_walign32( R_EAX );
  2624     ADD_imm8s_r32( -4, R_EAX );
  2625     MMU_TRANSLATE_WRITE( R_EAX );
  2626     load_spreg( R_EDX, R_MACL );
  2627     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2628     MEM_WRITE_LONG( R_EAX, R_EDX );
  2629     sh4_x86.tstate = TSTATE_NONE;
  2630 :}
  2631 STS PR, Rn {:  
  2632     load_spreg( R_EAX, R_PR );
  2633     store_reg( R_EAX, Rn );
  2634 :}
  2635 STS.L PR, @-Rn {:  
  2636     load_reg( R_EAX, Rn );
  2637     check_walign32( R_EAX );
  2638     ADD_imm8s_r32( -4, R_EAX );
  2639     MMU_TRANSLATE_WRITE( R_EAX );
  2640     load_spreg( R_EDX, R_PR );
  2641     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2642     MEM_WRITE_LONG( R_EAX, R_EDX );
  2643     sh4_x86.tstate = TSTATE_NONE;
  2644 :}
  2646 NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
  2647 %%
  2648     sh4_x86.in_delay_slot = FALSE;
  2649     return 0;