Search
lxdream.org :: lxdream/src/sh4/sh4x86.in
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 593:6c710c7c6835
prev 591:7b9612fd2395
next 596:dfc0c93d882e
author nkeynes
date Thu Jan 17 21:26:58 2008 +0000 (12 years ago)
permissions -rw-r--r--
last change Fix block overruns from long epilogues
view annotate diff log raw
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/xltcache.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4trans.h"
    31 #include "sh4/sh4mmio.h"
    32 #include "sh4/x86op.h"
    33 #include "clock.h"
    35 #define DEFAULT_BACKPATCH_SIZE 4096
/**
 * One entry in the block-wide back-patch list: a location in the emitted
 * x86 code that must later be fixed up to raise an exception.
 */
struct backpatch_record {
    uint32_t *fixup_addr;   /* Location within the output buffer to patch */
    uint32_t fixup_icount;  /* SH4 instruction count from block start (see sh4_x86_add_backpatch) */
    uint32_t exc_code;      /* Exception code to raise (-1 is also passed via JE_exc in the
                             * MMU macros below -- presumably "exception already set"; confirm) */
};
/* Capacity of sh4_x86_state.recovery_list */
#define MAX_RECOVERY_SIZE 2048

/* Values for sh4_x86.in_delay_slot */
#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2
/** 
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;     /* DELAY_NONE / DELAY_PC / DELAY_PC_PR */
    gboolean priv_checked; /* true if we've already checked the cpu mode. */
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    uint32_t block_start_pc; /* SH4 PC of the first instruction in the block */
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    int tstate;            /* x86 condition code currently mirroring sh4r.t,
                            * or TSTATE_NONE if T is only valid in memory */

    /* mode flags */
    gboolean tlb_on; /* True if tlb translation is active */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;  /* Number of entries in use */
    uint32_t backpatch_size;  /* Capacity of backpatch_list, in records */
    struct xlat_recovery_record recovery_list[MAX_RECOVERY_SIZE];
    uint32_t recovery_posn;
};
/*
 * Values of sh4_x86.tstate: the x86 condition code (the low nibble of the
 * 0x70+cc short-conditional-jump opcodes, see JT_rel8/JF_rel8 below) that
 * currently reflects sh4r.t, or TSTATE_NONE when T must be re-tested from
 * memory.
 */
#define TSTATE_NONE -1
#define TSTATE_O    0
#define TSTATE_C    2
#define TSTATE_E    4
#define TSTATE_NE   5
#define TSTATE_G    0xF
#define TSTATE_GE   0xD
#define TSTATE_A    7
#define TSTATE_AE   3
/** Branch if T is set (either in the current cflags, or in sh4r.t).
 * If tstate is unknown, re-test sh4r.t first (which leaves TSTATE_E). */
#define JT_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+sh4_x86.tstate); OP(rel8); \
    MARK_JMP(rel8,label)

/** Branch if T is clear (either in the current cflags or in sh4r.t).
 * cc^1 inverts an x86 condition code. */
#define JF_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+ (sh4_x86.tstate^1)); OP(rel8); \
    MARK_JMP(rel8, label)
/* Translator state for the block currently being translated */
static struct sh4_x86_state sh4_x86;

/* Constants used by the FP truncation/saturation code below -- users are in
 * the generated section; TODO confirm which patterns reference max/min_int */
static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
   102 void sh4_x86_init()
   103 {
   104     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   105     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   106 }
   109 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   110 {
   111     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   112 	sh4_x86.backpatch_size <<= 1;
   113 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   114 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   115 	assert( sh4_x86.backpatch_list != NULL );
   116     }
   117     if( sh4_x86.in_delay_slot ) {
   118 	fixup_pc -= 2;
   119     }
   120     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_addr = (uint32_t *)fixup_addr;
   121     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   122     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   123     sh4_x86.backpatch_posn++;
   124 }
/**
 * Record a recovery point mapping the current output position (xlat_output)
 * to the SH4 instruction count from the start of the block.
 * NOTE(review): no bounds check on xlat_recovery_posn here -- presumably the
 * caller guarantees capacity; confirm against xlat_recovery's declared size.
 */
void sh4_x86_add_recovery( uint32_t pc )
{
    xlat_recovery[xlat_recovery_posn].xlat_pc = (uintptr_t)xlat_output;
    xlat_recovery[xlat_recovery_posn].sh4_icount = (pc - sh4_x86.block_start_pc)>>1;
    xlat_recovery_posn++;
}
/**
 * Emit an instruction to load an SH4 reg into a real register
 */
static inline void load_reg( int x86reg, int sh4reg ) 
{
    /* mov [bp+n], reg  (8B /r, [ebp+disp8] form) */
    OP(0x8B);
    OP(0x45 + (x86reg<<3));   /* ModRM: mod=01, reg=x86reg, rm=101 (ebp) */
    OP(REG_OFFSET(r[sh4reg]));
}
/**
 * Load the low 16 bits of an SH4 reg into an x86 register, sign-extended
 * (MOVSX r32, r/m16 = 0F BF /r).
 */
static inline void load_reg16s( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xBF);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}
/**
 * Load the low 16 bits of an SH4 reg into an x86 register, zero-extended
 * (MOVZX r32, r/m16 = 0F B7 /r).
 */
static inline void load_reg16u( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xB7);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}
/* Load/store a special (non-GPR) sh4r field by byte offset */
#define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
#define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )

/**
 * Emit an instruction to load an immediate value into a register
 * (MOV r32, imm32 = B8+r imm32)
 */
static inline void load_imm32( int x86reg, uint32_t value ) {
    /* mov #value, reg */
    OP(0xB8 + x86reg);
    OP32(value);
}
   170 /**
   171  * Load an immediate 64-bit quantity (note: x86-64 only)
   172  */
   173 static inline void load_imm64( int x86reg, uint32_t value ) {
   174     /* mov #value, reg */
   175     REXW();
   176     OP(0xB8 + x86reg);
   177     OP64(value);
   178 }
/**
 * Emit an instruction to store an SH4 reg (RN)
 * (MOV r/m32, r32 = 89 /r, [ebp+disp8] form)
 */
void static inline store_reg( int x86reg, int sh4reg ) {
    /* mov reg, [bp+n] */
    OP(0x89);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}

/* Load a pointer to the current FP register bank into bankreg */
#define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
void static inline load_fr( int bankreg, int x86reg, int frm )
{
    /* MOV r32, [bankreg + (frm^1)*4] -- frm^1: adjacent singles are swapped
     * within the bank (cf. push_dr which uses frm directly for doubles) */
    OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
}
/**
 * Store an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
void static inline store_fr( int bankreg, int x86reg, int frn )
{
    /* MOV [bankreg + (frn^1)*4], r32 -- same swapped layout as load_fr */
    OP(0x89);  OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
}
/**
 * Load a pointer to the back fp back into the specified x86 register. The
 * bankreg must have been previously loaded with FPSCR.
 * NB: 12 bytes
 */
static inline void load_xf_bank( int bankreg )
{
    NOT_r32( bankreg );                /* Select the OPPOSITE bank to FPSCR.FR */
    SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
    AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
    OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
}
/**
 * Update the fr_bank pointer based on the current fpscr value.
 * Clobbers fpscrreg. Same bank-select arithmetic as load_xf_bank, but
 * without the NOT (selects the bank indicated by FPSCR bit 21).
 */
static inline void update_fr_bank( int fpscrreg )
{
    SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
    AND_imm8s_r32( 0x40, fpscrreg );    // Complete extraction
    OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
    store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
}
/**
 * Push FPUL (as a 32-bit float) onto the FPU stack
 * (FLD m32 = D9 /0, [ebp+R_FPUL])
 */
static inline void push_fpul( )
{
    OP(0xD9); OP(0x45); OP(R_FPUL);
}
/**
 * Pop FPUL (as a 32-bit float) from the FPU stack
 * (FSTP m32 = D9 /3, [ebp+R_FPUL] -- store-and-pop)
 */
static inline void pop_fpul( )
{
    OP(0xD9); OP(0x5D); OP(R_FPUL);
}
/**
 * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
 * with the location of the current fp bank.
 */
static inline void push_fr( int bankreg, int frm ) 
{
    OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
}
/**
 * Pop a 32-bit float from the FPU stack and store it back into the fp bank, 
 * with bankreg previously loaded with the location of the current fp bank.
 */
static inline void pop_fr( int bankreg, int frm )
{
    OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FSTP.S [bankreg + frm^1*4] (/3 = store-and-pop)
}
/**
 * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
 * with the location of the current fp bank.
 */
static inline void push_dr( int bankreg, int frm )
{
    OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
}
/**
 * Pop a 64-bit double from the FPU stack back into the fp bank
 * (DD /3 = FSTP m64, store-and-pop).
 */
static inline void pop_dr( int bankreg, int frm )
{
    OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FSTP.D [bankreg + frm*4]
}
/* Exception checks - Note that all exception checks will clobber EAX */

/* Emit a privilege check: SR.MD must be set, otherwise raise an illegal
 * (or slot-illegal) instruction exception. Emitted at most once per block. */
#define check_priv( ) \
    if( !sh4_x86.priv_checked ) { \
	sh4_x86.priv_checked = TRUE;\
	load_spreg( R_EAX, R_SR );\
	AND_imm32_r32( SR_MD, R_EAX );\
	if( sh4_x86.in_delay_slot ) {\
	    JE_exc( EXC_SLOT_ILLEGAL );\
	} else {\
	    JE_exc( EXC_ILLEGAL );\
	}\
    }\

/* Emit an FPU-enable check: SR.FD must be clear, otherwise raise an
 * FPU-disabled (or slot-FPU-disabled) exception. At most once per block. */
#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
	sh4_x86.fpuen_checked = TRUE;\
	load_spreg( R_EAX, R_SR );\
	AND_imm32_r32( SR_FD, R_EAX );\
	if( sh4_x86.in_delay_slot ) {\
	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
	} else {\
	    JNE_exc(EXC_FPU_DISABLED);\
	}\
    }

/* Alignment checks: raise a data address error if the address in x86reg is
 * not 2-byte / 4-byte aligned respectively */
#define check_ralign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);
#define UNDEF()
/* Copy the function-call result from EAX into value_reg, if they differ */
#define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
/* Memory accessors: emit calls to the sh4_read/write_* functions */
#define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)

/**
 * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned 
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
#define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }

/**
 * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned 
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
#define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }

/* Emitted-code sizes of the above, used when computing jump displacements */
#define MEM_READ_SIZE (CALL_FUNC1_SIZE)
#define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
#define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )

/* Raise a slot-illegal exception and abandon translation of this instruction */
#define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
   351 /****** Import appropriate calling conventions ******/
   352 #if SH4_TRANSLATOR == TARGET_X86_64
   353 #include "sh4/ia64abi.h"
   354 #else /* SH4_TRANSLATOR == TARGET_X86 */
   355 #ifdef APPLE_BUILD
   356 #include "sh4/ia32mac.h"
   357 #else
   358 #include "sh4/ia32abi.h"
   359 #endif
   360 #endif
/**
 * Upper bound, in bytes, on the code emitted by the block epilogue: the
 * fixed epilogue plus 12 bytes per outstanding back-patch record. Used to
 * avoid overrunning the output block.
 */
uint32_t sh4_translate_end_block_size()
{
    return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
}
/**
 * Embed a breakpoint into the generated code: emits a call to
 * sh4_translate_breakpoint_hit with the SH4 pc as argument.
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    load_imm32( R_EAX, pc );
    call_func1( sh4_translate_breakpoint_hit, R_EAX );
}
   377 /**
   378  * Embed a call to sh4_execute_instruction for situations that we
   379  * can't translate (mainly page-crossing delay slots at the moment).
   380  * Caller is responsible for setting new_pc.
   381  */
   382 void sh4_emulator_exit( sh4vma_t endpc )
   383 {
   384     load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
   385     ADD_r32_sh4r( R_ECX, R_PC );
   387     load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
   388     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   389     load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
   390     store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
   392     call_func0( sh4_execute_instruction );    
   393     load_imm32( R_EAX, R_PC );
   394     if( sh4_x86.tlb_on ) {
   395 	call_func1(xlat_get_code_by_vma,R_EAX);
   396     } else {
   397 	call_func1(xlat_get_code,R_EAX);
   398     }
   399     AND_imm8s_r32( 0xFC, R_EAX ); // 3
   400     POP_r32(R_EBP);
   401     RET();
   402 } 
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both branch and delayed instruction as a single unit (as
 * one emitted sequence).
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or other control transfer).
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    /* NOTE(review): the comment below appears to be a leftover from a
     * removed not-in-icache path; the assert above now enforces presence. */
	/* PC is not in the current icache - this usually means we're running
	 * with MMU on, and we've gone past the end of the page. And since 
	 * sh4_translate_block is pretty careful about this, it means we're
	 * almost certainly in a delay slot.
	 *
	 * Since we can't assume the page is present (and we can't fault it in
	 * at this point, inline a call to sh4_execute_instruction (with a few
	 * small repairs to cope with the different environment).
	 */

    /* Record a recovery point, except in delay slots (which are recovered
     * via their branch instruction) */
    if( !sh4_x86.in_delay_slot ) {
	sh4_x86_add_recovery(pc);
    }
   433 %%
/* ALU operations */
ADD Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADD #imm, Rn {:  
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( imm, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    /* Rn += Rm + T; T = carry out */
    if( sh4_x86.tstate != TSTATE_C ) {
	LDC_t();
    }
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADC_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ADDV Rm, Rn {:
    /* Rn += Rm; T = signed overflow */
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    AND_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:  
    load_reg( R_EAX, 0 );
    AND_imm32_r32(imm, R_EAX); 
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {: 
    /* Read-modify-write AND of the byte at R0+GBR */
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);     /* preserve the translated address across the read */
    MEM_READ_BYTE( R_EAX, R_EAX );
    POP_realigned_r32(R_ECX);
    AND_imm32_r32(imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
CMP/EQ Rm, Rn {:  
    /* T = (Rm == Rn) */
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:  
    /* T = (R0 == imm) */
    load_reg( R_EAX, 0 );
    CMP_imm8s_r32(imm, R_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:  
    /* T = (Rn >= Rm), signed */
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {: 
    /* T = (Rn > Rm), signed */
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:  
    /* T = (Rn > Rm), unsigned */
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {: 
    /* T = (Rn >= Rm), unsigned */
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
:}
CMP/PL Rn {: 
    /* T = (Rn > 0), signed */
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:  
    /* T = (Rn >= 0), signed */
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:  
    /* T = 1 if any byte of Rm equals the corresponding byte of Rn.
     * XOR makes matching bytes zero; each TEST/JE short-circuits to the
     * final SETE with ZF set. Displacements are hand-computed byte counts. */
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_ECX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    JE_rel8(13, target1);
    TEST_r8_r8( R_AH, R_AH ); // 2
    JE_rel8(9, target2);
    SHR_imm8_r32( 16, R_EAX ); // 3
    TEST_r8_r8( R_AL, R_AL ); // 2
    JE_rel8(2, target3);
    TEST_r8_r8( R_AH, R_AH ); // 2
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
DIV0S Rm, Rn {:
    /* Initialise a signed division step sequence:
     * M = sign(Rm), Q = sign(Rn), T = M ^ Q */
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHR_imm8_r32( 31, R_EAX );
    SHR_imm8_r32( 31, R_ECX );
    store_spreg( R_EAX, R_M );
    store_spreg( R_ECX, R_Q );
    CMP_r32_r32( R_EAX, R_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:  
    /* Initialise an unsigned division: M = Q = T = 0 */
    XOR_r32_r32( R_EAX, R_EAX );
    store_spreg( R_EAX, R_Q );
    store_spreg( R_EAX, R_M );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
DIV1 Rm, Rn {:
    /* One step of the SH4 1-bit non-restoring division algorithm */
    load_spreg( R_ECX, R_M );
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
	LDC_t();
    }
    RCL1_r32( R_EAX );          /* rotate T into Rn; old bit 31 -> CF */
    SETC_r8( R_DL ); // Q'
    CMP_sh4r_r32( R_Q, R_ECX );
    JE_rel8(5, mqequal);
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_rel8(3, end);
    JMP_TARGET(mqequal);
    SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_TARGET(end);
    store_reg( R_EAX, Rn ); // Done with Rn now
    SETC_r8(R_AL); // tmp1
    XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
    XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
    store_spreg( R_ECX, R_Q );
    XOR_imm8s_r32( 1, R_AL );   // T = !Q'
    MOVZX_r8_r32( R_AL, R_EAX );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULS.L Rm, Rn {:  
    /* MACH:MACL = Rm * Rn (signed 32x32->64) */
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    IMUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:  
    /* MACH:MACL = Rm * Rn (unsigned 32x32->64) */
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );    
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:  
    /* Rn -= 1; T = (Rn == 0) */
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    store_reg( R_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:  
    /* Rn = sign-extend byte of Rm */
    load_reg( R_EAX, Rm );
    MOVSX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTS.W Rm, Rn {:  
    /* Rn = sign-extend word of Rm */
    load_reg( R_EAX, Rm );
    MOVSX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.B Rm, Rn {:  
    /* Rn = zero-extend byte of Rm */
    load_reg( R_EAX, Rm );
    MOVZX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.W Rm, Rn {:  
    /* Rn = zero-extend word of Rm */
    load_reg( R_EAX, Rm );
    MOVZX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
MAC.L @Rm+, @Rn+ {:
    /* MACH:MACL += @Rm+ * @Rn+ (signed), saturated to 48 bits when S=1 */
    if( Rm == Rn ) {
	load_reg( R_EAX, Rm );
	check_ralign32( R_EAX );
	MMU_TRANSLATE_READ( R_EAX );
	PUSH_realigned_r32( R_EAX );
	load_reg( R_EAX, Rn );
	ADD_imm8s_r32( 4, R_EAX );
	MMU_TRANSLATE_READ( R_EAX );
	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
	// Note translate twice in case of page boundaries. Maybe worth
	// adding a page-boundary check to skip the second translation
    } else {
	load_reg( R_EAX, Rm );
	check_ralign32( R_EAX );
	MMU_TRANSLATE_READ( R_EAX );
	PUSH_realigned_r32( R_EAX );
	load_reg( R_EAX, Rn );
	check_ralign32( R_EAX );
	MMU_TRANSLATE_READ( R_EAX );
	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_LONG( R_EAX, R_EAX );
    POP_r32( R_ECX );
    PUSH_r32( R_EAX );
    MEM_READ_LONG( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );

    IMUL_r32( R_ECX );
    ADD_r32_sh4r( R_EAX, R_MACL );
    ADC_r32_sh4r( R_EDX, R_MACH );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32(R_ECX, R_ECX);
    JE_rel8( CALL_FUNC0_SIZE, nosat );
    call_func0( signsat48 );    /* saturate MACH:MACL to 48 bits */
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
MAC.W @Rm+, @Rn+ {:  
    /* MACL += @Rm+ * @Rn+ (signed 16x16->32); when S=1 the result is
     * saturated to 32 bits with MACH set to 1 on overflow.
     * rel8 displacements are hand-computed from the commented byte counts. */
    if( Rm == Rn ) {
	load_reg( R_EAX, Rm );
	check_ralign16( R_EAX );
	MMU_TRANSLATE_READ( R_EAX );
	PUSH_realigned_r32( R_EAX );
	load_reg( R_EAX, Rn );
	ADD_imm8s_r32( 2, R_EAX );
	MMU_TRANSLATE_READ( R_EAX );
	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
	// Note translate twice in case of page boundaries. Maybe worth
	// adding a page-boundary check to skip the second translation
    } else {
	load_reg( R_EAX, Rm );
	check_ralign16( R_EAX );
	MMU_TRANSLATE_READ( R_EAX );
	PUSH_realigned_r32( R_EAX );
	load_reg( R_EAX, Rn );
	check_ralign16( R_EAX );
	MMU_TRANSLATE_READ( R_EAX );
	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_WORD( R_EAX, R_EAX );
    POP_r32( R_ECX );
    PUSH_r32( R_EAX );
    MEM_READ_WORD( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );
    IMUL_r32( R_ECX );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32( R_ECX, R_ECX );
    JE_rel8( 47, nosat );

    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    JNO_rel8( 51, end );            // 2
    load_imm32( R_EDX, 1 );         // 5
    store_spreg( R_EDX, R_MACH );   // 6
    JS_rel8( 13, positive );        // 2
    load_imm32( R_EAX, 0x80000000 );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8( 25, end2 );           // 2

    JMP_TARGET(positive);
    load_imm32( R_EAX, 0x7FFFFFFF );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8( 12, end3);            // 2

    JMP_TARGET(nosat);
    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    ADC_r32_sh4r( R_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVT Rn {:  
    /* Rn = T */
    load_spreg( R_EAX, R_T );
    store_reg( R_EAX, Rn );
:}
MUL.L Rm, Rn {:  
    /* MACL = low 32 bits of Rm * Rn */
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    /* MACL = (int16)Rm * (int16)Rn -- low 32 bits are the same for
     * signed/unsigned multiply once operands are sign-extended */
    load_reg16s( R_EAX, Rm );
    load_reg16s( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:  
    /* MACL = (uint16)Rm * (uint16)Rn */
    load_reg16u( R_EAX, Rm );
    load_reg16u( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    load_reg( R_EAX, Rm );
    NEG_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:  
    /* Rn = 0 - Rm - T; T = borrow */
    load_reg( R_EAX, Rm );
    XOR_r32_r32( R_ECX, R_ECX );
    LDC_t();
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:  
    load_reg( R_EAX, Rm );
    NOT_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:  
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    load_reg( R_EAX, 0 );
    OR_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:  
    /* Read-modify-write OR of the byte at R0+GBR */
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);     /* preserve the translated address across the read */
    MEM_READ_BYTE( R_EAX, R_EAX );
    POP_realigned_r32(R_ECX);
    OR_imm32_r32(imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    /* Rotate left through T (carry) */
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
	LDC_t();
    }
    RCL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:  
    /* Rotate right through T (carry) */
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
	LDC_t();
    }
    RCR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:  
    /* Rotate left; T = bit rotated out */
    load_reg( R_EAX, Rn );
    ROL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:  
    /* Rotate right; T = bit rotated out */
    load_reg( R_EAX, Rn );
    ROR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    /* Annoyingly enough, not directly convertible: Rm >= 0 shifts left by
     * Rm&0x1F; Rm < 0 shifts arithmetic right by (-Rm)&0x1F, where a zero
     * shift count means "shift right by 32" (all sign bits). */
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(16, doshl);

    NEG_r32( R_ECX );      // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8( 4, emptysar);     // 2
    SAR_r32_CL( R_EAX );       // 2
    JMP_rel8(10, end);          // 2

    JMP_TARGET(emptysar);
    SAR_imm8_r32(31, R_EAX );  // 3 -- full arithmetic shift: all sign bits
    JMP_rel8(5, end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLD Rm, Rn {:  
    /* Logical variant of SHAD: negative count with zero low bits yields 0 */
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(15, doshl);

    NEG_r32( R_ECX );      // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8( 4, emptyshr );
    SHR_r32_CL( R_EAX );       // 2
    JMP_rel8(9, end);          // 2

    JMP_TARGET(emptyshr);
    XOR_r32_r32( R_EAX, R_EAX );
    JMP_rel8(5, end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHAL Rn {: 
    /* Shift left 1; T = bit shifted out */
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHAR Rn {:  
    /* Arithmetic shift right 1; T = bit shifted out */
    load_reg( R_EAX, Rn );
    SAR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL Rn {:  
    /* Logical shift left 1; T = bit shifted out */
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL2 Rn {:
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL8 Rn {:  
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL16 Rn {:  
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR Rn {:  
    /* Logical shift right 1; T = bit shifted out */
    load_reg( R_EAX, Rn );
    SHR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLR2 Rn {:  
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR8 Rn {:  
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR16 Rn {:  
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
       /* Subtraction and byte/word swaps.  SUBC consumes T as borrow-in via
        * the x86 carry flag — LDC_t() reloads it unless the cached tstate
        * says the previous instruction already left T in CF — and writes the
        * borrow-out back to T.  SUBV records signed overflow in T. */
   963 SUB Rm, Rn {:  
   964     load_reg( R_EAX, Rm );
   965     load_reg( R_ECX, Rn );
   966     SUB_r32_r32( R_EAX, R_ECX );
   967     store_reg( R_ECX, Rn );
   968     sh4_x86.tstate = TSTATE_NONE;
   969 :}
   970 SUBC Rm, Rn {:  
   971     load_reg( R_EAX, Rm );
   972     load_reg( R_ECX, Rn );
   973     if( sh4_x86.tstate != TSTATE_C ) {
   974 	LDC_t();
   975     }
   976     SBB_r32_r32( R_EAX, R_ECX );
   977     store_reg( R_ECX, Rn );
   978     SETC_t();
   979     sh4_x86.tstate = TSTATE_C;
   980 :}
   981 SUBV Rm, Rn {:  
   982     load_reg( R_EAX, Rm );
   983     load_reg( R_ECX, Rn );
   984     SUB_r32_r32( R_EAX, R_ECX );
   985     store_reg( R_ECX, Rn );
   986     SETO_t();
   987     sh4_x86.tstate = TSTATE_O;
   988 :}
       // SWAP.B: exchange the two low bytes (AL/AH); upper 16 bits unchanged
   989 SWAP.B Rm, Rn {:  
   990     load_reg( R_EAX, Rm );
   991     XCHG_r8_r8( R_AL, R_AH );
   992     store_reg( R_EAX, Rn );
   993 :}
       // SWAP.W: exchange the two 16-bit halves via shift-left/shift-right/or
   994 SWAP.W Rm, Rn {:  
   995     load_reg( R_EAX, Rm );
   996     MOV_r32_r32( R_EAX, R_ECX );
   997     SHL_imm8_r32( 16, R_ECX );
   998     SHR_imm8_r32( 16, R_EAX );
   999     OR_r32_r32( R_EAX, R_ECX );
  1000     store_reg( R_ECX, Rn );
  1001     sh4_x86.tstate = TSTATE_NONE;
  1002 :}
       /* TAS.B: read the byte at @Rn, set T iff it was zero, then write it
        * back with bit 7 set.  The translated address is preserved across
        * the read call on the x86 stack (PUSH/POP_realigned, which keep the
        * stack-alignment contract of the memory callbacks). */
  1003 TAS.B @Rn {:  
  1004     load_reg( R_EAX, Rn );
  1005     MMU_TRANSLATE_WRITE( R_EAX );
  1006     PUSH_realigned_r32( R_EAX );
  1007     MEM_READ_BYTE( R_EAX, R_EAX );
  1008     TEST_r8_r8( R_AL, R_AL );
  1009     SETE_t();
  1010     OR_imm8_r8( 0x80, R_AL );
  1011     POP_realigned_r32( R_ECX );
  1012     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1013     sh4_x86.tstate = TSTATE_NONE;
  1014 :}
       /* TST: T = ((a & b) == 0); tstate = TSTATE_E records that T mirrors
        * the x86 zero flag for a following conditional branch. */
  1015 TST Rm, Rn {:  
  1016     load_reg( R_EAX, Rm );
  1017     load_reg( R_ECX, Rn );
  1018     TEST_r32_r32( R_EAX, R_ECX );
  1019     SETE_t();
  1020     sh4_x86.tstate = TSTATE_E;
  1021 :}
  1022 TST #imm, R0 {:  
  1023     load_reg( R_EAX, 0 );
  1024     TEST_imm32_r32( imm, R_EAX );
  1025     SETE_t();
  1026     sh4_x86.tstate = TSTATE_E;
  1027 :}
  1028 TST.B #imm, @(R0, GBR) {:  
  1029     load_reg( R_EAX, 0);
  1030     load_reg( R_ECX, R_GBR);
  1031     ADD_r32_r32( R_ECX, R_EAX );
  1032     MMU_TRANSLATE_READ( R_EAX );
  1033     MEM_READ_BYTE( R_EAX, R_EAX );
  1034     TEST_imm8_r8( imm, R_AL );
  1035     SETE_t();
  1036     sh4_x86.tstate = TSTATE_E;
  1037 :}
       /* Bitwise XOR (register, immediate-to-R0, and a read-modify-write on
        * the byte at R0+GBR) plus XTRCT, which extracts the middle 32 bits
        * of the 64-bit Rm:Rn concatenation. */
  1038 XOR Rm, Rn {:  
  1039     load_reg( R_EAX, Rm );
  1040     load_reg( R_ECX, Rn );
  1041     XOR_r32_r32( R_EAX, R_ECX );
  1042     store_reg( R_ECX, Rn );
  1043     sh4_x86.tstate = TSTATE_NONE;
  1044 :}
  1045 XOR #imm, R0 {:  
  1046     load_reg( R_EAX, 0 );
  1047     XOR_imm32_r32( imm, R_EAX );
  1048     store_reg( R_EAX, 0 );
  1049     sh4_x86.tstate = TSTATE_NONE;
  1050 :}
  1051 XOR.B #imm, @(R0, GBR) {:  
  1052     load_reg( R_EAX, 0 );
  1053     load_spreg( R_ECX, R_GBR );
  1054     ADD_r32_r32( R_ECX, R_EAX );
  1055     MMU_TRANSLATE_WRITE( R_EAX );
       // address survives the read call on the x86 stack for the write-back
  1056     PUSH_realigned_r32(R_EAX);
  1057     MEM_READ_BYTE(R_EAX, R_EAX);
  1058     POP_realigned_r32(R_ECX);
  1059     XOR_imm32_r32( imm, R_EAX );
  1060     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1061     sh4_x86.tstate = TSTATE_NONE;
  1062 :}
  1063 XTRCT Rm, Rn {:
  1064     load_reg( R_EAX, Rm );
  1065     load_reg( R_ECX, Rn );
  1066     SHL_imm8_r32( 16, R_EAX );
  1067     SHR_imm8_r32( 16, R_ECX );
  1068     OR_r32_r32( R_EAX, R_ECX );
  1069     store_reg( R_ECX, Rn );
  1070     sh4_x86.tstate = TSTATE_NONE;
  1071 :}
  1073 /* Data move instructions */
       /* Register/immediate moves and the byte-sized memory moves.  Every
        * memory form translates the effective address first; MMU_TRANSLATE_*
        * may raise a TLB miss or address error and exit the block.  The
        * pre-decrement / post-increment forms only commit the updated Rn/Rm
        * in sh4r AFTER translation succeeds, so a faulting access leaves the
        * register unmodified as the architecture requires. */
  1074 MOV Rm, Rn {:  
  1075     load_reg( R_EAX, Rm );
  1076     store_reg( R_EAX, Rn );
  1077 :}
  1078 MOV #imm, Rn {:  
  1079     load_imm32( R_EAX, imm );
  1080     store_reg( R_EAX, Rn );
  1081 :}
  1082 MOV.B Rm, @Rn {:  
  1083     load_reg( R_EAX, Rn );
  1084     MMU_TRANSLATE_WRITE( R_EAX );
  1085     load_reg( R_EDX, Rm );
  1086     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1087     sh4_x86.tstate = TSTATE_NONE;
  1088 :}
  1089 MOV.B Rm, @-Rn {:  
  1090     load_reg( R_EAX, Rn );
  1091     ADD_imm8s_r32( -1, R_EAX );
  1092     MMU_TRANSLATE_WRITE( R_EAX );
  1093     load_reg( R_EDX, Rm );
  1094     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );  // commit decrement post-translate
  1095     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1096     sh4_x86.tstate = TSTATE_NONE;
  1097 :}
  1098 MOV.B Rm, @(R0, Rn) {:  
  1099     load_reg( R_EAX, 0 );
  1100     load_reg( R_ECX, Rn );
  1101     ADD_r32_r32( R_ECX, R_EAX );
  1102     MMU_TRANSLATE_WRITE( R_EAX );
  1103     load_reg( R_EDX, Rm );
  1104     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1105     sh4_x86.tstate = TSTATE_NONE;
  1106 :}
  1107 MOV.B R0, @(disp, GBR) {:  
  1108     load_spreg( R_EAX, R_GBR );
  1109     ADD_imm32_r32( disp, R_EAX );
  1110     MMU_TRANSLATE_WRITE( R_EAX );
  1111     load_reg( R_EDX, 0 );
  1112     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1113     sh4_x86.tstate = TSTATE_NONE;
  1114 :}
  1115 MOV.B R0, @(disp, Rn) {:  
  1116     load_reg( R_EAX, Rn );
  1117     ADD_imm32_r32( disp, R_EAX );
  1118     MMU_TRANSLATE_WRITE( R_EAX );
  1119     load_reg( R_EDX, 0 );
  1120     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1121     sh4_x86.tstate = TSTATE_NONE;
  1122 :}
  1123 MOV.B @Rm, Rn {:  
  1124     load_reg( R_EAX, Rm );
  1125     MMU_TRANSLATE_READ( R_EAX );
  1126     MEM_READ_BYTE( R_EAX, R_EAX );
  1127     store_reg( R_EAX, Rn );
  1128     sh4_x86.tstate = TSTATE_NONE;
  1129 :}
  1130 MOV.B @Rm+, Rn {:  
  1131     load_reg( R_EAX, Rm );
  1132     MMU_TRANSLATE_READ( R_EAX );
  1133     ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );  // commit increment post-translate
  1134     MEM_READ_BYTE( R_EAX, R_EAX );
  1135     store_reg( R_EAX, Rn );
  1136     sh4_x86.tstate = TSTATE_NONE;
  1137 :}
  1138 MOV.B @(R0, Rm), Rn {:  
  1139     load_reg( R_EAX, 0 );
  1140     load_reg( R_ECX, Rm );
  1141     ADD_r32_r32( R_ECX, R_EAX );
  1142     MMU_TRANSLATE_READ( R_EAX )
  1143     MEM_READ_BYTE( R_EAX, R_EAX );
  1144     store_reg( R_EAX, Rn );
  1145     sh4_x86.tstate = TSTATE_NONE;
  1146 :}
       /* Remaining byte loads plus all 32-bit (long) moves.  Long accesses
        * verify 4-byte alignment (check_ralign32/check_walign32) before
        * address translation.  MOV.L @(disp,PC) is the constant-pool load:
        * when the target lies in the current translation page it is read
        * straight out of the icache copy, bypassing the memory subsystem. */
  1147 MOV.B @(disp, GBR), R0 {:  
  1148     load_spreg( R_EAX, R_GBR );
  1149     ADD_imm32_r32( disp, R_EAX );
  1150     MMU_TRANSLATE_READ( R_EAX );
  1151     MEM_READ_BYTE( R_EAX, R_EAX );
  1152     store_reg( R_EAX, 0 );
  1153     sh4_x86.tstate = TSTATE_NONE;
  1154 :}
  1155 MOV.B @(disp, Rm), R0 {:  
  1156     load_reg( R_EAX, Rm );
  1157     ADD_imm32_r32( disp, R_EAX );
  1158     MMU_TRANSLATE_READ( R_EAX );
  1159     MEM_READ_BYTE( R_EAX, R_EAX );
  1160     store_reg( R_EAX, 0 );
  1161     sh4_x86.tstate = TSTATE_NONE;
  1162 :}
  1163 MOV.L Rm, @Rn {:
  1164     load_reg( R_EAX, Rn );
  1165     check_walign32(R_EAX);
  1166     MMU_TRANSLATE_WRITE( R_EAX );
  1167     load_reg( R_EDX, Rm );
  1168     MEM_WRITE_LONG( R_EAX, R_EDX );
  1169     sh4_x86.tstate = TSTATE_NONE;
  1170 :}
  1171 MOV.L Rm, @-Rn {:  
  1172     load_reg( R_EAX, Rn );
  1173     ADD_imm8s_r32( -4, R_EAX );
  1174     check_walign32( R_EAX );
  1175     MMU_TRANSLATE_WRITE( R_EAX );
  1176     load_reg( R_EDX, Rm );
  1177     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );  // commit decrement post-translate
  1178     MEM_WRITE_LONG( R_EAX, R_EDX );
  1179     sh4_x86.tstate = TSTATE_NONE;
  1180 :}
  1181 MOV.L Rm, @(R0, Rn) {:  
  1182     load_reg( R_EAX, 0 );
  1183     load_reg( R_ECX, Rn );
  1184     ADD_r32_r32( R_ECX, R_EAX );
  1185     check_walign32( R_EAX );
  1186     MMU_TRANSLATE_WRITE( R_EAX );
  1187     load_reg( R_EDX, Rm );
  1188     MEM_WRITE_LONG( R_EAX, R_EDX );
  1189     sh4_x86.tstate = TSTATE_NONE;
  1190 :}
  1191 MOV.L R0, @(disp, GBR) {:  
  1192     load_spreg( R_EAX, R_GBR );
  1193     ADD_imm32_r32( disp, R_EAX );
  1194     check_walign32( R_EAX );
  1195     MMU_TRANSLATE_WRITE( R_EAX );
  1196     load_reg( R_EDX, 0 );
  1197     MEM_WRITE_LONG( R_EAX, R_EDX );
  1198     sh4_x86.tstate = TSTATE_NONE;
  1199 :}
  1200 MOV.L Rm, @(disp, Rn) {:  
  1201     load_reg( R_EAX, Rn );
  1202     ADD_imm32_r32( disp, R_EAX );
  1203     check_walign32( R_EAX );
  1204     MMU_TRANSLATE_WRITE( R_EAX );
  1205     load_reg( R_EDX, Rm );
  1206     MEM_WRITE_LONG( R_EAX, R_EDX );
  1207     sh4_x86.tstate = TSTATE_NONE;
  1208 :}
  1209 MOV.L @Rm, Rn {:  
  1210     load_reg( R_EAX, Rm );
  1211     check_ralign32( R_EAX );
  1212     MMU_TRANSLATE_READ( R_EAX );
  1213     MEM_READ_LONG( R_EAX, R_EAX );
  1214     store_reg( R_EAX, Rn );
  1215     sh4_x86.tstate = TSTATE_NONE;
  1216 :}
  1217 MOV.L @Rm+, Rn {:  
  1218     load_reg( R_EAX, Rm );
  1219     check_ralign32( R_EAX );
  1220     MMU_TRANSLATE_READ( R_EAX );
  1221     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );  // commit increment post-translate
  1222     MEM_READ_LONG( R_EAX, R_EAX );
  1223     store_reg( R_EAX, Rn );
  1224     sh4_x86.tstate = TSTATE_NONE;
  1225 :}
  1226 MOV.L @(R0, Rm), Rn {:  
  1227     load_reg( R_EAX, 0 );
  1228     load_reg( R_ECX, Rm );
  1229     ADD_r32_r32( R_ECX, R_EAX );
  1230     check_ralign32( R_EAX );
  1231     MMU_TRANSLATE_READ( R_EAX );
  1232     MEM_READ_LONG( R_EAX, R_EAX );
  1233     store_reg( R_EAX, Rn );
  1234     sh4_x86.tstate = TSTATE_NONE;
  1235 :}
  1236 MOV.L @(disp, GBR), R0 {:
  1237     load_spreg( R_EAX, R_GBR );
  1238     ADD_imm32_r32( disp, R_EAX );
  1239     check_ralign32( R_EAX );
  1240     MMU_TRANSLATE_READ( R_EAX );
  1241     MEM_READ_LONG( R_EAX, R_EAX );
  1242     store_reg( R_EAX, 0 );
  1243     sh4_x86.tstate = TSTATE_NONE;
  1244 :}
  1245 MOV.L @(disp, PC), Rn {:  
  1246     if( sh4_x86.in_delay_slot ) {
  1247 	SLOTILLEGAL();
  1248     } else {
  1249 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1250 	if( IS_IN_ICACHE(target) ) {
  1251 	    // If the target address is in the same page as the code, it's
  1252 	    // pretty safe to just ref it directly and circumvent the whole
  1253 	    // memory subsystem. (this is a big performance win)
  1255 	    // FIXME: There's a corner-case that's not handled here when
  1256 	    // the current code-page is in the ITLB but not in the UTLB.
  1257 	    // (should generate a TLB miss although need to test SH4 
  1258 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1259 	    // behaviour though.
  1260 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1261 	    MOV_moff32_EAX( ptr );
  1262 	} else {
  1263 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1264 	    // different virtual address than the translation was done with,
  1265 	    // but we can safely assume that the low bits are the same.
  1266 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1267 	    ADD_sh4r_r32( R_PC, R_EAX );
  1268 	    MMU_TRANSLATE_READ( R_EAX );
  1269 	    MEM_READ_LONG( R_EAX, R_EAX );
  1270 	    sh4_x86.tstate = TSTATE_NONE;
  1272 	store_reg( R_EAX, Rn );
  1274 :}
  1275 MOV.L @(disp, Rm), Rn {:  
  1276     load_reg( R_EAX, Rm );
  1277     ADD_imm8s_r32( disp, R_EAX );
  1278     check_ralign32( R_EAX );
  1279     MMU_TRANSLATE_READ( R_EAX );
  1280     MEM_READ_LONG( R_EAX, R_EAX );
  1281     store_reg( R_EAX, Rn );
  1282     sh4_x86.tstate = TSTATE_NONE;
  1283 :}
  1284 MOV.W Rm, @Rn {:  
  1285     load_reg( R_EAX, Rn );
  1286     check_walign16( R_EAX );
  1287     MMU_TRANSLATE_WRITE( R_EAX )
  1288     load_reg( R_EDX, Rm );
  1289     MEM_WRITE_WORD( R_EAX, R_EDX );
  1290     sh4_x86.tstate = TSTATE_NONE;
  1291 :}
       /* Remaining 16-bit (word) moves — 2-byte alignment check before
        * translation — plus MOVA (computes the longword-aligned PC-relative
        * address used by MOV.L @(disp,PC)) and MOVCA.L (treated as a plain
        * long store here).  MOV.W @(disp,PC) mirrors the MOV.L icache
        * fast path, with an explicit MOVSX to sign-extend the word. */
  1292 MOV.W Rm, @-Rn {:  
  1293     load_reg( R_EAX, Rn );
  1294     ADD_imm8s_r32( -2, R_EAX );
  1295     check_walign16( R_EAX );
  1296     MMU_TRANSLATE_WRITE( R_EAX );
  1297     load_reg( R_EDX, Rm );
  1298     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );  // commit decrement post-translate
  1299     MEM_WRITE_WORD( R_EAX, R_EDX );
  1300     sh4_x86.tstate = TSTATE_NONE;
  1301 :}
  1302 MOV.W Rm, @(R0, Rn) {:  
  1303     load_reg( R_EAX, 0 );
  1304     load_reg( R_ECX, Rn );
  1305     ADD_r32_r32( R_ECX, R_EAX );
  1306     check_walign16( R_EAX );
  1307     MMU_TRANSLATE_WRITE( R_EAX );
  1308     load_reg( R_EDX, Rm );
  1309     MEM_WRITE_WORD( R_EAX, R_EDX );
  1310     sh4_x86.tstate = TSTATE_NONE;
  1311 :}
  1312 MOV.W R0, @(disp, GBR) {:  
  1313     load_spreg( R_EAX, R_GBR );
  1314     ADD_imm32_r32( disp, R_EAX );
  1315     check_walign16( R_EAX );
  1316     MMU_TRANSLATE_WRITE( R_EAX );
  1317     load_reg( R_EDX, 0 );
  1318     MEM_WRITE_WORD( R_EAX, R_EDX );
  1319     sh4_x86.tstate = TSTATE_NONE;
  1320 :}
  1321 MOV.W R0, @(disp, Rn) {:  
  1322     load_reg( R_EAX, Rn );
  1323     ADD_imm32_r32( disp, R_EAX );
  1324     check_walign16( R_EAX );
  1325     MMU_TRANSLATE_WRITE( R_EAX );
  1326     load_reg( R_EDX, 0 );
  1327     MEM_WRITE_WORD( R_EAX, R_EDX );
  1328     sh4_x86.tstate = TSTATE_NONE;
  1329 :}
  1330 MOV.W @Rm, Rn {:  
  1331     load_reg( R_EAX, Rm );
  1332     check_ralign16( R_EAX );
  1333     MMU_TRANSLATE_READ( R_EAX );
  1334     MEM_READ_WORD( R_EAX, R_EAX );
  1335     store_reg( R_EAX, Rn );
  1336     sh4_x86.tstate = TSTATE_NONE;
  1337 :}
  1338 MOV.W @Rm+, Rn {:  
  1339     load_reg( R_EAX, Rm );
  1340     check_ralign16( R_EAX );
  1341     MMU_TRANSLATE_READ( R_EAX );
  1342     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );  // commit increment post-translate
  1343     MEM_READ_WORD( R_EAX, R_EAX );
  1344     store_reg( R_EAX, Rn );
  1345     sh4_x86.tstate = TSTATE_NONE;
  1346 :}
  1347 MOV.W @(R0, Rm), Rn {:  
  1348     load_reg( R_EAX, 0 );
  1349     load_reg( R_ECX, Rm );
  1350     ADD_r32_r32( R_ECX, R_EAX );
  1351     check_ralign16( R_EAX );
  1352     MMU_TRANSLATE_READ( R_EAX );
  1353     MEM_READ_WORD( R_EAX, R_EAX );
  1354     store_reg( R_EAX, Rn );
  1355     sh4_x86.tstate = TSTATE_NONE;
  1356 :}
  1357 MOV.W @(disp, GBR), R0 {:  
  1358     load_spreg( R_EAX, R_GBR );
  1359     ADD_imm32_r32( disp, R_EAX );
  1360     check_ralign16( R_EAX );
  1361     MMU_TRANSLATE_READ( R_EAX );
  1362     MEM_READ_WORD( R_EAX, R_EAX );
  1363     store_reg( R_EAX, 0 );
  1364     sh4_x86.tstate = TSTATE_NONE;
  1365 :}
  1366 MOV.W @(disp, PC), Rn {:  
  1367     if( sh4_x86.in_delay_slot ) {
  1368 	SLOTILLEGAL();
  1369     } else {
  1370 	// See comments for MOV.L @(disp, PC), Rn
  1371 	uint32_t target = pc + disp + 4;
  1372 	if( IS_IN_ICACHE(target) ) {
  1373 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1374 	    MOV_moff32_EAX( ptr );
  1375 	    MOVSX_r16_r32( R_EAX, R_EAX );
  1376 	} else {
  1377 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1378 	    ADD_sh4r_r32( R_PC, R_EAX );
  1379 	    MMU_TRANSLATE_READ( R_EAX );
  1380 	    MEM_READ_WORD( R_EAX, R_EAX );
  1381 	    sh4_x86.tstate = TSTATE_NONE;
  1383 	store_reg( R_EAX, Rn );
  1385 :}
  1386 MOV.W @(disp, Rm), R0 {:  
  1387     load_reg( R_EAX, Rm );
  1388     ADD_imm32_r32( disp, R_EAX );
  1389     check_ralign16( R_EAX );
  1390     MMU_TRANSLATE_READ( R_EAX );
  1391     MEM_READ_WORD( R_EAX, R_EAX );
  1392     store_reg( R_EAX, 0 );
  1393     sh4_x86.tstate = TSTATE_NONE;
  1394 :}
  1395 MOVA @(disp, PC), R0 {:  
  1396     if( sh4_x86.in_delay_slot ) {
  1397 	SLOTILLEGAL();
  1398     } else {
  1399 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1400 	ADD_sh4r_r32( R_PC, R_ECX );
  1401 	store_reg( R_ECX, 0 );
  1402 	sh4_x86.tstate = TSTATE_NONE;
  1404 :}
  1405 MOVCA.L R0, @Rn {:  
  1406     load_reg( R_EAX, Rn );
  1407     check_walign32( R_EAX );
  1408     MMU_TRANSLATE_WRITE( R_EAX );
  1409     load_reg( R_EDX, 0 );
  1410     MEM_WRITE_LONG( R_EAX, R_EDX );
  1411     sh4_x86.tstate = TSTATE_NONE;
  1412 :}
  1414 /* Control transfer instructions */
       /* Control transfer.  Conditional branches reuse the cached x86 flag
        * state (sh4_x86.tstate) where possible instead of re-testing T.
        * Delayed forms set in_delay_slot and translate the slot instruction
        * inline before emitting the block exit; BF/S and BT/S emit a rel32
        * conditional jump with a placeholder displacement that is patched
        * once the taken path (slot + exit) has been emitted, then translate
        * the slot a second time for the not-taken fall-through. */
  1415 BF disp {:
  1416     if( sh4_x86.in_delay_slot ) {
  1417 	SLOTILLEGAL();
  1418     } else {
  1419 	sh4vma_t target = disp + pc + 4;
  1420 	JT_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
  1421 	exit_block_rel(target, pc+2 );
  1422 	JMP_TARGET(nottaken);
  1423 	return 2;
  1425 :}
  1426 BF/S disp {:
  1427     if( sh4_x86.in_delay_slot ) {
  1428 	SLOTILLEGAL();
  1429     } else {
  1430 	sh4vma_t target = disp + pc + 4;
  1431 	sh4_x86.in_delay_slot = DELAY_PC;
  1432 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1433 	    CMP_imm8s_sh4r( 1, R_T );
  1434 	    sh4_x86.tstate = TSTATE_E;
  1436 	OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JNE rel32
  1437 	sh4_translate_instruction(pc+2);
  1438 	exit_block_rel( target, pc+4 );
  1439 	// not taken
  1440 	*patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1441 	sh4_translate_instruction(pc+2);
  1442 	return 4;
  1444 :}
  1445 BRA disp {:  
  1446     if( sh4_x86.in_delay_slot ) {
  1447 	SLOTILLEGAL();
  1448     } else {
  1449 	sh4_x86.in_delay_slot = DELAY_PC;
  1450 	sh4_translate_instruction( pc + 2 );
  1451 	exit_block_rel( disp + pc + 4, pc+4 );
  1452 	sh4_x86.branch_taken = TRUE;
  1453 	return 4;
  1455 :}
  1456 BRAF Rn {:  
  1457     if( sh4_x86.in_delay_slot ) {
  1458 	SLOTILLEGAL();
  1459     } else {
  1460 	load_spreg( R_EAX, R_PC );
  1461 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1462 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1463 	store_spreg( R_EAX, R_NEW_PC );
  1464 	sh4_x86.in_delay_slot = DELAY_PC;
  1465 	sh4_x86.tstate = TSTATE_NONE;
  1466 	sh4_translate_instruction( pc + 2 );
  1467 	exit_block_newpcset(pc+2);
  1468 	sh4_x86.branch_taken = TRUE;
  1469 	return 4;
  1471 :}
  1472 BSR disp {:  
  1473     if( sh4_x86.in_delay_slot ) {
  1474 	SLOTILLEGAL();
  1475     } else {
  1476 	load_spreg( R_EAX, R_PC );
  1477 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1478 	store_spreg( R_EAX, R_PR );   // PR = return address (pc+4)
  1479 	sh4_x86.in_delay_slot = DELAY_PC;
  1480 	sh4_translate_instruction( pc + 2 );
  1481 	exit_block_rel( disp + pc + 4, pc+4 );
  1482 	sh4_x86.branch_taken = TRUE;
  1483 	return 4;
  1485 :}
  1486 BSRF Rn {:  
  1487     if( sh4_x86.in_delay_slot ) {
  1488 	SLOTILLEGAL();
  1489     } else {
  1490 	load_spreg( R_EAX, R_PC );
  1491 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1492 	store_spreg( R_EAX, R_PR );
  1493 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1494 	store_spreg( R_EAX, R_NEW_PC );
  1496 	sh4_x86.tstate = TSTATE_NONE;
  1497 	sh4_translate_instruction( pc + 2 );
  1498 	exit_block_newpcset(pc+2);
  1499 	sh4_x86.branch_taken = TRUE;
  1500 	return 4;
  1502 :}
  1503 BT disp {:
  1504     if( sh4_x86.in_delay_slot ) {
  1505 	SLOTILLEGAL();
  1506     } else {
  1507 	sh4vma_t target = disp + pc + 4;
  1508 	JF_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
  1509 	exit_block_rel(target, pc+2 );
  1510 	JMP_TARGET(nottaken);
  1511 	return 2;
  1513 :}
  1514 BT/S disp {:
  1515     if( sh4_x86.in_delay_slot ) {
  1516 	SLOTILLEGAL();
  1517     } else {
  1518 	sh4_x86.in_delay_slot = DELAY_PC;
  1519 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1520 	    CMP_imm8s_sh4r( 1, R_T );
  1521 	    sh4_x86.tstate = TSTATE_E;
  1523 	OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JE rel32
  1524 	sh4_translate_instruction(pc+2);
  1525 	exit_block_rel( disp + pc + 4, pc+4 );
  1526 	// not taken
  1527 	*patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1528 	sh4_translate_instruction(pc+2);
  1529 	return 4;
  1531 :}
  1532 JMP @Rn {:  
  1533     if( sh4_x86.in_delay_slot ) {
  1534 	SLOTILLEGAL();
  1535     } else {
  1536 	load_reg( R_ECX, Rn );
  1537 	store_spreg( R_ECX, R_NEW_PC );
  1538 	sh4_x86.in_delay_slot = DELAY_PC;
  1539 	sh4_translate_instruction(pc+2);
  1540 	exit_block_newpcset(pc+2);
  1541 	sh4_x86.branch_taken = TRUE;
  1542 	return 4;
  1544 :}
  1545 JSR @Rn {:  
  1546     if( sh4_x86.in_delay_slot ) {
  1547 	SLOTILLEGAL();
  1548     } else {
  1549 	load_spreg( R_EAX, R_PC );
  1550 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1551 	store_spreg( R_EAX, R_PR );
  1552 	load_reg( R_ECX, Rn );
  1553 	store_spreg( R_ECX, R_NEW_PC );
  1554 	sh4_translate_instruction(pc+2);
  1555 	exit_block_newpcset(pc+2);
  1556 	sh4_x86.branch_taken = TRUE;
  1557 	return 4;
  1559 :}
  1560 RTE {:  
  1561     if( sh4_x86.in_delay_slot ) {
  1562 	SLOTILLEGAL();
  1563     } else {
  1564 	check_priv();   // RTE is privileged
  1565 	load_spreg( R_ECX, R_SPC );
  1566 	store_spreg( R_ECX, R_NEW_PC );
  1567 	load_spreg( R_EAX, R_SSR );
  1568 	call_func1( sh4_write_sr, R_EAX );  // SR restore may switch banks/mode
  1569 	sh4_x86.in_delay_slot = DELAY_PC;
  1570 	sh4_x86.priv_checked = FALSE;
  1571 	sh4_x86.fpuen_checked = FALSE;
  1572 	sh4_x86.tstate = TSTATE_NONE;
  1573 	sh4_translate_instruction(pc+2);
  1574 	exit_block_newpcset(pc+2);
  1575 	sh4_x86.branch_taken = TRUE;
  1576 	return 4;
  1578 :}
  1579 RTS {:  
  1580     if( sh4_x86.in_delay_slot ) {
  1581 	SLOTILLEGAL();
  1582     } else {
  1583 	load_spreg( R_ECX, R_PR );
  1584 	store_spreg( R_ECX, R_NEW_PC );
  1585 	sh4_x86.in_delay_slot = DELAY_PC;
  1586 	sh4_translate_instruction(pc+2);
  1587 	exit_block_newpcset(pc+2);
  1588 	sh4_x86.branch_taken = TRUE;
  1589 	return 4;
  1591 :}
  1592 TRAPA #imm {:  
  1593     if( sh4_x86.in_delay_slot ) {
  1594 	SLOTILLEGAL();
  1595     } else {
  1596 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1597 	ADD_r32_sh4r( R_ECX, R_PC );
  1598 	load_imm32( R_EAX, imm );
  1599 	call_func1( sh4_raise_trap, R_EAX );
  1600 	sh4_x86.tstate = TSTATE_NONE;
  1601 	exit_block_pcset(pc);
  1602 	sh4_x86.branch_taken = TRUE;
  1603 	return 2;
  1605 :}
  1606 UNDEF {:  
  1607     if( sh4_x86.in_delay_slot ) {
  1608 	SLOTILLEGAL();
  1609     } else {
  1610 	JMP_exc(EXC_ILLEGAL);   // raise illegal-instruction exception
  1611 	return 2;
  1613 :}
       /* System flag manipulation: clear/set MAC, S and T.  The CLC/STC +
        * SETC idiom routes the constant through the x86 carry flag, so
        * tstate = TSTATE_C stays accurate for a following conditional. */
  1615 CLRMAC {:  
  1616     XOR_r32_r32(R_EAX, R_EAX);
  1617     store_spreg( R_EAX, R_MACL );
  1618     store_spreg( R_EAX, R_MACH );
  1619     sh4_x86.tstate = TSTATE_NONE;
  1620 :}
  1621 CLRS {:
  1622     CLC();
  1623     SETC_sh4r(R_S);
  1624     sh4_x86.tstate = TSTATE_C;
  1625 :}
  1626 CLRT {:  
  1627     CLC();
  1628     SETC_t();
  1629     sh4_x86.tstate = TSTATE_C;
  1630 :}
  1631 SETS {:  
  1632     STC();
  1633     SETC_sh4r(R_S);
  1634     sh4_x86.tstate = TSTATE_C;
  1635 :}
  1636 SETT {:  
  1637     STC();
  1638     SETC_t();
  1639     sh4_x86.tstate = TSTATE_C;
  1640 :}
  1642 /* Floating point moves */
  1643 FMOV FRm, FRn {:  
  1644     /* As horrible as this looks, it's actually covering 5 separate cases:
  1645      * 1. 32-bit fr-to-fr (PR=0)
  1646      * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )
  1647      * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )
  1648      * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
  1649      * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
  1650      */
  1651     check_fpuen();
  1652     load_spreg( R_ECX, R_FPSCR );
  1653     load_fr_bank( R_EDX );
  1654     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1655     JNE_rel8(8, doublesize);
  1656     load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
  1657     store_fr( R_EDX, R_EAX, FRn );
  1658     if( FRm&1 ) {
  1659 	JMP_rel8(24, end);
  1660 	JMP_TARGET(doublesize);
  1661 	load_xf_bank( R_ECX ); 
  1662 	load_fr( R_ECX, R_EAX, FRm-1 );
  1663 	if( FRn&1 ) {
  1664 	    load_fr( R_ECX, R_EDX, FRm );
  1665 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1666 	    store_fr( R_ECX, R_EDX, FRn );
  1667 	} else /* FRn&1 == 0 */ {
  1668 	    load_fr( R_ECX, R_ECX, FRm );
  1669 	    store_fr( R_EDX, R_EAX, FRn );
  1670 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1672 	JMP_TARGET(end);
  1673     } else /* FRm&1 == 0 */ {
  1674 	if( FRn&1 ) {
  1675 	    JMP_rel8(24, end);
  1676 	    load_xf_bank( R_ECX );
  1677 	    load_fr( R_EDX, R_EAX, FRm );
  1678 	    load_fr( R_EDX, R_EDX, FRm+1 );
  1679 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1680 	    store_fr( R_ECX, R_EDX, FRn );
  1681 	    JMP_TARGET(end);
  1682 	} else /* FRn&1 == 0 */ {
  1683 	    JMP_rel8(12, end);
  1684 	    load_fr( R_EDX, R_EAX, FRm );
  1685 	    load_fr( R_EDX, R_ECX, FRm+1 );
  1686 	    store_fr( R_EDX, R_EAX, FRn );
  1687 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1688 	    JMP_TARGET(end);
  1691     sh4_x86.tstate = TSTATE_NONE;
  1692 :}
       /* FMOV memory forms.  Each tests FPSCR.SZ at runtime and branches
        * between a 32-bit single transfer and a 64-bit pair transfer; the
        * JNE/JMP_rel8 byte counts are hand-computed over the emitted x86 and
        * assert-checked by the JMP_TARGET markers.  The pre-decrement form
        * delays both the address translation and the register decrement
        * until after the transfer size is known. */
  1693 FMOV FRm, @Rn {: 
  1694     check_fpuen();
  1695     load_reg( R_EAX, Rn );
  1696     check_walign32( R_EAX );
  1697     MMU_TRANSLATE_WRITE( R_EAX );
  1698     load_spreg( R_EDX, R_FPSCR );
  1699     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1700     JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
  1701     load_fr_bank( R_EDX );
  1702     load_fr( R_EDX, R_ECX, FRm );
  1703     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1704     if( FRm&1 ) {
  1705 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1706 	JMP_TARGET(doublesize);
  1707 	load_xf_bank( R_EDX );
  1708 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1709 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1710 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1711 	JMP_TARGET(end);
  1712     } else {
  1713 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1714 	JMP_TARGET(doublesize);
  1715 	load_fr_bank( R_EDX );
  1716 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1717 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1718 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1719 	JMP_TARGET(end);
  1721     sh4_x86.tstate = TSTATE_NONE;
  1722 :}
  1723 FMOV @Rm, FRn {:  
  1724     check_fpuen();
  1725     load_reg( R_EAX, Rm );
  1726     check_ralign32( R_EAX );
  1727     MMU_TRANSLATE_READ( R_EAX );
  1728     load_spreg( R_EDX, R_FPSCR );
  1729     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1730     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1731     MEM_READ_LONG( R_EAX, R_EAX );
  1732     load_fr_bank( R_EDX );
  1733     store_fr( R_EDX, R_EAX, FRn );
  1734     if( FRn&1 ) {
  1735 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1736 	JMP_TARGET(doublesize);
  1737 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1738 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1739 	load_xf_bank( R_EDX );
  1740 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1741 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1742 	JMP_TARGET(end);
  1743     } else {
  1744 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1745 	JMP_TARGET(doublesize);
  1746 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1747 	load_fr_bank( R_EDX );
  1748 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1749 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1750 	JMP_TARGET(end);
  1752     sh4_x86.tstate = TSTATE_NONE;
  1753 :}
  1754 FMOV FRm, @-Rn {:  
  1755     check_fpuen();
  1756     load_reg( R_EAX, Rn );
  1757     check_walign32( R_EAX );
  1758     load_spreg( R_EDX, R_FPSCR );
  1759     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1760     JNE_rel8(15 + MEM_WRITE_SIZE + MMU_TRANSLATE_SIZE, doublesize);
  1761     ADD_imm8s_r32( -4, R_EAX );
  1762     MMU_TRANSLATE_WRITE( R_EAX );
  1763     load_fr_bank( R_EDX );
  1764     load_fr( R_EDX, R_ECX, FRm );
  1765     ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));   // commit decrement post-translate
  1766     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1767     if( FRm&1 ) {
  1768 	JMP_rel8( 25 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
  1769 	JMP_TARGET(doublesize);
  1770 	ADD_imm8s_r32(-8,R_EAX);
  1771 	MMU_TRANSLATE_WRITE( R_EAX );
  1772 	load_xf_bank( R_EDX );
  1773 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1774 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1775 	ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1776 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1777 	JMP_TARGET(end);
  1778     } else {
  1779 	JMP_rel8( 16 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
  1780 	JMP_TARGET(doublesize);
  1781 	ADD_imm8s_r32(-8,R_EAX);
  1782 	MMU_TRANSLATE_WRITE( R_EAX );
  1783 	load_fr_bank( R_EDX );
  1784 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1785 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1786 	ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1787 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1788 	JMP_TARGET(end);
  1790     sh4_x86.tstate = TSTATE_NONE;
  1791 :}
  1792 FMOV @Rm+, FRn {:
  1793     check_fpuen();
  1794     load_reg( R_EAX, Rm );
  1795     check_ralign32( R_EAX );
  1796     MMU_TRANSLATE_READ( R_EAX );
  1797     load_spreg( R_EDX, R_FPSCR );
  1798     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1799     JNE_rel8(12 + MEM_READ_SIZE, doublesize);
  1800     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1801     MEM_READ_LONG( R_EAX, R_EAX );
  1802     load_fr_bank( R_EDX );
  1803     store_fr( R_EDX, R_EAX, FRn );
  1804     if( FRn&1 ) {
  1805 	JMP_rel8(25 + MEM_READ_DOUBLE_SIZE, end);
  1806 	JMP_TARGET(doublesize);
  1807 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1808 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1809 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1810 	load_xf_bank( R_EDX );
  1811 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1812 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1813 	JMP_TARGET(end);
  1814     } else {
  1815 	JMP_rel8(13 + MEM_READ_DOUBLE_SIZE, end);
  1816 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1817 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1818 	load_fr_bank( R_EDX );
  1819 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1820 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1821 	JMP_TARGET(end);
  1823     sh4_x86.tstate = TSTATE_NONE;
  1824 :}
  1825 FMOV FRm, @(R0, Rn) {:  
  1826     check_fpuen();
  1827     load_reg( R_EAX, Rn );
  1828     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1829     check_walign32( R_EAX );
  1830     MMU_TRANSLATE_WRITE( R_EAX );
  1831     load_spreg( R_EDX, R_FPSCR );
  1832     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1833     JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
  1834     load_fr_bank( R_EDX );
  1835     load_fr( R_EDX, R_ECX, FRm );
  1836     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1837     if( FRm&1 ) {
  1838 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1839 	JMP_TARGET(doublesize);
  1840 	load_xf_bank( R_EDX );
  1841 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1842 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1843 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1844 	JMP_TARGET(end);
  1845     } else {
  1846 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1847 	JMP_TARGET(doublesize);
  1848 	load_fr_bank( R_EDX );
  1849 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1850 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1851 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1852 	JMP_TARGET(end);
  1854     sh4_x86.tstate = TSTATE_NONE;
  1855 :}
  1856 FMOV @(R0, Rm), FRn {:  
  1857     check_fpuen();
  1858     load_reg( R_EAX, Rm );
  1859     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1860     check_ralign32( R_EAX );
  1861     MMU_TRANSLATE_READ( R_EAX );
  1862     load_spreg( R_EDX, R_FPSCR );
  1863     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1864     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1865     MEM_READ_LONG( R_EAX, R_EAX );
  1866     load_fr_bank( R_EDX );
  1867     store_fr( R_EDX, R_EAX, FRn );
  1868     if( FRn&1 ) {
  1869 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1870 	JMP_TARGET(doublesize);
  1871 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1872 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1873 	load_xf_bank( R_EDX );
  1874 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1875 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1876 	JMP_TARGET(end);
  1877     } else {
  1878 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1879 	JMP_TARGET(doublesize);
  1880 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1881 	load_fr_bank( R_EDX );
  1882 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1883 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1884 	JMP_TARGET(end);
  1886     sh4_x86.tstate = TSTATE_NONE;
  1887 :}
FLDI0 FRn {:  /* IFF PR=0 */
    /* Load +0.0f into FRn. Only meaningful in single-precision mode: when
     * FPSCR.PR is set the store sequence is skipped entirely (rel8 offset
     * is the hand-counted byte size of the skipped code). */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(8, end);
    XOR_r32_r32( R_EAX, R_EAX );  // EAX = 0 == bit pattern of +0.0f
    load_spreg( R_ECX, REG_OFFSET(fr_bank) );
    store_fr( R_ECX, R_EAX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDI1 FRn {:  /* IFF PR=0 */
    /* Load +1.0f into FRn. As with FLDI0, a no-op when FPSCR.PR=1. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(11, end);
    load_imm32(R_EAX, 0x3F800000);  // IEEE-754 single-precision 1.0
    load_spreg( R_ECX, REG_OFFSET(fr_bank) );
    store_fr( R_ECX, R_EAX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FLOAT FPUL, FRn {:  
    /* Integer FPUL -> floating FRn via the x87: FILD pushes FPUL as an
     * integer, then FPSCR.PR selects whether the result is popped back as
     * a single (pop_fr) or double (pop_dr). */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_spreg(R_EDX, REG_OFFSET(fr_bank));
    FILD_sh4r(R_FPUL);
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(5, doubleprec);
    pop_fr( R_EDX, FRn );
    JMP_rel8(3, end);
    JMP_TARGET(doubleprec);
    pop_dr( R_EDX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FTRC FRm, FPUL {:  
    /* Truncate FRm (single or double per FPSCR.PR) to a 32-bit integer in
     * FPUL, saturating against max_int/min_int. The x87 control word is
     * temporarily switched to trunc_fcw for round-toward-zero and restored
     * from save_fcw afterwards. The trailing // N comments record the
     * emitted byte size of each instruction, used by the rel8 offsets. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_fr_bank( R_EDX );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(5, doubleprec);
    push_fr( R_EDX, FRm );
    JMP_rel8(3, doop);
    JMP_TARGET(doubleprec);
    push_dr( R_EDX, FRm );
    JMP_TARGET( doop );
    load_imm32( R_ECX, (uint32_t)&max_int );
    FILD_r32ind( R_ECX );
    FCOMIP_st(1);
    JNA_rel8( 32, sat );
    load_imm32( R_ECX, (uint32_t)&min_int );  // 5
    FILD_r32ind( R_ECX );           // 2
    FCOMIP_st(1);                   // 2
    JAE_rel8( 21, sat2 );            // 2
    load_imm32( R_EAX, (uint32_t)&save_fcw );
    FNSTCW_r32ind( R_EAX );
    load_imm32( R_EDX, (uint32_t)&trunc_fcw );
    FLDCW_r32ind( R_EDX );
    FISTP_sh4r(R_FPUL);             // 3
    FLDCW_r32ind( R_EAX );
    JMP_rel8( 9, end );             // 2
    JMP_TARGET(sat);
    JMP_TARGET(sat2);
    MOV_r32ind_r32( R_ECX, R_ECX ); // 2 -- saturated: store the limit itself
    store_spreg( R_ECX, R_FPUL );
    FPOP_st();
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDS FRm, FPUL {:  
    /* Raw 32-bit move FRm -> FPUL (no conversion). */
    check_fpuen();
    load_fr_bank( R_ECX );
    load_fr( R_ECX, R_EAX, FRm );
    store_spreg( R_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
:}
FSTS FPUL, FRn {:  
    /* Raw 32-bit move FPUL -> FRn (no conversion). */
    check_fpuen();
    load_fr_bank( R_ECX );
    load_spreg( R_EAX, R_FPUL );
    store_fr( R_ECX, R_EAX, FRn );
    sh4_x86.tstate = TSTATE_NONE;
:}
FCNVDS FRm, FPUL {:  
    /* Convert double DRm to single in FPUL. Valid only when FPSCR.PR=1;
     * the JE skips the whole operation when PR=0. Conversion happens by
     * pushing the double and popping it back as a single (pop_fpul). */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JE_rel8(9, end); // only when PR=1
    load_fr_bank( R_ECX );
    push_dr( R_ECX, FRm );
    pop_fpul();
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FCNVSD FPUL, FRn {:  
    /* Convert single FPUL to double DRn. Valid only when FPSCR.PR=1. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JE_rel8(9, end); // only when PR=1
    load_fr_bank( R_ECX );
    push_fpul();
    pop_dr( R_ECX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
  1997 /* Floating point instructions */
FABS FRn {:  
    /* FRn = |FRn| via x87 FABS; FPSCR.PR selects single (fr) or double
     * (dr) push/pop macros. The // N comments are emitted byte sizes
     * backing the rel8 offsets. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_fr_bank( R_EDX );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(10, doubleprec);
    push_fr(R_EDX, FRn); // 3
    FABS_st0(); // 2
    pop_fr( R_EDX, FRn); //3
    JMP_rel8(8,end); // 2
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    FABS_st0();
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FADD FRm, FRn {:  
    /* FRn += FRm: push both operands on the x87 stack, FADDP, pop the
     * result back into FRn. PR selects single vs double macros. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(13,doubleprec);
    push_fr(R_EDX, FRm);
    push_fr(R_EDX, FRn);
    FADDP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_rel8(11,end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRm);
    push_dr(R_EDX, FRn);
    FADDP_st(1);
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FDIV FRm, FRn {:  
    /* FRn /= FRm. Note the push order (FRn first, then FRm) so that
     * FDIVP st(1),st(0) leaves FRn/FRm on the stack. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(13, doubleprec);
    push_fr(R_EDX, FRn);
    push_fr(R_EDX, FRm);
    FDIVP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_rel8(11, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    push_dr(R_EDX, FRm);
    FDIVP_st(1);
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FMAC FR0, FRm, FRn {:  
    /* Fused multiply-accumulate: FRn += FR0 * FRm, computed on the x87
     * stack (FR0*FRm first, then add FRn). PR selects single/double. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_spreg( R_EDX, REG_OFFSET(fr_bank));
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(18, doubleprec);
    push_fr( R_EDX, 0 );
    push_fr( R_EDX, FRm );
    FMULP_st(1);
    push_fr( R_EDX, FRn );
    FADDP_st(1);
    pop_fr( R_EDX, FRn );
    JMP_rel8(16, end);
    JMP_TARGET(doubleprec);
    push_dr( R_EDX, 0 );
    push_dr( R_EDX, FRm );
    FMULP_st(1);
    push_dr( R_EDX, FRn );
    FADDP_st(1);
    pop_dr( R_EDX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FMUL FRm, FRn {:  
    /* FRn *= FRm via x87 FMULP; PR selects single/double macros. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(13, doubleprec);
    push_fr(R_EDX, FRm);
    push_fr(R_EDX, FRn);
    FMULP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_rel8(11, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRm);
    push_dr(R_EDX, FRn);
    FMULP_st(1);
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FNEG FRn {:  
    /* FRn = -FRn via x87 FCHS; PR selects single/double macros. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(10, doubleprec);
    push_fr(R_EDX, FRn);
    FCHS_st0();
    pop_fr(R_EDX, FRn);
    JMP_rel8(8, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    FCHS_st0();
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FSRRA FRn {:  
    /* FRn = 1/sqrt(FRn), computed as FLD1 / FSQRT(FRn) then FDIVP.
     * Single-precision only: when PR=1 the whole sequence is skipped. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(12, end); // PR=0 only
    FLD1_st0();
    push_fr(R_EDX, FRn);
    FSQRT_st0();
    FDIVP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FSQRT FRn {:  
    /* FRn = sqrt(FRn) via x87 FSQRT; PR selects single/double macros. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(10, doubleprec);
    push_fr(R_EDX, FRn);
    FSQRT_st0();
    pop_fr(R_EDX, FRn);
    JMP_rel8(8, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    FSQRT_st0();
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FSUB FRm, FRn {:  
    /* FRn -= FRm. Push order (FRn first, then FRm) makes
     * FSUBP st(1),st(0) compute FRn - FRm. PR selects single/double. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(13, doubleprec);
    push_fr(R_EDX, FRn);
    push_fr(R_EDX, FRm);
    FSUBP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_rel8(11, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    push_dr(R_EDX, FRm);
    FSUBP_st(1);
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FCMP/EQ FRm, FRn {:  
    /* T = (FRn == FRm). Both precisions share the FCOMIP/SETE tail:
     * FRm is pushed first so st(0)=FRn, st(1)=FRm at the compare. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(8, doubleprec);
    push_fr(R_EDX, FRm);
    push_fr(R_EDX, FRn);
    JMP_rel8(6, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRm);
    push_dr(R_EDX, FRn);
    JMP_TARGET(end);
    FCOMIP_st(1);
    SETE_t();
    FPOP_st();
    sh4_x86.tstate = TSTATE_NONE;
:}
FCMP/GT FRm, FRn {:  
    /* T = (FRn > FRm); same structure as FCMP/EQ but SETA. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(8, doubleprec);
    push_fr(R_EDX, FRm);
    push_fr(R_EDX, FRn);
    JMP_rel8(6, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRm);
    push_dr(R_EDX, FRn);
    JMP_TARGET(end);
    FCOMIP_st(1);
    SETA_t();
    FPOP_st();
    sh4_x86.tstate = TSTATE_NONE;
:}
FSCA FPUL, FRn {:  
    /* sin/cos of the FPUL angle into the FVn pair, delegated to the
     * sh4_fsca() helper with a pointer to FR[FVn&~1]. Skipped entirely
     * when PR=1 (single-precision-only instruction). */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8( CALL_FUNC2_SIZE + 9, doubleprec );
    load_fr_bank( R_ECX );
    ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
    load_spreg( R_EDX, R_FPUL );
    call_func2( sh4_fsca, R_EDX, R_ECX );
    JMP_TARGET(doubleprec);
    sh4_x86.tstate = TSTATE_NONE;
:}
FIPR FVm, FVn {:  
    /* 4-element inner product: FVn[3] = sum(FVm[i]*FVn[i], i=0..3),
     * accumulated on the x87 stack. Skipped when PR=1. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(44, doubleprec);
    load_fr_bank( R_ECX );
    push_fr( R_ECX, FVm<<2 );
    push_fr( R_ECX, FVn<<2 );
    FMULP_st(1);
    push_fr( R_ECX, (FVm<<2)+1);
    push_fr( R_ECX, (FVn<<2)+1);
    FMULP_st(1);
    FADDP_st(1);
    push_fr( R_ECX, (FVm<<2)+2);
    push_fr( R_ECX, (FVn<<2)+2);
    FMULP_st(1);
    FADDP_st(1);
    push_fr( R_ECX, (FVm<<2)+3);
    push_fr( R_ECX, (FVn<<2)+3);
    FMULP_st(1);
    FADDP_st(1);
    pop_fr( R_ECX, (FVn<<2)+3);
    JMP_TARGET(doubleprec);
    sh4_x86.tstate = TSTATE_NONE;
:}
FTRV XMTRX, FVn {:  
    /* Matrix-vector transform FVn = XMTRX * FVn, delegated to the
     * sh4_ftrv() helper (vector pointer in EDX, XF bank in ECX).
     * Skipped when PR=1. The // N comments are emitted byte sizes. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8( 18 + CALL_FUNC2_SIZE, doubleprec );
    load_fr_bank( R_EDX );                 // 3
    ADD_imm8s_r32( FVn<<4, R_EDX );        // 3
    load_xf_bank( R_ECX );                 // 12
    call_func2( sh4_ftrv, R_EDX, R_ECX );  // 12
    JMP_TARGET(doubleprec);
    sh4_x86.tstate = TSTATE_NONE;
:}
FRCHG {:  
    /* Toggle FPSCR.FR (register bank swap) and refresh the cached
     * fr_bank pointer via update_fr_bank(). */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    XOR_imm32_r32( FPSCR_FR, R_ECX );
    store_spreg( R_ECX, R_FPSCR );
    update_fr_bank( R_ECX );
    sh4_x86.tstate = TSTATE_NONE;
:}
FSCHG {:  
    /* Toggle FPSCR.SZ (single/pair transfer size). No bank update needed. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    XOR_imm32_r32( FPSCR_SZ, R_ECX );
    store_spreg( R_ECX, R_FPSCR );
    sh4_x86.tstate = TSTATE_NONE;
:}
  2268 /* Processor control instructions */
  2269 LDC Rm, SR {:
  2270     if( sh4_x86.in_delay_slot ) {
  2271 	SLOTILLEGAL();
  2272     } else {
  2273 	check_priv();
  2274 	load_reg( R_EAX, Rm );
  2275 	call_func1( sh4_write_sr, R_EAX );
  2276 	sh4_x86.priv_checked = FALSE;
  2277 	sh4_x86.fpuen_checked = FALSE;
  2278 	sh4_x86.tstate = TSTATE_NONE;
  2280 :}
LDC Rm, GBR {: 
    /* GBR is accessible in user mode: no privilege check. */
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_GBR );
:}
LDC Rm, VBR {:  
    /* Privileged move Rm -> VBR. check_priv() emits flag-clobbering code,
     * hence tstate is invalidated. */
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SSR {:  
    /* Privileged move Rm -> SSR. */
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SGR {:  
    /* Privileged move Rm -> SGR. */
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SPC {:  
    /* Privileged move Rm -> SPC. */
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, DBR {:  
    /* Privileged move Rm -> DBR. */
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, Rn_BANK {:  
    /* Privileged move Rm -> banked register Rn_BANK. */
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, GBR {:  
    /* GBR = [Rm]; Rm += 4. Alignment check and MMU translation happen
     * before Rm is incremented, so Rm is unmodified if an exception is
     * raised during translation. */
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_GBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
  2330 LDC.L @Rm+, SR {:
  2331     if( sh4_x86.in_delay_slot ) {
  2332 	SLOTILLEGAL();
  2333     } else {
  2334 	check_priv();
  2335 	load_reg( R_EAX, Rm );
  2336 	check_ralign32( R_EAX );
  2337 	MMU_TRANSLATE_READ( R_EAX );
  2338 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2339 	MEM_READ_LONG( R_EAX, R_EAX );
  2340 	call_func1( sh4_write_sr, R_EAX );
  2341 	sh4_x86.priv_checked = FALSE;
  2342 	sh4_x86.fpuen_checked = FALSE;
  2343 	sh4_x86.tstate = TSTATE_NONE;
  2345 :}
LDC.L @Rm+, VBR {:  
    /* VBR = [Rm]; Rm += 4. Privileged. Translation precedes the
     * post-increment so Rm is untouched on a translation exception. */
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SSR {:
    /* SSR = [Rm]; Rm += 4. Privileged. */
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SGR {:  
    /* SGR = [Rm]; Rm += 4. Privileged. */
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SPC {:  
    /* SPC = [Rm]; Rm += 4. Privileged. */
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, DBR {:  
    /* DBR = [Rm]; Rm += 4. Privileged. */
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, Rn_BANK {:  
    /* Banked Rn_BANK = [Rm]; Rm += 4. Privileged. */
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, FPSCR {:  
    /* FPSCR = Rm; the FR bit may have changed, so the cached fr_bank
     * pointer is refreshed via update_fr_bank(). */
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_FPSCR );
    update_fr_bank( R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS.L @Rm+, FPSCR {:  
    /* FPSCR = [Rm]; Rm += 4; refresh fr_bank as above. */
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_FPSCR );
    update_fr_bank( R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, FPUL {:  
    /* FPUL = Rm (plain 32-bit move, flags untouched). */
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_FPUL );
:}
LDS.L @Rm+, FPUL {:  
    /* FPUL = [Rm]; Rm += 4. */
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACH {: 
    /* MACH = Rm. */
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACH );
:}
LDS.L @Rm+, MACH {:  
    /* MACH = [Rm]; Rm += 4. */
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACL {:  
    /* MACL = Rm. */
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACL );
:}
LDS.L @Rm+, MACL {:  
    /* MACL = [Rm]; Rm += 4. */
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, PR {:  
    /* PR = Rm. */
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_PR );
:}
LDS.L @Rm+, PR {:  
    /* PR = [Rm]; Rm += 4. */
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_PR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDTLB {:  
    /* Load the UTLB entry from PTEH/PTEL — handled entirely in C. */
    call_func0( MMU_ldtlb );
:}
OCBI @Rn {:  :}  /* cache-ops are no-ops: the emulator has no operand cache */
OCBP @Rn {:  :}
OCBWB @Rn {:  :}
PREF @Rn {:
    /* PREF only has an emulated effect for store-queue addresses
     * (0xE0000000 region): those trigger sh4_flush_store_queue(), with a
     * zero return raising the pending exception (JE_exc). Other addresses
     * fall through as a no-op prefetch hint. */
    load_reg( R_EAX, Rn );
    MOV_r32_r32( R_EAX, R_ECX );
    AND_imm32_r32( 0xFC000000, R_EAX );
    CMP_imm32_r32( 0xE0000000, R_EAX );
    JNE_rel8(8+CALL_FUNC1_SIZE, end);
    call_func1( sh4_flush_store_queue, R_ECX );
    TEST_r32_r32( R_EAX, R_EAX );
    JE_exc(-1);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
SLEEP {: 
    /* Privileged. Calls sh4_sleep() and ends the translation block
     * (return 2 terminates instruction decoding here). */
    check_priv();
    call_func0( sh4_sleep );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.in_delay_slot = DELAY_NONE;
    return 2;
:}
STC SR, Rn {:
    /* Rn = SR, assembled from its component fields by sh4_read_sr().
     * Privileged. */
    check_priv();
    call_func0(sh4_read_sr);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC GBR, Rn {:  
    /* Rn = GBR; user-mode accessible, no privilege check. */
    load_spreg( R_EAX, R_GBR );
    store_reg( R_EAX, Rn );
:}
STC VBR, Rn {:  
    /* Rn = VBR. Privileged. */
    check_priv();
    load_spreg( R_EAX, R_VBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SSR, Rn {:  
    /* Rn = SSR. Privileged. */
    check_priv();
    load_spreg( R_EAX, R_SSR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SPC, Rn {:  
    /* Rn = SPC. Privileged. */
    check_priv();
    load_spreg( R_EAX, R_SPC );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SGR, Rn {:  
    /* Rn = SGR. Privileged. */
    check_priv();
    load_spreg( R_EAX, R_SGR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC DBR, Rn {:  
    /* Rn = DBR. Privileged. */
    check_priv();
    load_spreg( R_EAX, R_DBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC Rm_BANK, Rn {:
    /* Rn = banked register Rm_BANK. Privileged. */
    check_priv();
    load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SR, @-Rn {:
    /* [--Rn] = SR. Privileged. The translated write address is pushed
     * around the sh4_read_sr() call to survive register clobbering; the
     * SH4 Rn is only decremented after translation succeeds. */
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32( R_EAX );
    call_func0( sh4_read_sr );
    POP_realigned_r32( R_ECX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L VBR, @-Rn {:  
    /* [--Rn] = VBR. Privileged; address translated before Rn is
     * decremented, so Rn is unchanged on a translation exception. */
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_VBR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SSR, @-Rn {:  
    /* [--Rn] = SSR. Privileged. */
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_SSR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SPC, @-Rn {:
    /* [--Rn] = SPC. Privileged. */
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_SPC );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SGR, @-Rn {:  
    /* [--Rn] = SGR. Privileged. */
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_SGR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L DBR, @-Rn {:  
    /* [--Rn] = DBR. Privileged. */
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_DBR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L Rm_BANK, @-Rn {:  
    /* [--Rn] = banked register Rm_BANK. Privileged. */
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L GBR, @-Rn {:  
    /* [--Rn] = GBR; user-mode accessible, no privilege check. */
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_GBR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPSCR, Rn {:  
    /* Rn = FPSCR. */
    load_spreg( R_EAX, R_FPSCR );
    store_reg( R_EAX, Rn );
:}
STS.L FPSCR, @-Rn {:  
    /* [--Rn] = FPSCR; Rn decremented only after translation succeeds. */
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_FPSCR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPUL, Rn {:  
    /* Rn = FPUL. */
    load_spreg( R_EAX, R_FPUL );
    store_reg( R_EAX, Rn );
:}
STS.L FPUL, @-Rn {:  
    /* [--Rn] = FPUL. */
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_FPUL );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACH, Rn {:  
    /* Rn = MACH. */
    load_spreg( R_EAX, R_MACH );
    store_reg( R_EAX, Rn );
:}
STS.L MACH, @-Rn {:  
    /* [--Rn] = MACH; Rn decremented only after translation succeeds. */
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_MACH );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACL, Rn {:  
    /* Rn = MACL. */
    load_spreg( R_EAX, R_MACL );
    store_reg( R_EAX, Rn );
:}
STS.L MACL, @-Rn {:  
    /* [--Rn] = MACL. */
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_MACL );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS PR, Rn {:  
    /* Rn = PR. */
    load_spreg( R_EAX, R_PR );
    store_reg( R_EAX, Rn );
:}
STS.L PR, @-Rn {:  
    /* [--Rn] = PR. */
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_PR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
  2706 %%
  2707     sh4_x86.in_delay_slot = DELAY_NONE;
  2708     return 0;
.