lxdream.org :: lxdream/src/sh4/sh4x86.in
filename     src/sh4/sh4x86.in
changeset    596:dfc0c93d882e
prev         593:6c710c7c6835
next         601:d8d1af0d133c
author       nkeynes
date         Mon Jan 21 11:59:46 2008 +0000
permissions  -rw-r--r--
last change  Fix MAC.L/MAC.W stack issues
             Fix various recovery-table issues
/**
 * $Id$
 *
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif

#include "sh4/xltcache.h"
#include "sh4/sh4core.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4mmio.h"
#include "sh4/x86op.h"
#include "clock.h"
#define DEFAULT_BACKPATCH_SIZE 4096

struct backpatch_record {
    uint32_t *fixup_addr;
    uint32_t fixup_icount;
    int32_t exc_code;
};

#define MAX_RECOVERY_SIZE 2048

#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2
/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;
    gboolean priv_checked; /* true if we've already checked the cpu mode. */
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    uint32_t block_start_pc;
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    int tstate;

    /* mode flags */
    gboolean tlb_on; /* True if tlb translation is active */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};
#define TSTATE_NONE -1
#define TSTATE_O    0
#define TSTATE_C    2
#define TSTATE_E    4
#define TSTATE_NE   5
#define TSTATE_G    0xF
#define TSTATE_GE   0xD
#define TSTATE_A    7
#define TSTATE_AE   3
/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+sh4_x86.tstate); OP(rel8); \
    MARK_JMP(rel8,label)
/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+ (sh4_x86.tstate^1)); OP(rel8); \
    MARK_JMP(rel8, label)
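/* Editorial sketch (not part of the original file) of how the tstate cache
 * pays off: when a compare has just set the x86 flags, a following
 * conditional branch can use them directly --
 *
 *     CMP/EQ Rm, Rn   -> emits cmp + sete, leaves ZF live, tstate = TSTATE_E
 *     BT disp         -> JT_rel8 emits just "jz rel8", no re-test of sh4r.t
 *
 * Only when tstate == TSTATE_NONE do the macros above fall back to the
 * explicit CMP against sh4r.t.
 */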
static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
void sh4_x86_init()
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
}
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                                          sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_addr = (uint32_t *)fixup_addr;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}
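/* Worked example: for a block starting at 0x8C001000, a fixup emitted for
 * the instruction at 0x8C001008 records fixup_icount == (0x8 >> 1) == 4,
 * i.e. the instruction's offset from the block start in 16-bit slots (the
 * branch's slot is used instead when the fixup falls in a delay slot).
 */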
/**
 * Emit an instruction to load an SH4 reg into a real register
 */
static inline void load_reg( int x86reg, int sh4reg )
{
    /* mov [bp+n], reg */
    OP(0x8B);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}

static inline void load_reg16s( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xBF);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}

static inline void load_reg16u( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xB7);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}
#define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
#define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )

/**
 * Emit an instruction to load an immediate value into a register
 */
static inline void load_imm32( int x86reg, uint32_t value ) {
    /* mov #value, reg */
    OP(0xB8 + x86reg);
    OP32(value);
}
/**
 * Load an immediate 64-bit quantity (note: x86-64 only)
 */
static inline void load_imm64( int x86reg, uint64_t value ) {
    /* mov #value, reg */
    REXW();
    OP(0xB8 + x86reg);
    OP64(value);
}
/**
 * Emit an instruction to store an SH4 reg (RN)
 */
static inline void store_reg( int x86reg, int sh4reg ) {
    /* mov reg, [bp+n] */
    OP(0x89);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}
#define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
static inline void load_fr( int bankreg, int x86reg, int frm )
{
    OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
}

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
static inline void store_fr( int bankreg, int x86reg, int frn )
{
    OP(0x89);  OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
}
/**
 * Load a pointer to the back fp bank into the specified x86 register. The
 * bankreg must have been previously loaded with FPSCR.
 * NB: 12 bytes
 */
static inline void load_xf_bank( int bankreg )
{
    NOT_r32( bankreg );
    SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
    AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
    OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
}
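/* In C terms the three ALU ops plus the LEA above compute (sketch):
 *
 *     offset  = (~fpscr >> 15) & 0x40;   // 0x40 iff FPSCR.FR (bit 21) == 0
 *     bankreg = (char *)&sh4r + REG_OFFSET(fr) + offset;
 *
 * i.e. they point bankreg at the 64-byte bank *opposite* to the front bank
 * that fr_bank refers to.
 */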
/**
 * Update the fr_bank pointer based on the current fpscr value.
 */
static inline void update_fr_bank( int fpscrreg )
{
    SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
    AND_imm8s_r32( 0x40, fpscrreg );    // Complete extraction
    OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
    store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
}
/**
 * Push FPUL (as a 32-bit float) onto the FPU stack
 */
static inline void push_fpul( )
{
    OP(0xD9); OP(0x45); OP(R_FPUL);
}

/**
 * Pop FPUL (as a 32-bit float) from the FPU stack
 */
static inline void pop_fpul( )
{
    OP(0xD9); OP(0x5D); OP(R_FPUL);
}

/**
 * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
 * with the location of the current fp bank.
 */
static inline void push_fr( int bankreg, int frm )
{
    OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
}

/**
 * Pop a 32-bit float from the FPU stack and store it back into the fp bank,
 * with bankreg previously loaded with the location of the current fp bank.
 */
static inline void pop_fr( int bankreg, int frm )
{
    OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
}

/**
 * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
 * with the location of the current fp bank.
 */
static inline void push_dr( int bankreg, int frm )
{
    OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
}

static inline void pop_dr( int bankreg, int frm )
{
    OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
}
/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( !sh4_x86.priv_checked ) { \
        sh4_x86.priv_checked = TRUE;\
        load_spreg( R_EAX, R_SR );\
        AND_imm32_r32( SR_MD, R_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JE_exc( EXC_SLOT_ILLEGAL );\
        } else {\
            JE_exc( EXC_ILLEGAL );\
        }\
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        load_spreg( R_EAX, R_SR );\
        AND_imm32_r32( SR_FD, R_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
    }
#define check_ralign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define UNDEF()
#define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
#define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
/**
 * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
#define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }

#define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg) }

/**
 * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
#define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
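/* Runtime effect of the translate macros, as a C sketch:
 *
 *     if( tlb_on ) {                                   // translation-time test
 *         uint32_t phys = mmu_vma_to_phys_read(addr);  // or _write
 *         if( phys == MMU_VMA_ERROR )
 *             goto exception_stub;                     // via the backpatched JE_exc
 *         addr = phys;
 *     }
 *
 * With the TLB off the macros emit nothing at all, which is why
 * MMU_TRANSLATE_SIZE below is 0 in that case.
 */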
#define MEM_READ_SIZE (CALL_FUNC1_SIZE)
#define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
#define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )

#define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
/****** Import appropriate calling conventions ******/
#if SH4_TRANSLATOR == TARGET_X86_64
#include "sh4/ia64abi.h"
#else /* SH4_TRANSLATOR == TARGET_X86 */
#ifdef APPLE_BUILD
#include "sh4/ia32mac.h"
#else
#include "sh4/ia32abi.h"
#endif
#endif
uint32_t sh4_translate_end_block_size()
{
    if( sh4_x86.backpatch_posn <= 3 ) {
        return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
    } else {
        return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
    }
}
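/* Worked example: backpatch_posn == 2 reserves EPILOGUE_SIZE + 24 bytes,
 * while backpatch_posn == 5 reserves EPILOGUE_SIZE + 48 + 2*15 ==
 * EPILOGUE_SIZE + 78 bytes for the block epilogue plus exception stubs.
 */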
/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    load_imm32( R_EAX, pc );
    call_func1( sh4_translate_breakpoint_hit, R_EAX );
}
/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (mainly page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc.
 */
void sh4_emulator_exit( sh4vma_t endpc )
{
    load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
    ADD_r32_sh4r( R_ECX, R_PC );

    load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
    store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );

    call_func0( sh4_execute_instruction );
    load_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_EAX);
    } else {
        call_func1(xlat_get_code,R_EAX);
    }
    AND_imm8s_r32( 0xFC, R_EAX ); // 3
    POP_r32(R_EBP);
    RET();
}
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both branch and delayed instruction as a single unit (as
 * the delay-slot instruction must be executed before the branch takes
 * effect).
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or an instruction that otherwise terminates the block)
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    /* PC is not in the current icache - this usually means we're running
     * with MMU on, and we've gone past the end of the page. And since
     * sh4_translate_block is pretty careful about this, it means we're
     * almost certainly in a delay slot.
     *
     * Since we can't assume the page is present (and we can't fault it in
     * at this point), inline a call to sh4_execute_instruction (with a few
     * small repairs to cope with the different environment).
     */

    if( !sh4_x86.in_delay_slot ) {
        sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
    }
%%
/* ALU operations */
ADD Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADD #imm, Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( imm, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADC_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ADDV Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    AND_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:
    load_reg( R_EAX, 0 );
    AND_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE( R_EAX, R_EAX );
    POP_realigned_r32(R_ECX);
    AND_imm32_r32(imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
CMP/EQ Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:
    load_reg( R_EAX, 0 );
    CMP_imm8s_r32(imm, R_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
:}
CMP/PL Rn {:
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_ECX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    JE_rel8(13, target1);
    TEST_r8_r8( R_AH, R_AH ); // 2
    JE_rel8(9, target2);
    SHR_imm8_r32( 16, R_EAX ); // 3
    TEST_r8_r8( R_AL, R_AL ); // 2
    JE_rel8(2, target3);
    TEST_r8_r8( R_AH, R_AH ); // 2
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
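/* CMP/STR sets T iff any of the four corresponding bytes of Rm and Rn are
 * equal; the byte tests above short-circuit to the shared SETE. C sketch:
 *
 *     uint32_t tmp = rm ^ rn;
 *     T = (tmp & 0x000000FF) == 0 || (tmp & 0x0000FF00) == 0 ||
 *         (tmp & 0x00FF0000) == 0 || (tmp & 0xFF000000) == 0;
 */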
DIV0S Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHR_imm8_r32( 31, R_EAX );
    SHR_imm8_r32( 31, R_ECX );
    store_spreg( R_EAX, R_M );
    store_spreg( R_ECX, R_Q );
    CMP_r32_r32( R_EAX, R_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:
    XOR_r32_r32( R_EAX, R_EAX );
    store_spreg( R_EAX, R_Q );
    store_spreg( R_EAX, R_M );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
DIV1 Rm, Rn {:
    load_spreg( R_ECX, R_M );
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    SETC_r8( R_DL ); // Q'
    CMP_sh4r_r32( R_Q, R_ECX );
    JE_rel8(5, mqequal);
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_rel8(3, end);
    JMP_TARGET(mqequal);
    SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_TARGET(end);
    store_reg( R_EAX, Rn ); // Done with Rn now
    SETC_r8(R_AL); // tmp1
    XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
    XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
    store_spreg( R_ECX, R_Q );
    XOR_imm8s_r32( 1, R_AL );   // T = !Q'
    MOVZX_r8_r32( R_AL, R_EAX );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
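/* DIV1 is one step of non-restoring division. A C sketch of what the
 * emitted sequence computes (q1 and carry are the temporaries kept in
 * DL and AL):
 *
 *     int q1 = rn >> 31;                              // captured by RCL + SETC
 *     rn = (rn << 1) | T;
 *     int carry;
 *     if( Q == M ) { carry = (rn < rm);      rn -= rm; }  // borrow out
 *     else         { carry = (rn + rm < rn); rn += rm; }  // carry out
 *     Q = q1 ^ M ^ carry;
 *     T = !(q1 ^ carry);                              // same as T = (Q == M)
 */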
DMULS.L Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    IMUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    store_reg( R_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVSX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTS.W Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVSX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.B Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVZX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.W Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVZX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
MAC.L @Rm+, @Rn+ {:
    if( Rm == Rn ) {
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        PUSH_realigned_r32( R_EAX );
        load_reg( R_EAX, Rn );
        ADD_imm8s_r32( 4, R_EAX );
        MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
        ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        load_reg( R_ECX, Rn );
        check_ralign32( R_ECX );
        PUSH_realigned_r32( R_EAX );
        MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
        MOV_r32_r32( R_ECX, R_EAX );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_LONG( R_EAX, R_EAX );
    POP_r32( R_ECX );
    PUSH_r32( R_EAX );
    MEM_READ_LONG( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );

    IMUL_r32( R_ECX );
    ADD_r32_sh4r( R_EAX, R_MACL );
    ADC_r32_sh4r( R_EDX, R_MACH );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32(R_ECX, R_ECX);
    JE_rel8( CALL_FUNC0_SIZE, nosat );
    call_func0( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
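/* MAC.L semantics (C sketch, CLAMP as shorthand): a 64-bit multiply-
 * accumulate through MACH:MACL, with signsat48 clamping the accumulator
 * to 48 bits when the S flag is set:
 *
 *     int64_t acc = ((int64_t)MACH << 32) | MACL;
 *     acc += (int64_t)(int32_t)mem[rm] * (int32_t)mem[rn];
 *     if( S ) acc = CLAMP( acc, -0x800000000000LL, 0x7FFFFFFFFFFFLL );
 */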
MAC.W @Rm+, @Rn+ {:
    if( Rm == Rn ) {
        load_reg( R_EAX, Rm );
        check_ralign16( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        PUSH_realigned_r32( R_EAX );
        load_reg( R_EAX, Rn );
        ADD_imm8s_r32( 2, R_EAX );
        MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( R_EAX, Rm );
        check_ralign16( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        load_reg( R_ECX, Rn );
        check_ralign16( R_ECX );
        PUSH_realigned_r32( R_EAX );
        MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
        MOV_r32_r32( R_ECX, R_EAX );
        ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
        ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_WORD( R_EAX, R_EAX );
    POP_r32( R_ECX );
    PUSH_r32( R_EAX );
    MEM_READ_WORD( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );
    IMUL_r32( R_ECX );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32( R_ECX, R_ECX );
    JE_rel8( 47, nosat );

    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    JNO_rel8( 51, end );            // 2
    load_imm32( R_EDX, 1 );         // 5
    store_spreg( R_EDX, R_MACH );   // 6
    JS_rel8( 13, positive );        // 2
    load_imm32( R_EAX, 0x80000000 );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8( 25, end2 );           // 2

    JMP_TARGET(positive);
    load_imm32( R_EAX, 0x7FFFFFFF );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8( 12, end3);            // 2

    JMP_TARGET(nosat);
    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    ADC_r32_sh4r( R_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
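/* MAC.W semantics (C sketch): with S clear this is a full 64-bit MAC like
 * MAC.L; with S set the sum saturates MACL to 32 bits and flags overflow
 * by writing 1 to MACH, which is what the size-annotated branches above
 * implement:
 *
 *     int32_t prod = (int16_t)mem[rm] * (int16_t)mem[rn];
 *     if( signed_add_overflows(MACL, prod) ) {
 *         MACL = (prod < 0) ? 0x80000000 : 0x7FFFFFFF;
 *         MACH = 1;
 *     } else MACL += prod;
 */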
MOVT Rn {:
    load_spreg( R_EAX, R_T );
    store_reg( R_EAX, Rn );
:}
MUL.L Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    load_reg16s( R_EAX, Rm );
    load_reg16s( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:
    load_reg16u( R_EAX, Rm );
    load_reg16u( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    load_reg( R_EAX, Rm );
    NEG_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:
    load_reg( R_EAX, Rm );
    XOR_r32_r32( R_ECX, R_ECX );
    LDC_t();
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:
    load_reg( R_EAX, Rm );
    NOT_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    load_reg( R_EAX, 0 );
    OR_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE( R_EAX, R_EAX );
    POP_realigned_r32(R_ECX);
    OR_imm32_r32(imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:
    load_reg( R_EAX, Rn );
    ROL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:
    load_reg( R_EAX, Rn );
    ROR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    /* Annoyingly enough, not directly convertible */
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(16, doshl);

    NEG_r32( R_ECX );          // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8( 4, emptysar);     // 2
    SAR_r32_CL( R_EAX );       // 2
    JMP_rel8(10, end);         // 2

    JMP_TARGET(emptysar);
    SAR_imm8_r32(31, R_EAX );  // 3
    JMP_rel8(5, end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
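/* SHAD semantics (C sketch): shift left for positive Rm, arithmetic shift
 * right for negative, with the "empty" right shift (low 5 bits of the
 * count == 0, i.e. shift by 32) collapsing to a full sign fill via SAR 31:
 *
 *     if( rm >= 0 )            rn <<= (rm & 0x1F);
 *     else if( (-rm) & 0x1F )  rn = (int32_t)rn >> ((-rm) & 0x1F);
 *     else                     rn = (int32_t)rn >> 31;
 *
 * SHLD below is the logical counterpart, where the empty right shift
 * produces 0 instead.
 */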
SHLD Rm, Rn {:
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(15, doshl);

    NEG_r32( R_ECX );          // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8( 4, emptyshr );
    SHR_r32_CL( R_EAX );       // 2
    JMP_rel8(9, end);          // 2

    JMP_TARGET(emptyshr);
    XOR_r32_r32( R_EAX, R_EAX );
    JMP_rel8(5, end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHAL Rn {:
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHAR Rn {:
    load_reg( R_EAX, Rn );
    SAR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL Rn {:
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL2 Rn {:
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL8 Rn {:
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL16 Rn {:
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR Rn {:
    load_reg( R_EAX, Rn );
    SHR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLR2 Rn {:
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR8 Rn {:
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR16 Rn {:
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUB Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUBC Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SUBV Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
SWAP.B Rm, Rn {:
    load_reg( R_EAX, Rm );
    XCHG_r8_r8( R_AL, R_AH );
    store_reg( R_EAX, Rn );
:}
SWAP.W Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    SHL_imm8_r32( 16, R_ECX );
    SHR_imm8_r32( 16, R_EAX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
TAS.B @Rn {:
    load_reg( R_EAX, Rn );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    SETE_t();
    OR_imm8_r8( 0x80, R_AL );
    POP_realigned_r32( R_ECX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
TST Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    TEST_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST #imm, R0 {:
    load_reg( R_EAX, 0 );
    TEST_imm32_r32( imm, R_EAX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    TEST_imm8_r8( imm, R_AL );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
XOR Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR #imm, R0 {:
    load_reg( R_EAX, 0 );
    XOR_imm32_r32( imm, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE(R_EAX, R_EAX);
    POP_realigned_r32(R_ECX);
    XOR_imm32_r32( imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
XTRCT Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    SHR_imm8_r32( 16, R_ECX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Data move instructions */
MOV Rm, Rn {:
    load_reg( R_EAX, Rm );
    store_reg( R_EAX, Rn );
:}
MOV #imm, Rn {:
    load_imm32( R_EAX, imm );
    store_reg( R_EAX, Rn );
:}
MOV.B Rm, @Rn {:
    load_reg( R_EAX, Rn );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @-Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm, Rn {:
    load_reg( R_EAX, Rm );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm+, Rn {:
    load_reg( R_EAX, Rm );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, Rm), R0 {:
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @Rn {:
    load_reg( R_EAX, Rn );
    check_walign32(R_EAX);
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @-Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -4, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm+, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, PC), Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
        if( IS_IN_ICACHE(target) ) {
            // If the target address is in the same page as the code, it's
            // pretty safe to just ref it directly and circumvent the whole
            // memory subsystem. (this is a big performance win)

            // FIXME: There's a corner-case that's not handled here when
            // the current code-page is in the ITLB but not in the UTLB.
            // (should generate a TLB miss although need to test SH4
            // behaviour to confirm) Unlikely to be anyone depending on this
            // behaviour though.
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOV_moff32_EAX( ptr );
        } else {
            // Note: we use sh4r.pc for the calc as we could be running at a
            // different virtual address than the translation was done with,
            // but we can safely assume that the low bits are the same.
            load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
            ADD_sh4r_r32( R_PC, R_EAX );
            MMU_TRANSLATE_READ( R_EAX );
            MEM_READ_LONG( R_EAX, R_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( R_EAX, Rn );
    }
:}
MOV.L @(disp, Rm), Rn {:
    load_reg( R_EAX, Rm );
    ADD_imm8s_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @Rn {:
    load_reg( R_EAX, Rn );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @-Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -2, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm+, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, PC), Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        // See comments for MOV.L @(disp, PC), Rn
        uint32_t target = pc + disp + 4;
        if( IS_IN_ICACHE(target) ) {
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOV_moff32_EAX( ptr );
            MOVSX_r16_r32( R_EAX, R_EAX );
        } else {
            load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
            ADD_sh4r_r32( R_PC, R_EAX );
            MMU_TRANSLATE_READ( R_EAX );
            MEM_READ_WORD( R_EAX, R_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( R_EAX, Rn );
    }
:}
MOV.W @(disp, Rm), R0 {:
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVA @(disp, PC), R0 {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
        ADD_sh4r_r32( R_PC, R_ECX );
        store_reg( R_ECX, 0 );
        sh4_x86.tstate = TSTATE_NONE;
    }
:}
MOVCA.L R0, @Rn {:
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Control transfer instructions */
BF disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        JT_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
        exit_block_rel(target, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
:}
BF/S disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        sh4_x86.in_delay_slot = DELAY_PC;
        if( sh4_x86.tstate == TSTATE_NONE ) {
            CMP_imm8s_sh4r( 1, R_T );
            sh4_x86.tstate = TSTATE_E;
        }
        OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // jcc rel32: skip to not-taken path if T is set
        sh4_translate_instruction(pc+2);
        exit_block_rel( target, pc+4 );
        // not taken
        *patch = (xlat_output - ((uint8_t *)patch)) - 4;
        sh4_translate_instruction(pc+2);
        return 4;
    }
:}
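/* How the rel32 backpatch above works (sketch): OP32(0) reserves the
 * 4-byte displacement and `patch` remembers where it lives; once the
 * taken path has been emitted, the displacement is fixed up to reach the
 * fall-through (not-taken) code:
 *
 *     *patch = xlat_output - (uint8_t *)patch - 4;  // rel32 is relative to
 *                                                   // the end of the jcc
 */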
BRA disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_translate_instruction( pc + 2 );
        exit_block_rel( disp + pc + 4, pc+4 );
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
BRAF Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
        store_spreg( R_EAX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_translate_instruction( pc + 2 );
        exit_block_newpcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
BSR disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_translate_instruction( pc + 2 );
        exit_block_rel( disp + pc + 4, pc+4 );
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
BSRF Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
        store_spreg( R_EAX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_translate_instruction( pc + 2 );
        exit_block_newpcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
BT disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        JF_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
        exit_block_rel(target, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
:}
BT/S disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        if( sh4_x86.tstate == TSTATE_NONE ) {
            CMP_imm8s_sh4r( 1, R_T );
            sh4_x86.tstate = TSTATE_E;
        }
        OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // jcc rel32: skip to not-taken path if T is clear
        sh4_translate_instruction(pc+2);
        exit_block_rel( disp + pc + 4, pc+4 );
        // not taken
        *patch = (xlat_output - ((uint8_t *)patch)) - 4;
        sh4_translate_instruction(pc+2);
        return 4;
    }
:}
JMP @Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_reg( R_ECX, Rn );
        store_spreg( R_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_translate_instruction(pc+2);
        exit_block_newpcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
JSR @Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        load_reg( R_ECX, Rn );
        store_spreg( R_ECX, R_NEW_PC );
        sh4_translate_instruction(pc+2);
        exit_block_newpcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
RTE {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        check_priv();
        load_spreg( R_ECX, R_SPC );
        store_spreg( R_ECX, R_NEW_PC );
        load_spreg( R_EAX, R_SSR );
        call_func1( sh4_write_sr, R_EAX );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.priv_checked = FALSE;
        sh4_x86.fpuen_checked = FALSE;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_translate_instruction(pc+2);
        exit_block_newpcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
RTS {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_ECX, R_PR );
        store_spreg( R_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_translate_instruction(pc+2);
        exit_block_newpcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
TRAPA #imm {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
        ADD_r32_sh4r( R_ECX, R_PC );
        load_imm32( R_EAX, imm );
        call_func1( sh4_raise_trap, R_EAX );
        sh4_x86.tstate = TSTATE_NONE;
        exit_block_pcset(pc);
        sh4_x86.branch_taken = TRUE;
        return 2;
    }
:}
UNDEF {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        JMP_exc(EXC_ILLEGAL);
        return 2;
    }
:}
CLRMAC {:
    XOR_r32_r32(R_EAX, R_EAX);
    store_spreg( R_EAX, R_MACL );
    store_spreg( R_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
CLRS {:
    CLC();
    SETC_sh4r(R_S);
    sh4_x86.tstate = TSTATE_C;
:}
CLRT {:
    CLC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SETS {:
    STC();
    SETC_sh4r(R_S);
    sh4_x86.tstate = TSTATE_C;
:}
SETT {:
    STC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
  1641 /* Floating point moves */
  1642 FMOV FRm, FRn {:  
  1643     /* As horrible as this looks, it's actually covering 5 separate cases:
  1644      * 1. 32-bit fr-to-fr (PR=0)
  1645      * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )
  1646      * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )
  1647      * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
  1648      * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
  1649      */
  1650     check_fpuen();
  1651     load_spreg( R_ECX, R_FPSCR );
  1652     load_fr_bank( R_EDX );
  1653     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1654     JNE_rel8(8, doublesize);
  1655     load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
  1656     store_fr( R_EDX, R_EAX, FRn );
  1657     if( FRm&1 ) {
  1658 	JMP_rel8(24, end);
  1659 	JMP_TARGET(doublesize);
  1660 	load_xf_bank( R_ECX ); 
  1661 	load_fr( R_ECX, R_EAX, FRm-1 );
  1662 	if( FRn&1 ) {
  1663 	    load_fr( R_ECX, R_EDX, FRm );
  1664 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1665 	    store_fr( R_ECX, R_EDX, FRn );
  1666 	} else /* FRn&1 == 0 */ {
  1667 	    load_fr( R_ECX, R_ECX, FRm );
  1668 	    store_fr( R_EDX, R_EAX, FRn );
  1669 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1671 	JMP_TARGET(end);
  1672     } else /* FRm&1 == 0 */ {
  1673 	if( FRn&1 ) {
  1674 	    JMP_rel8(24, end);
  1675 	    load_xf_bank( R_ECX );
  1676 	    load_fr( R_EDX, R_EAX, FRm );
  1677 	    load_fr( R_EDX, R_EDX, FRm+1 );
  1678 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1679 	    store_fr( R_ECX, R_EDX, FRn );
  1680 	    JMP_TARGET(end);
  1681 	} else /* FRn&1 == 0 */ {
  1682 	    JMP_rel8(12, end);
  1683 	    load_fr( R_EDX, R_EAX, FRm );
  1684 	    load_fr( R_EDX, R_ECX, FRm+1 );
  1685 	    store_fr( R_EDX, R_EAX, FRn );
  1686 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1687 	    JMP_TARGET(end);
  1688 	}
  1689     }
  1690     sh4_x86.tstate = TSTATE_NONE;
  1691 :}
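       /* The five cases reduce to one rule: with FPSCR.SZ set, an even
        * operand names a DR pair in the current bank and an odd operand
        * names an XD pair in the extension bank. A sketch over plain float
        * arrays (names illustrative):
        *
        *   static void fmov64_sketch( float *fr, float *xf, int m, int n ) {
        *       float *src = (m & 1) ? xf : fr;   // odd -> extension bank
        *       float *dst = (n & 1) ? xf : fr;
        *       dst[n & 0x0E]       = src[m & 0x0E];
        *       dst[(n & 0x0E) + 1] = src[(m & 0x0E) + 1];
        *   }
        */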
  1692 FMOV FRm, @Rn {: 
  1693     check_fpuen();
  1694     load_reg( R_EAX, Rn );
  1695     check_walign32( R_EAX );
  1696     MMU_TRANSLATE_WRITE( R_EAX );
  1697     load_spreg( R_EDX, R_FPSCR );
  1698     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1699     JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
  1700     load_fr_bank( R_EDX );
  1701     load_fr( R_EDX, R_ECX, FRm );
  1702     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1703     if( FRm&1 ) {
  1704 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1705 	JMP_TARGET(doublesize);
  1706 	load_xf_bank( R_EDX );
  1707 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1708 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1709 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1710 	JMP_TARGET(end);
  1711     } else {
  1712 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1713 	JMP_TARGET(doublesize);
  1714 	load_fr_bank( R_EDX );
  1715 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1716 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1717 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1718 	JMP_TARGET(end);
  1719     }
  1720     sh4_x86.tstate = TSTATE_NONE;
  1721 :}
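       /* FPSCR.SZ is only known at run time, so both transfer widths are
        * emitted and the TEST/JNE selects one. Effective semantics, with
        * mem_write32/mem_write64 as illustrative names:
        *
        *   if( fpscr & FPSCR_SZ )    // 64-bit: the pair FRm&~1, FRm|1
        *       mem_write64( addr, *(uint64_t *)&fr[FRm & ~1] );
        *   else                      // 32-bit: FRm alone
        *       mem_write32( addr, *(uint32_t *)&fr[FRm] );
        */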
  1722 FMOV @Rm, FRn {:  
  1723     check_fpuen();
  1724     load_reg( R_EAX, Rm );
  1725     check_ralign32( R_EAX );
  1726     MMU_TRANSLATE_READ( R_EAX );
  1727     load_spreg( R_EDX, R_FPSCR );
  1728     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1729     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1730     MEM_READ_LONG( R_EAX, R_EAX );
  1731     load_fr_bank( R_EDX );
  1732     store_fr( R_EDX, R_EAX, FRn );
  1733     if( FRn&1 ) {
  1734 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1735 	JMP_TARGET(doublesize);
  1736 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1737 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1738 	load_xf_bank( R_EDX );
  1739 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1740 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1741 	JMP_TARGET(end);
  1742     } else {
  1743 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1744 	JMP_TARGET(doublesize);
  1745 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1746 	load_fr_bank( R_EDX );
  1747 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1748 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1749 	JMP_TARGET(end);
  1750     }
  1751     sh4_x86.tstate = TSTATE_NONE;
  1752 :}
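       /* The integer passed to JMP_rel8/JNE_rel8 is a hand-counted byte
        * length for the region being skipped (the "// 12"-style comments
        * record the per-op sizes). In DEBUG_JUMPS builds the JMP_TARGET()
        * marker asserts the count against the actual emission point,
        * presumably along the lines of:
        *
        *   #define JMP_TARGET(x)  assert( _mark_jmp_##x == xlat_output )
        */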
  1753 FMOV FRm, @-Rn {:  
  1754     check_fpuen();
  1755     load_reg( R_EAX, Rn );
  1756     check_walign32( R_EAX );
  1757     load_spreg( R_EDX, R_FPSCR );
  1758     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1759     JNE_rel8(15 + MEM_WRITE_SIZE + MMU_TRANSLATE_SIZE, doublesize);
  1760     ADD_imm8s_r32( -4, R_EAX );
  1761     MMU_TRANSLATE_WRITE( R_EAX );
  1762     load_fr_bank( R_EDX );
  1763     load_fr( R_EDX, R_ECX, FRm );
  1764     ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1765     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1766     if( FRm&1 ) {
  1767 	JMP_rel8( 25 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
  1768 	JMP_TARGET(doublesize);
  1769 	ADD_imm8s_r32(-8,R_EAX);
  1770 	MMU_TRANSLATE_WRITE( R_EAX );
  1771 	load_xf_bank( R_EDX );
  1772 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1773 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1774 	ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1775 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1776 	JMP_TARGET(end);
  1777     } else {
  1778 	JMP_rel8( 16 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
  1779 	JMP_TARGET(doublesize);
  1780 	ADD_imm8s_r32(-8,R_EAX);
  1781 	MMU_TRANSLATE_WRITE( R_EAX );
  1782 	load_fr_bank( R_EDX );
  1783 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1784 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1785 	ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1786 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1787 	JMP_TARGET(end);
  1788     }
  1789     sh4_x86.tstate = TSTATE_NONE;
  1790 :}
  1791 FMOV @Rm+, FRn {:
  1792     check_fpuen();
  1793     load_reg( R_EAX, Rm );
  1794     check_ralign32( R_EAX );
  1795     MMU_TRANSLATE_READ( R_EAX );
  1796     load_spreg( R_EDX, R_FPSCR );
  1797     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1798     JNE_rel8(12 + MEM_READ_SIZE, doublesize);
  1799     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1800     MEM_READ_LONG( R_EAX, R_EAX );
  1801     load_fr_bank( R_EDX );
  1802     store_fr( R_EDX, R_EAX, FRn );
  1803     if( FRn&1 ) {
  1804 	JMP_rel8(25 + MEM_READ_DOUBLE_SIZE, end);
  1805 	JMP_TARGET(doublesize);
  1806 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1807 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1808 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1809 	load_xf_bank( R_EDX );
  1810 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1811 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1812 	JMP_TARGET(end);
  1813     } else {
  1814 	JMP_rel8(13 + MEM_READ_DOUBLE_SIZE, end);
  1815 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1816 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1817 	load_fr_bank( R_EDX );
  1818 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1819 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1820 	JMP_TARGET(end);
  1821     }
  1822     sh4_x86.tstate = TSTATE_NONE;
  1823 :}
  1824 FMOV FRm, @(R0, Rn) {:  
  1825     check_fpuen();
  1826     load_reg( R_EAX, Rn );
  1827     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1828     check_walign32( R_EAX );
  1829     MMU_TRANSLATE_WRITE( R_EAX );
  1830     load_spreg( R_EDX, R_FPSCR );
  1831     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1832     JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
  1833     load_fr_bank( R_EDX );
  1834     load_fr( R_EDX, R_ECX, FRm );
  1835     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1836     if( FRm&1 ) {
  1837 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1838 	JMP_TARGET(doublesize);
  1839 	load_xf_bank( R_EDX );
  1840 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1841 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1842 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1843 	JMP_TARGET(end);
  1844     } else {
  1845 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1846 	JMP_TARGET(doublesize);
  1847 	load_fr_bank( R_EDX );
  1848 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1849 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1850 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1851 	JMP_TARGET(end);
  1852     }
  1853     sh4_x86.tstate = TSTATE_NONE;
  1854 :}
  1855 FMOV @(R0, Rm), FRn {:  
  1856     check_fpuen();
  1857     load_reg( R_EAX, Rm );
  1858     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1859     check_ralign32( R_EAX );
  1860     MMU_TRANSLATE_READ( R_EAX );
  1861     load_spreg( R_EDX, R_FPSCR );
  1862     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1863     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1864     MEM_READ_LONG( R_EAX, R_EAX );
  1865     load_fr_bank( R_EDX );
  1866     store_fr( R_EDX, R_EAX, FRn );
  1867     if( FRn&1 ) {
  1868 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1869 	JMP_TARGET(doublesize);
  1870 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1871 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1872 	load_xf_bank( R_EDX );
  1873 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1874 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1875 	JMP_TARGET(end);
  1876     } else {
  1877 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1878 	JMP_TARGET(doublesize);
  1879 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1880 	load_fr_bank( R_EDX );
  1881 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1882 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1883 	JMP_TARGET(end);
  1884     }
  1885     sh4_x86.tstate = TSTATE_NONE;
  1886 :}
  1887 FLDI0 FRn {:  /* IFF PR=0 */
  1888     check_fpuen();
  1889     load_spreg( R_ECX, R_FPSCR );
  1890     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1891     JNE_rel8(8, end);
  1892     XOR_r32_r32( R_EAX, R_EAX );
  1893     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1894     store_fr( R_ECX, R_EAX, FRn );
  1895     JMP_TARGET(end);
  1896     sh4_x86.tstate = TSTATE_NONE;
  1897 :}
  1898 FLDI1 FRn {:  /* IFF PR=0 */
  1899     check_fpuen();
  1900     load_spreg( R_ECX, R_FPSCR );
  1901     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1902     JNE_rel8(11, end);
  1903     load_imm32(R_EAX, 0x3F800000);
  1904     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1905     store_fr( R_ECX, R_EAX, FRn );
  1906     JMP_TARGET(end);
  1907     sh4_x86.tstate = TSTATE_NONE;
  1908 :}
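       /* 0x3F800000 is the IEEE 754 single-precision encoding of 1.0f, so
        * FLDI1 (and FLDI0 with 0x00000000) is a plain 32-bit store:
        *
        *   union { uint32_t i; float f; } one = { .i = 0x3F800000 };
        *   assert( one.f == 1.0f );
        */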
  1910 FLOAT FPUL, FRn {:  
  1911     check_fpuen();
  1912     load_spreg( R_ECX, R_FPSCR );
  1913     load_spreg(R_EDX, REG_OFFSET(fr_bank));
  1914     FILD_sh4r(R_FPUL);
  1915     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1916     JNE_rel8(5, doubleprec);
  1917     pop_fr( R_EDX, FRn );
  1918     JMP_rel8(3, end);
  1919     JMP_TARGET(doubleprec);
  1920     pop_dr( R_EDX, FRn );
  1921     JMP_TARGET(end);
  1922     sh4_x86.tstate = TSTATE_NONE;
  1923 :}
  1924 FTRC FRm, FPUL {:  
  1925     check_fpuen();
  1926     load_spreg( R_ECX, R_FPSCR );
  1927     load_fr_bank( R_EDX );
  1928     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1929     JNE_rel8(5, doubleprec);
  1930     push_fr( R_EDX, FRm );
  1931     JMP_rel8(3, doop);
  1932     JMP_TARGET(doubleprec);
  1933     push_dr( R_EDX, FRm );
  1934     JMP_TARGET( doop );
  1935     load_imm32( R_ECX, (uint32_t)&max_int );
  1936     FILD_r32ind( R_ECX );
  1937     FCOMIP_st(1);
  1938     JNA_rel8( 32, sat );
  1939     load_imm32( R_ECX, (uint32_t)&min_int );  // 5
  1940     FILD_r32ind( R_ECX );           // 2
  1941     FCOMIP_st(1);                   // 2
  1942     JAE_rel8( 21, sat2 );            // 2
  1943     load_imm32( R_EAX, (uint32_t)&save_fcw );
  1944     FNSTCW_r32ind( R_EAX );
  1945     load_imm32( R_EDX, (uint32_t)&trunc_fcw );
  1946     FLDCW_r32ind( R_EDX );
  1947     FISTP_sh4r(R_FPUL);             // 3
  1948     FLDCW_r32ind( R_EAX );
  1949     JMP_rel8( 9, end );             // 2
  1951     JMP_TARGET(sat);
  1952     JMP_TARGET(sat2);
  1953     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  1954     store_spreg( R_ECX, R_FPUL );
  1955     FPOP_st();
  1956     JMP_TARGET(end);
  1957     sh4_x86.tstate = TSTATE_NONE;
  1958 :}
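       /* The two FILD/FCOMIP tests clamp before the FISTP, because an x87
        * FISTP of an out-of-range value writes the integer indefinite
        * (0x80000000) rather than saturating. Intended result (sketch):
        *
        *   static int32_t ftrc_sketch( double d ) {
        *       if( d >= 2147483647.0 )  return 2147483647;      // max_int
        *       if( d <= -2147483648.0 ) return (-2147483647-1); // min_int
        *       return (int32_t)d;   // FISTP with RC=truncate
        *   }
        */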
  1959 FLDS FRm, FPUL {:  
  1960     check_fpuen();
  1961     load_fr_bank( R_ECX );
  1962     load_fr( R_ECX, R_EAX, FRm );
  1963     store_spreg( R_EAX, R_FPUL );
  1964     sh4_x86.tstate = TSTATE_NONE;
  1965 :}
  1966 FSTS FPUL, FRn {:  
  1967     check_fpuen();
  1968     load_fr_bank( R_ECX );
  1969     load_spreg( R_EAX, R_FPUL );
  1970     store_fr( R_ECX, R_EAX, FRn );
  1971     sh4_x86.tstate = TSTATE_NONE;
  1972 :}
  1973 FCNVDS FRm, FPUL {:  
  1974     check_fpuen();
  1975     load_spreg( R_ECX, R_FPSCR );
  1976     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1977     JE_rel8(9, end); // only when PR=1
  1978     load_fr_bank( R_ECX );
  1979     push_dr( R_ECX, FRm );
  1980     pop_fpul();
  1981     JMP_TARGET(end);
  1982     sh4_x86.tstate = TSTATE_NONE;
  1983 :}
  1984 FCNVSD FPUL, FRn {:  
  1985     check_fpuen();
  1986     load_spreg( R_ECX, R_FPSCR );
  1987     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1988     JE_rel8(9, end); // only when PR=1
  1989     load_fr_bank( R_ECX );
  1990     push_fpul();
  1991     pop_dr( R_ECX, FRn );
  1992     JMP_TARGET(end);
  1993     sh4_x86.tstate = TSTATE_NONE;
  1994 :}
  1996 /* Floating point instructions */
  1997 FABS FRn {:  
  1998     check_fpuen();
  1999     load_spreg( R_ECX, R_FPSCR );
  2000     load_fr_bank( R_EDX );
  2001     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2002     JNE_rel8(10, doubleprec);
  2003     push_fr(R_EDX, FRn); // 3
  2004     FABS_st0(); // 2
  2005     pop_fr( R_EDX, FRn); //3
  2006     JMP_rel8(8,end); // 2
  2007     JMP_TARGET(doubleprec);
  2008     push_dr(R_EDX, FRn);
  2009     FABS_st0();
  2010     pop_dr(R_EDX, FRn);
  2011     JMP_TARGET(end);
  2012     sh4_x86.tstate = TSTATE_NONE;
  2013 :}
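       /* Template for the arithmetic ops that follow: push the operand(s)
        * with push_fr/push_dr, apply one x87 instruction, pop the result
        * back into the bank, testing FPSCR.PR at run time to pick the
        * precision. The PR=0 path of FABS is simply:
        *
        *   fr[FRn] = fabsf( fr[FRn] );
        */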
  2014 FADD FRm, FRn {:  
  2015     check_fpuen();
  2016     load_spreg( R_ECX, R_FPSCR );
  2017     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2018     load_fr_bank( R_EDX );
  2019     JNE_rel8(13,doubleprec);
  2020     push_fr(R_EDX, FRm);
  2021     push_fr(R_EDX, FRn);
  2022     FADDP_st(1);
  2023     pop_fr(R_EDX, FRn);
  2024     JMP_rel8(11,end);
  2025     JMP_TARGET(doubleprec);
  2026     push_dr(R_EDX, FRm);
  2027     push_dr(R_EDX, FRn);
  2028     FADDP_st(1);
  2029     pop_dr(R_EDX, FRn);
  2030     JMP_TARGET(end);
  2031     sh4_x86.tstate = TSTATE_NONE;
  2032 :}
  2033 FDIV FRm, FRn {:  
  2034     check_fpuen();
  2035     load_spreg( R_ECX, R_FPSCR );
  2036     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2037     load_fr_bank( R_EDX );
  2038     JNE_rel8(13, doubleprec);
  2039     push_fr(R_EDX, FRn);
  2040     push_fr(R_EDX, FRm);
  2041     FDIVP_st(1);
  2042     pop_fr(R_EDX, FRn);
  2043     JMP_rel8(11, end);
  2044     JMP_TARGET(doubleprec);
  2045     push_dr(R_EDX, FRn);
  2046     push_dr(R_EDX, FRm);
  2047     FDIVP_st(1);
  2048     pop_dr(R_EDX, FRn);
  2049     JMP_TARGET(end);
  2050     sh4_x86.tstate = TSTATE_NONE;
  2051 :}
  2052 FMAC FR0, FRm, FRn {:  
  2053     check_fpuen();
  2054     load_spreg( R_ECX, R_FPSCR );
  2055     load_spreg( R_EDX, REG_OFFSET(fr_bank));
  2056     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2057     JNE_rel8(18, doubleprec);
  2058     push_fr( R_EDX, 0 );
  2059     push_fr( R_EDX, FRm );
  2060     FMULP_st(1);
  2061     push_fr( R_EDX, FRn );
  2062     FADDP_st(1);
  2063     pop_fr( R_EDX, FRn );
  2064     JMP_rel8(16, end);
  2065     JMP_TARGET(doubleprec);
  2066     push_dr( R_EDX, 0 );
  2067     push_dr( R_EDX, FRm );
  2068     FMULP_st(1);
  2069     push_dr( R_EDX, FRn );
  2070     FADDP_st(1);
  2071     pop_dr( R_EDX, FRn );
  2072     JMP_TARGET(end);
  2073     sh4_x86.tstate = TSTATE_NONE;
  2074 :}
  2076 FMUL FRm, FRn {:  
  2077     check_fpuen();
  2078     load_spreg( R_ECX, R_FPSCR );
  2079     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2080     load_fr_bank( R_EDX );
  2081     JNE_rel8(13, doubleprec);
  2082     push_fr(R_EDX, FRm);
  2083     push_fr(R_EDX, FRn);
  2084     FMULP_st(1);
  2085     pop_fr(R_EDX, FRn);
  2086     JMP_rel8(11, end);
  2087     JMP_TARGET(doubleprec);
  2088     push_dr(R_EDX, FRm);
  2089     push_dr(R_EDX, FRn);
  2090     FMULP_st(1);
  2091     pop_dr(R_EDX, FRn);
  2092     JMP_TARGET(end);
  2093     sh4_x86.tstate = TSTATE_NONE;
  2094 :}
  2095 FNEG FRn {:  
  2096     check_fpuen();
  2097     load_spreg( R_ECX, R_FPSCR );
  2098     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2099     load_fr_bank( R_EDX );
  2100     JNE_rel8(10, doubleprec);
  2101     push_fr(R_EDX, FRn);
  2102     FCHS_st0();
  2103     pop_fr(R_EDX, FRn);
  2104     JMP_rel8(8, end);
  2105     JMP_TARGET(doubleprec);
  2106     push_dr(R_EDX, FRn);
  2107     FCHS_st0();
  2108     pop_dr(R_EDX, FRn);
  2109     JMP_TARGET(end);
  2110     sh4_x86.tstate = TSTATE_NONE;
  2111 :}
  2112 FSRRA FRn {:  
  2113     check_fpuen();
  2114     load_spreg( R_ECX, R_FPSCR );
  2115     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2116     load_fr_bank( R_EDX );
  2117     JNE_rel8(12, end); // PR=0 only
  2118     FLD1_st0();
  2119     push_fr(R_EDX, FRn);
  2120     FSQRT_st0();
  2121     FDIVP_st(1);
  2122     pop_fr(R_EDX, FRn);
  2123     JMP_TARGET(end);
  2124     sh4_x86.tstate = TSTATE_NONE;
  2125 :}
  2126 FSQRT FRn {:  
  2127     check_fpuen();
  2128     load_spreg( R_ECX, R_FPSCR );
  2129     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2130     load_fr_bank( R_EDX );
  2131     JNE_rel8(10, doubleprec);
  2132     push_fr(R_EDX, FRn);
  2133     FSQRT_st0();
  2134     pop_fr(R_EDX, FRn);
  2135     JMP_rel8(8, end);
  2136     JMP_TARGET(doubleprec);
  2137     push_dr(R_EDX, FRn);
  2138     FSQRT_st0();
  2139     pop_dr(R_EDX, FRn);
  2140     JMP_TARGET(end);
  2141     sh4_x86.tstate = TSTATE_NONE;
  2142 :}
  2143 FSUB FRm, FRn {:  
  2144     check_fpuen();
  2145     load_spreg( R_ECX, R_FPSCR );
  2146     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2147     load_fr_bank( R_EDX );
  2148     JNE_rel8(13, doubleprec);
  2149     push_fr(R_EDX, FRn);
  2150     push_fr(R_EDX, FRm);
  2151     FSUBP_st(1);
  2152     pop_fr(R_EDX, FRn);
  2153     JMP_rel8(11, end);
  2154     JMP_TARGET(doubleprec);
  2155     push_dr(R_EDX, FRn);
  2156     push_dr(R_EDX, FRm);
  2157     FSUBP_st(1);
  2158     pop_dr(R_EDX, FRn);
  2159     JMP_TARGET(end);
  2160     sh4_x86.tstate = TSTATE_NONE;
  2161 :}
  2163 FCMP/EQ FRm, FRn {:  
  2164     check_fpuen();
  2165     load_spreg( R_ECX, R_FPSCR );
  2166     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2167     load_fr_bank( R_EDX );
  2168     JNE_rel8(8, doubleprec);
  2169     push_fr(R_EDX, FRm);
  2170     push_fr(R_EDX, FRn);
  2171     JMP_rel8(6, end);
  2172     JMP_TARGET(doubleprec);
  2173     push_dr(R_EDX, FRm);
  2174     push_dr(R_EDX, FRn);
  2175     JMP_TARGET(end);
  2176     FCOMIP_st(1);
  2177     SETE_t();
  2178     FPOP_st();
  2179     sh4_x86.tstate = TSTATE_NONE;
  2180 :}
  2181 FCMP/GT FRm, FRn {:  
  2182     check_fpuen();
  2183     load_spreg( R_ECX, R_FPSCR );
  2184     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2185     load_fr_bank( R_EDX );
  2186     JNE_rel8(8, doubleprec);
  2187     push_fr(R_EDX, FRm);
  2188     push_fr(R_EDX, FRn);
  2189     JMP_rel8(6, end);
  2190     JMP_TARGET(doubleprec);
  2191     push_dr(R_EDX, FRm);
  2192     push_dr(R_EDX, FRn);
  2193     JMP_TARGET(end);
  2194     FCOMIP_st(1);
  2195     SETA_t();
  2196     FPOP_st();
  2197     sh4_x86.tstate = TSTATE_NONE;
  2198 :}
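       /* Both compares push FRm then FRn, so FCOMIP tests ST(0)=FRn against
        * ST(1)=FRm; only the final SETcc differs:
        *
        *   sh4r.t = (frn == frm);   // FCMP/EQ  -> SETE_t()
        *   sh4r.t = (frn >  frm);   // FCMP/GT  -> SETA_t()
        */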
  2200 FSCA FPUL, FRn {:  
  2201     check_fpuen();
  2202     load_spreg( R_ECX, R_FPSCR );
  2203     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2204     JNE_rel8( CALL_FUNC2_SIZE + 9, doubleprec );
  2205     load_fr_bank( R_ECX );
  2206     ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
  2207     load_spreg( R_EDX, R_FPUL );
  2208     call_func2( sh4_fsca, R_EDX, R_ECX );
  2209     JMP_TARGET(doubleprec);
  2210     sh4_x86.tstate = TSTATE_NONE;
  2211 :}
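       /* FPUL holds the angle as an unsigned 16-bit fraction of a full
        * turn. sh4_fsca is assumed to behave roughly like this sketch
        * (the write order may differ with lxdream's bank layout):
        *
        *   static void fsca_sketch( uint32_t fpul, float *dest ) {
        *       double a = (fpul & 0xFFFF) * (2.0 * M_PI / 65536.0);
        *       dest[0] = (float)sin( a );   // -> FRn
        *       dest[1] = (float)cos( a );   // -> FRn+1
        *   }
        */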
  2212 FIPR FVm, FVn {:  
  2213     check_fpuen();
  2214     load_spreg( R_ECX, R_FPSCR );
  2215     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2216     JNE_rel8(44, doubleprec);
  2218     load_fr_bank( R_ECX );
  2219     push_fr( R_ECX, FVm<<2 );
  2220     push_fr( R_ECX, FVn<<2 );
  2221     FMULP_st(1);
  2222     push_fr( R_ECX, (FVm<<2)+1);
  2223     push_fr( R_ECX, (FVn<<2)+1);
  2224     FMULP_st(1);
  2225     FADDP_st(1);
  2226     push_fr( R_ECX, (FVm<<2)+2);
  2227     push_fr( R_ECX, (FVn<<2)+2);
  2228     FMULP_st(1);
  2229     FADDP_st(1);
  2230     push_fr( R_ECX, (FVm<<2)+3);
  2231     push_fr( R_ECX, (FVn<<2)+3);
  2232     FMULP_st(1);
  2233     FADDP_st(1);
  2234     pop_fr( R_ECX, (FVn<<2)+3);
  2235     JMP_TARGET(doubleprec);
  2236     sh4_x86.tstate = TSTATE_NONE;
  2237 :}
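       /* The FMULP/FADDP chain evaluates the four-element dot product
        * entirely on the x87 stack; equivalently:
        *
        *   static float fipr_sketch( const float *fvm, const float *fvn ) {
        *       return fvm[0]*fvn[0] + fvm[1]*fvn[1]
        *            + fvm[2]*fvn[2] + fvm[3]*fvn[3];
        *   }
        *
        * with the result written back to FR[(FVn<<2)+3].
        */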
  2238 FTRV XMTRX, FVn {:  
  2239     check_fpuen();
  2240     load_spreg( R_ECX, R_FPSCR );
  2241     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2242     JNE_rel8( 18 + CALL_FUNC2_SIZE, doubleprec );
  2243     load_fr_bank( R_EDX );                 // 3
  2244     ADD_imm8s_r32( FVn<<4, R_EDX );        // 3
  2245     load_xf_bank( R_ECX );                 // 12
  2246     call_func2( sh4_ftrv, R_EDX, R_ECX );  // 12
  2247     JMP_TARGET(doubleprec);
  2248     sh4_x86.tstate = TSTATE_NONE;
  2249 :}
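       /* sh4_ftrv applies the 4x4 XMTRX in the extension bank to FVn.
        * Assuming the manual's column-major layout (XF0-XF3 form the first
        * column), a sketch:
        *
        *   static void ftrv_sketch( float *fv, const float *xm ) {
        *       float r[4];
        *       for( int i = 0; i < 4; i++ )
        *           r[i] = xm[i]*fv[0] + xm[i+4]*fv[1]
        *                + xm[i+8]*fv[2] + xm[i+12]*fv[3];
        *       fv[0] = r[0]; fv[1] = r[1]; fv[2] = r[2]; fv[3] = r[3];
        *   }
        */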
  2251 FRCHG {:  
  2252     check_fpuen();
  2253     load_spreg( R_ECX, R_FPSCR );
  2254     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2255     store_spreg( R_ECX, R_FPSCR );
  2256     update_fr_bank( R_ECX );
  2257     sh4_x86.tstate = TSTATE_NONE;
  2258 :}
  2259 FSCHG {:  
  2260     check_fpuen();
  2261     load_spreg( R_ECX, R_FPSCR );
  2262     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2263     store_spreg( R_ECX, R_FPSCR );
  2264     sh4_x86.tstate = TSTATE_NONE;
  2265 :}
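       /* FRCHG flips FPSCR.FR (swapping the FR/XF banks); FSCHG flips
        * FPSCR.SZ (32- vs 64-bit FMOV). Only the former needs
        * update_fr_bank(), assumed to recompute the cached pointer along
        * the lines of:
        *
        *   sh4r.fr_bank = sh4r.fr[(sh4r.fpscr & FPSCR_FR) ? 1 : 0];
        */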
  2267 /* Processor control instructions */
  2268 LDC Rm, SR {:
  2269     if( sh4_x86.in_delay_slot ) {
  2270 	SLOTILLEGAL();
  2271     } else {
  2272 	check_priv();
  2273 	load_reg( R_EAX, Rm );
  2274 	call_func1( sh4_write_sr, R_EAX );
  2275 	sh4_x86.priv_checked = FALSE;
  2276 	sh4_x86.fpuen_checked = FALSE;
  2277 	sh4_x86.tstate = TSTATE_NONE;
  2278     }
  2279 :}
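       /* SR writes can flip the register bank and the privilege mode, which
        * is why priv_checked/fpuen_checked are invalidated above. A sketch
        * of what sh4_write_sr is assumed to do (field names follow sh4r
        * usage elsewhere; the body is an assumption):
        *
        *   void sh4_write_sr_sketch( uint32_t val ) {
        *       if( (val ^ sh4r.sr) & SR_RB ) {      // bank change?
        *           for( int i = 0; i < 8; i++ ) {   // swap R0-R7
        *               uint32_t tmp = sh4r.r[i];
        *               sh4r.r[i] = sh4r.r_bank[i];
        *               sh4r.r_bank[i] = tmp;
        *           }
        *       }
        *       sh4r.sr = val;
        *       sh4r.t  = val & 0x01;                // T kept unpacked
        *   }
        */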
  2280 LDC Rm, GBR {: 
  2281     load_reg( R_EAX, Rm );
  2282     store_spreg( R_EAX, R_GBR );
  2283 :}
  2284 LDC Rm, VBR {:  
  2285     check_priv();
  2286     load_reg( R_EAX, Rm );
  2287     store_spreg( R_EAX, R_VBR );
  2288     sh4_x86.tstate = TSTATE_NONE;
  2289 :}
  2290 LDC Rm, SSR {:  
  2291     check_priv();
  2292     load_reg( R_EAX, Rm );
  2293     store_spreg( R_EAX, R_SSR );
  2294     sh4_x86.tstate = TSTATE_NONE;
  2295 :}
  2296 LDC Rm, SGR {:  
  2297     check_priv();
  2298     load_reg( R_EAX, Rm );
  2299     store_spreg( R_EAX, R_SGR );
  2300     sh4_x86.tstate = TSTATE_NONE;
  2301 :}
  2302 LDC Rm, SPC {:  
  2303     check_priv();
  2304     load_reg( R_EAX, Rm );
  2305     store_spreg( R_EAX, R_SPC );
  2306     sh4_x86.tstate = TSTATE_NONE;
  2307 :}
  2308 LDC Rm, DBR {:  
  2309     check_priv();
  2310     load_reg( R_EAX, Rm );
  2311     store_spreg( R_EAX, R_DBR );
  2312     sh4_x86.tstate = TSTATE_NONE;
  2313 :}
  2314 LDC Rm, Rn_BANK {:  
  2315     check_priv();
  2316     load_reg( R_EAX, Rm );
  2317     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2318     sh4_x86.tstate = TSTATE_NONE;
  2319 :}
  2320 LDC.L @Rm+, GBR {:  
  2321     load_reg( R_EAX, Rm );
  2322     check_ralign32( R_EAX );
  2323     MMU_TRANSLATE_READ( R_EAX );
  2324     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2325     MEM_READ_LONG( R_EAX, R_EAX );
  2326     store_spreg( R_EAX, R_GBR );
  2327     sh4_x86.tstate = TSTATE_NONE;
  2328 :}
  2329 LDC.L @Rm+, SR {:
  2330     if( sh4_x86.in_delay_slot ) {
  2331 	SLOTILLEGAL();
  2332     } else {
  2333 	check_priv();
  2334 	load_reg( R_EAX, Rm );
  2335 	check_ralign32( R_EAX );
  2336 	MMU_TRANSLATE_READ( R_EAX );
  2337 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2338 	MEM_READ_LONG( R_EAX, R_EAX );
  2339 	call_func1( sh4_write_sr, R_EAX );
  2340 	sh4_x86.priv_checked = FALSE;
  2341 	sh4_x86.fpuen_checked = FALSE;
  2342 	sh4_x86.tstate = TSTATE_NONE;
  2343     }
  2344 :}
  2345 LDC.L @Rm+, VBR {:  
  2346     check_priv();
  2347     load_reg( R_EAX, Rm );
  2348     check_ralign32( R_EAX );
  2349     MMU_TRANSLATE_READ( R_EAX );
  2350     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2351     MEM_READ_LONG( R_EAX, R_EAX );
  2352     store_spreg( R_EAX, R_VBR );
  2353     sh4_x86.tstate = TSTATE_NONE;
  2354 :}
  2355 LDC.L @Rm+, SSR {:
  2356     check_priv();
  2357     load_reg( R_EAX, Rm );
  2358     check_ralign32( R_EAX );
  2359     MMU_TRANSLATE_READ( R_EAX );
  2360     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2361     MEM_READ_LONG( R_EAX, R_EAX );
  2362     store_spreg( R_EAX, R_SSR );
  2363     sh4_x86.tstate = TSTATE_NONE;
  2364 :}
  2365 LDC.L @Rm+, SGR {:  
  2366     check_priv();
  2367     load_reg( R_EAX, Rm );
  2368     check_ralign32( R_EAX );
  2369     MMU_TRANSLATE_READ( R_EAX );
  2370     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2371     MEM_READ_LONG( R_EAX, R_EAX );
  2372     store_spreg( R_EAX, R_SGR );
  2373     sh4_x86.tstate = TSTATE_NONE;
  2374 :}
  2375 LDC.L @Rm+, SPC {:  
  2376     check_priv();
  2377     load_reg( R_EAX, Rm );
  2378     check_ralign32( R_EAX );
  2379     MMU_TRANSLATE_READ( R_EAX );
  2380     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2381     MEM_READ_LONG( R_EAX, R_EAX );
  2382     store_spreg( R_EAX, R_SPC );
  2383     sh4_x86.tstate = TSTATE_NONE;
  2384 :}
  2385 LDC.L @Rm+, DBR {:  
  2386     check_priv();
  2387     load_reg( R_EAX, Rm );
  2388     check_ralign32( R_EAX );
  2389     MMU_TRANSLATE_READ( R_EAX );
  2390     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2391     MEM_READ_LONG( R_EAX, R_EAX );
  2392     store_spreg( R_EAX, R_DBR );
  2393     sh4_x86.tstate = TSTATE_NONE;
  2394 :}
  2395 LDC.L @Rm+, Rn_BANK {:  
  2396     check_priv();
  2397     load_reg( R_EAX, Rm );
  2398     check_ralign32( R_EAX );
  2399     MMU_TRANSLATE_READ( R_EAX );
  2400     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2401     MEM_READ_LONG( R_EAX, R_EAX );
  2402     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2403     sh4_x86.tstate = TSTATE_NONE;
  2404 :}
  2405 LDS Rm, FPSCR {:  
  2406     load_reg( R_EAX, Rm );
  2407     store_spreg( R_EAX, R_FPSCR );
  2408     update_fr_bank( R_EAX );
  2409     sh4_x86.tstate = TSTATE_NONE;
  2410 :}
  2411 LDS.L @Rm+, FPSCR {:  
  2412     load_reg( R_EAX, Rm );
  2413     check_ralign32( R_EAX );
  2414     MMU_TRANSLATE_READ( R_EAX );
  2415     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2416     MEM_READ_LONG( R_EAX, R_EAX );
  2417     store_spreg( R_EAX, R_FPSCR );
  2418     update_fr_bank( R_EAX );
  2419     sh4_x86.tstate = TSTATE_NONE;
  2420 :}
  2421 LDS Rm, FPUL {:  
  2422     load_reg( R_EAX, Rm );
  2423     store_spreg( R_EAX, R_FPUL );
  2424 :}
  2425 LDS.L @Rm+, FPUL {:  
  2426     load_reg( R_EAX, Rm );
  2427     check_ralign32( R_EAX );
  2428     MMU_TRANSLATE_READ( R_EAX );
  2429     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2430     MEM_READ_LONG( R_EAX, R_EAX );
  2431     store_spreg( R_EAX, R_FPUL );
  2432     sh4_x86.tstate = TSTATE_NONE;
  2433 :}
  2434 LDS Rm, MACH {: 
  2435     load_reg( R_EAX, Rm );
  2436     store_spreg( R_EAX, R_MACH );
  2437 :}
  2438 LDS.L @Rm+, MACH {:  
  2439     load_reg( R_EAX, Rm );
  2440     check_ralign32( R_EAX );
  2441     MMU_TRANSLATE_READ( R_EAX );
  2442     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2443     MEM_READ_LONG( R_EAX, R_EAX );
  2444     store_spreg( R_EAX, R_MACH );
  2445     sh4_x86.tstate = TSTATE_NONE;
  2446 :}
  2447 LDS Rm, MACL {:  
  2448     load_reg( R_EAX, Rm );
  2449     store_spreg( R_EAX, R_MACL );
  2450 :}
  2451 LDS.L @Rm+, MACL {:  
  2452     load_reg( R_EAX, Rm );
  2453     check_ralign32( R_EAX );
  2454     MMU_TRANSLATE_READ( R_EAX );
  2455     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2456     MEM_READ_LONG( R_EAX, R_EAX );
  2457     store_spreg( R_EAX, R_MACL );
  2458     sh4_x86.tstate = TSTATE_NONE;
  2459 :}
  2460 LDS Rm, PR {:  
  2461     load_reg( R_EAX, Rm );
  2462     store_spreg( R_EAX, R_PR );
  2463 :}
  2464 LDS.L @Rm+, PR {:  
  2465     load_reg( R_EAX, Rm );
  2466     check_ralign32( R_EAX );
  2467     MMU_TRANSLATE_READ( R_EAX );
  2468     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2469     MEM_READ_LONG( R_EAX, R_EAX );
  2470     store_spreg( R_EAX, R_PR );
  2471     sh4_x86.tstate = TSTATE_NONE;
  2472 :}
  2473 LDTLB {:  
  2474     call_func0( MMU_ldtlb );
  2475 :}
  2476 OCBI @Rn {:  :}
  2477 OCBP @Rn {:  :}
  2478 OCBWB @Rn {:  :}
  2479 PREF @Rn {:
  2480     load_reg( R_EAX, Rn );
  2481     MOV_r32_r32( R_EAX, R_ECX );
  2482     AND_imm32_r32( 0xFC000000, R_EAX );
  2483     CMP_imm32_r32( 0xE0000000, R_EAX );
  2484     JNE_rel8(8+CALL_FUNC1_SIZE, end);
  2485     call_func1( sh4_flush_store_queue, R_ECX );
  2486     TEST_r32_r32( R_EAX, R_EAX );
  2487     JE_exc(-1);
  2488     JMP_TARGET(end);
  2489     sh4_x86.tstate = TSTATE_NONE;
  2490 :}
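       /* PREF on 0xE0000000-0xE3FFFFFF is a store-queue write-back rather
        * than a prefetch, which is exactly what the masked compare selects:
        *
        *   if( (addr & 0xFC000000) == 0xE0000000 )
        *       ok = sh4_flush_store_queue( addr );  // burst SQ0/SQ1 out
        *   // ok == 0 raises the pending exception (the JE_exc above)
        */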
  2491 SLEEP {: 
  2492     check_priv();
  2493     call_func0( sh4_sleep );
  2494     sh4_x86.tstate = TSTATE_NONE;
  2495     sh4_x86.in_delay_slot = DELAY_NONE;
  2496     return 2;
  2497 :}
  2498 STC SR, Rn {:
  2499     check_priv();
  2500     call_func0(sh4_read_sr);
  2501     store_reg( R_EAX, Rn );
  2502     sh4_x86.tstate = TSTATE_NONE;
  2503 :}
  2504 STC GBR, Rn {:  
  2505     load_spreg( R_EAX, R_GBR );
  2506     store_reg( R_EAX, Rn );
  2507 :}
  2508 STC VBR, Rn {:  
  2509     check_priv();
  2510     load_spreg( R_EAX, R_VBR );
  2511     store_reg( R_EAX, Rn );
  2512     sh4_x86.tstate = TSTATE_NONE;
  2513 :}
  2514 STC SSR, Rn {:  
  2515     check_priv();
  2516     load_spreg( R_EAX, R_SSR );
  2517     store_reg( R_EAX, Rn );
  2518     sh4_x86.tstate = TSTATE_NONE;
  2519 :}
  2520 STC SPC, Rn {:  
  2521     check_priv();
  2522     load_spreg( R_EAX, R_SPC );
  2523     store_reg( R_EAX, Rn );
  2524     sh4_x86.tstate = TSTATE_NONE;
  2525 :}
  2526 STC SGR, Rn {:  
  2527     check_priv();
  2528     load_spreg( R_EAX, R_SGR );
  2529     store_reg( R_EAX, Rn );
  2530     sh4_x86.tstate = TSTATE_NONE;
  2531 :}
  2532 STC DBR, Rn {:  
  2533     check_priv();
  2534     load_spreg( R_EAX, R_DBR );
  2535     store_reg( R_EAX, Rn );
  2536     sh4_x86.tstate = TSTATE_NONE;
  2537 :}
  2538 STC Rm_BANK, Rn {:
  2539     check_priv();
  2540     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2541     store_reg( R_EAX, Rn );
  2542     sh4_x86.tstate = TSTATE_NONE;
  2543 :}
  2544 STC.L SR, @-Rn {:
  2545     check_priv();
  2546     load_reg( R_EAX, Rn );
  2547     check_walign32( R_EAX );
  2548     ADD_imm8s_r32( -4, R_EAX );
  2549     MMU_TRANSLATE_WRITE( R_EAX );
  2550     PUSH_realigned_r32( R_EAX );
  2551     call_func0( sh4_read_sr );
  2552     POP_realigned_r32( R_ECX );
  2553     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2554     MEM_WRITE_LONG( R_ECX, R_EAX );
  2555     sh4_x86.tstate = TSTATE_NONE;
  2556 :}
  2557 STC.L VBR, @-Rn {:  
  2558     check_priv();
  2559     load_reg( R_EAX, Rn );
  2560     check_walign32( R_EAX );
  2561     ADD_imm8s_r32( -4, R_EAX );
  2562     MMU_TRANSLATE_WRITE( R_EAX );
  2563     load_spreg( R_EDX, R_VBR );
  2564     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2565     MEM_WRITE_LONG( R_EAX, R_EDX );
  2566     sh4_x86.tstate = TSTATE_NONE;
  2567 :}
  2568 STC.L SSR, @-Rn {:  
  2569     check_priv();
  2570     load_reg( R_EAX, Rn );
  2571     check_walign32( R_EAX );
  2572     ADD_imm8s_r32( -4, R_EAX );
  2573     MMU_TRANSLATE_WRITE( R_EAX );
  2574     load_spreg( R_EDX, R_SSR );
  2575     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2576     MEM_WRITE_LONG( R_EAX, R_EDX );
  2577     sh4_x86.tstate = TSTATE_NONE;
  2578 :}
  2579 STC.L SPC, @-Rn {:
  2580     check_priv();
  2581     load_reg( R_EAX, Rn );
  2582     check_walign32( R_EAX );
  2583     ADD_imm8s_r32( -4, R_EAX );
  2584     MMU_TRANSLATE_WRITE( R_EAX );
  2585     load_spreg( R_EDX, R_SPC );
  2586     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2587     MEM_WRITE_LONG( R_EAX, R_EDX );
  2588     sh4_x86.tstate = TSTATE_NONE;
  2589 :}
  2590 STC.L SGR, @-Rn {:  
  2591     check_priv();
  2592     load_reg( R_EAX, Rn );
  2593     check_walign32( R_EAX );
  2594     ADD_imm8s_r32( -4, R_EAX );
  2595     MMU_TRANSLATE_WRITE( R_EAX );
  2596     load_spreg( R_EDX, R_SGR );
  2597     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2598     MEM_WRITE_LONG( R_EAX, R_EDX );
  2599     sh4_x86.tstate = TSTATE_NONE;
  2600 :}
  2601 STC.L DBR, @-Rn {:  
  2602     check_priv();
  2603     load_reg( R_EAX, Rn );
  2604     check_walign32( R_EAX );
  2605     ADD_imm8s_r32( -4, R_EAX );
  2606     MMU_TRANSLATE_WRITE( R_EAX );
  2607     load_spreg( R_EDX, R_DBR );
  2608     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2609     MEM_WRITE_LONG( R_EAX, R_EDX );
  2610     sh4_x86.tstate = TSTATE_NONE;
  2611 :}
  2612 STC.L Rm_BANK, @-Rn {:  
  2613     check_priv();
  2614     load_reg( R_EAX, Rn );
  2615     check_walign32( R_EAX );
  2616     ADD_imm8s_r32( -4, R_EAX );
  2617     MMU_TRANSLATE_WRITE( R_EAX );
  2618     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2619     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2620     MEM_WRITE_LONG( R_EAX, R_EDX );
  2621     sh4_x86.tstate = TSTATE_NONE;
  2622 :}
  2623 STC.L GBR, @-Rn {:  
  2624     load_reg( R_EAX, Rn );
  2625     check_walign32( R_EAX );
  2626     ADD_imm8s_r32( -4, R_EAX );
  2627     MMU_TRANSLATE_WRITE( R_EAX );
  2628     load_spreg( R_EDX, R_GBR );
  2629     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2630     MEM_WRITE_LONG( R_EAX, R_EDX );
  2631     sh4_x86.tstate = TSTATE_NONE;
  2632 :}
  2633 STS FPSCR, Rn {:  
  2634     load_spreg( R_EAX, R_FPSCR );
  2635     store_reg( R_EAX, Rn );
  2636 :}
  2637 STS.L FPSCR, @-Rn {:  
  2638     load_reg( R_EAX, Rn );
  2639     check_walign32( R_EAX );
  2640     ADD_imm8s_r32( -4, R_EAX );
  2641     MMU_TRANSLATE_WRITE( R_EAX );
  2642     load_spreg( R_EDX, R_FPSCR );
  2643     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2644     MEM_WRITE_LONG( R_EAX, R_EDX );
  2645     sh4_x86.tstate = TSTATE_NONE;
  2646 :}
  2647 STS FPUL, Rn {:  
  2648     load_spreg( R_EAX, R_FPUL );
  2649     store_reg( R_EAX, Rn );
  2650 :}
  2651 STS.L FPUL, @-Rn {:  
  2652     load_reg( R_EAX, Rn );
  2653     check_walign32( R_EAX );
  2654     ADD_imm8s_r32( -4, R_EAX );
  2655     MMU_TRANSLATE_WRITE( R_EAX );
  2656     load_spreg( R_EDX, R_FPUL );
  2657     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2658     MEM_WRITE_LONG( R_EAX, R_EDX );
  2659     sh4_x86.tstate = TSTATE_NONE;
  2660 :}
  2661 STS MACH, Rn {:  
  2662     load_spreg( R_EAX, R_MACH );
  2663     store_reg( R_EAX, Rn );
  2664 :}
  2665 STS.L MACH, @-Rn {:  
  2666     load_reg( R_EAX, Rn );
  2667     check_walign32( R_EAX );
  2668     ADD_imm8s_r32( -4, R_EAX );
  2669     MMU_TRANSLATE_WRITE( R_EAX );
  2670     load_spreg( R_EDX, R_MACH );
  2671     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2672     MEM_WRITE_LONG( R_EAX, R_EDX );
  2673     sh4_x86.tstate = TSTATE_NONE;
  2674 :}
  2675 STS MACL, Rn {:  
  2676     load_spreg( R_EAX, R_MACL );
  2677     store_reg( R_EAX, Rn );
  2678 :}
  2679 STS.L MACL, @-Rn {:  
  2680     load_reg( R_EAX, Rn );
  2681     check_walign32( R_EAX );
  2682     ADD_imm8s_r32( -4, R_EAX );
  2683     MMU_TRANSLATE_WRITE( R_EAX );
  2684     load_spreg( R_EDX, R_MACL );
  2685     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2686     MEM_WRITE_LONG( R_EAX, R_EDX );
  2687     sh4_x86.tstate = TSTATE_NONE;
  2688 :}
  2689 STS PR, Rn {:  
  2690     load_spreg( R_EAX, R_PR );
  2691     store_reg( R_EAX, Rn );
  2692 :}
  2693 STS.L PR, @-Rn {:  
  2694     load_reg( R_EAX, Rn );
  2695     check_walign32( R_EAX );
  2696     ADD_imm8s_r32( -4, R_EAX );
  2697     MMU_TRANSLATE_WRITE( R_EAX );
  2698     load_spreg( R_EDX, R_PR );
  2699     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2700     MEM_WRITE_LONG( R_EAX, R_EDX );
  2701     sh4_x86.tstate = TSTATE_NONE;
  2702 :}
  2704 NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
  2705 %%
  2706     sh4_x86.in_delay_slot = DELAY_NONE;
  2707     return 0;