filename     src/sh4/sh4x86.in
changeset    601:d8d1af0d133c
prev         596:dfc0c93d882e
next         604:1024c3a9cb88
author       nkeynes
date         Tue Jan 22 10:11:45 2008 +0000
permissions  -rw-r--r--
last change  Invoke emulator single-step for untranslatable delay slots (and fix a few related bugs)
/**
 * $Id$
 *
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif

#include "sh4/xltcache.h"
#include "sh4/sh4core.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4mmio.h"
#include "sh4/x86op.h"
#include "clock.h"
#define DEFAULT_BACKPATCH_SIZE 4096

struct backpatch_record {
    uint32_t *fixup_addr;
    uint32_t fixup_icount;
    int32_t exc_code;
};

#define MAX_RECOVERY_SIZE 2048

#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2
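/* Values for sh4_x86.in_delay_slot below: DELAY_NONE = translating
 * straight-line code; DELAY_PC = currently translating the delay slot of a
 * branch that modifies PC. DELAY_PC_PR is not used in this section; the
 * name suggests a slot whose branch modifies both PC and PR. (Interpretive
 * note - the constants themselves are otherwise undocumented.)
 */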
/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;
    gboolean priv_checked; /* true if we've already checked the cpu mode. */
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    uint32_t block_start_pc;
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    int tstate;

    /* mode flags */
    gboolean tlb_on; /* True if tlb translation is active */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};
#define TSTATE_NONE -1
#define TSTATE_O    0
#define TSTATE_C    2
#define TSTATE_E    4
#define TSTATE_NE   5
#define TSTATE_G    0xF
#define TSTATE_GE   0xD
#define TSTATE_A    7
#define TSTATE_AE   3

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+sh4_x86.tstate); OP(rel8); \
    MARK_JMP(rel8,label)
/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+ (sh4_x86.tstate^1)); OP(rel8); \
    MARK_JMP(rel8, label)
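/* The TSTATE_* values are x86 condition-code numbers (O=0, B/C=2, E=4,
 * NE=5, A=7, AE=3, GE=0xD, G=0xF), so OP(0x70+sh4_x86.tstate) emits the
 * matching Jcc rel8 directly, and XORing the low bit (as JF_rel8 does)
 * inverts the condition. TSTATE_NONE means the host flags no longer mirror
 * sh4r.t, so the macros re-test T with a CMP first - e.g. straight after a
 * CMP/EQ (tstate == TSTATE_E), JT_rel8 emits a bare JE with no extra test.
 * (Explanatory note; cc values per the IA-32 opcode map.)
 */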
static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
void sh4_x86_init()
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
}

static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                                          sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_addr = (uint32_t *)fixup_addr;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}
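/* Backpatch mechanism in brief: when emitted code needs to raise an SH4
 * exception (JE_exc/JNE_exc/JMP_exc in the macros below), a record is added
 * here instead of generating the exception path inline. When the block is
 * finalised, shared exception stubs are emitted and each fixup_addr - a
 * 32-bit jump displacement in the output buffer - is patched to reach them;
 * fixup_icount records how many instruction slots into the block the fault
 * site lies, so the stub can reconstruct PC and the cycle count. The
 * delay-slot adjustment keeps the icount pointing at the branch that owns
 * the slot. (Summary inferred from this file; the stubs themselves are
 * emitted by the per-ABI headers included further down.)
 */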
/**
 * Emit an instruction to load an SH4 reg into a real register
 */
static inline void load_reg( int x86reg, int sh4reg )
{
    /* mov [bp+n], reg */
    OP(0x8B);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}

static inline void load_reg16s( int x86reg, int sh4reg )
{
    /* movsx r32, r/m16 */
    OP(0x0F);
    OP(0xBF);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}

static inline void load_reg16u( int x86reg, int sh4reg )
{
    /* movzx r32, r/m16 */
    OP(0x0F);
    OP(0xB7);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}
#define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
#define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )

/**
 * Emit an instruction to load an immediate value into a register
 */
static inline void load_imm32( int x86reg, uint32_t value ) {
    /* mov #value, reg */
    OP(0xB8 + x86reg);
    OP32(value);
}

/**
 * Load an immediate 64-bit quantity (note: x86-64 only)
 */
static inline void load_imm64( int x86reg, uint64_t value ) {
    /* mov #value, reg */
    REXW();
    OP(0xB8 + x86reg);
    OP64(value);
}
/**
 * Emit an instruction to store an SH4 reg (RN)
 */
static inline void store_reg( int x86reg, int sh4reg ) {
    /* mov reg, [bp+n] */
    OP(0x89);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}

#define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves). Note frm^1: the singles
 * appear to be stored pair-swapped so that each 64-bit double has host
 * (little-endian) layout.
 */
static inline void load_fr( int bankreg, int x86reg, int frm )
{
    OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
}

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
static inline void store_fr( int bankreg, int x86reg, int frn )
{
    OP(0x89); OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
}
/**
 * Load a pointer to the back fp bank into the specified x86 register. The
 * bankreg must have been previously loaded with FPSCR.
 * NB: 12 bytes
 */
static inline void load_xf_bank( int bankreg )
{
    NOT_r32( bankreg );
    SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
    AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
    OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
}

/**
 * Update the fr_bank pointer based on the current fpscr value.
 */
static inline void update_fr_bank( int fpscrreg )
{
    SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
    AND_imm8s_r32( 0x40, fpscrreg );    // Complete extraction
    OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
    store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
}
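/* Worked example of the bank extraction above, assuming FPSCR.FR is bit 21
 * and each bank is 16 registers * 4 bytes = 64 bytes (as the shift amounts
 * imply): with FR=0, NOT sets bit 21, >>15 moves it to bit 6 (= 64), and
 * AND 0x40 leaves 64, so the back (XF) bank is the one at offset 64. With
 * FR=1 the same arithmetic yields offset 0. update_fr_bank() omits the NOT
 * and therefore selects the front bank instead. (Explanatory note only.)
 */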
/**
 * Push FPUL (as a 32-bit float) onto the FPU stack
 */
static inline void push_fpul( )
{
    OP(0xD9); OP(0x45); OP(R_FPUL);   // FLD.S [ebp+R_FPUL]
}

/**
 * Pop FPUL (as a 32-bit float) from the FPU stack
 */
static inline void pop_fpul( )
{
    OP(0xD9); OP(0x5D); OP(R_FPUL);   // FSTP.S [ebp+R_FPUL]
}

/**
 * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
 * with the location of the current fp bank.
 */
static inline void push_fr( int bankreg, int frm )
{
    OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
}

/**
 * Pop a 32-bit float from the FPU stack and store it back into the fp bank,
 * with bankreg previously loaded with the location of the current fp bank.
 */
static inline void pop_fr( int bankreg, int frm )
{
    OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FSTP.S [bankreg + frm^1*4]
}

/**
 * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
 * with the location of the current fp bank.
 */
static inline void push_dr( int bankreg, int frm )
{
    OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
}

static inline void pop_dr( int bankreg, int frm )
{
    OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FSTP.D [bankreg + frm*4]
}
/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( !sh4_x86.priv_checked ) { \
        sh4_x86.priv_checked = TRUE;\
        load_spreg( R_EAX, R_SR );\
        AND_imm32_r32( SR_MD, R_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JE_exc( EXC_SLOT_ILLEGAL );\
        } else {\
            JE_exc( EXC_ILLEGAL );\
        }\
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        load_spreg( R_EAX, R_SR );\
        AND_imm32_r32( SR_FD, R_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
    }
#define check_ralign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE)

#define check_ralign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE)

#define UNDEF()
#define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
#define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
/**
 * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
#define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }

#define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg); }

/**
 * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
#define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }

#define MEM_READ_SIZE (CALL_FUNC1_SIZE)
#define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
#define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )

#define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
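/* Typical emitted load sequence built from the macros above (this is just
 * what the MOV.L @Rm, Rn action below expands to, shown here as a reading
 * aid):
 *
 *     load_reg( R_EAX, Rm );          // EAX = sh4r.r[Rm]
 *     check_ralign32( R_EAX );        // address error if misaligned
 *     MMU_TRANSLATE_READ( R_EAX );    // VMA -> PMA when the TLB is on
 *     MEM_READ_LONG( R_EAX, R_EAX );  // call out to sh4_read_long
 *     store_reg( R_EAX, Rn );         // sh4r.r[Rn] = result
 */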
/****** Import appropriate calling conventions ******/
#if SH4_TRANSLATOR == TARGET_X86_64
#include "sh4/ia64abi.h"
#else /* SH4_TRANSLATOR == TARGET_X86 */
#ifdef APPLE_BUILD
#include "sh4/ia32mac.h"
#else
#include "sh4/ia32abi.h"
#endif
#endif
uint32_t sh4_translate_end_block_size()
{
    if( sh4_x86.backpatch_posn <= 3 ) {
        return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
    } else {
        return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
    }
}

/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    load_imm32( R_EAX, pc );
    call_func1( sh4_translate_breakpoint_hit, R_EAX );
}
#define UNTRANSLATABLE(pc) (!IS_IN_ICACHE(pc))
/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
    ADD_r32_sh4r( R_ECX, R_PC );

    load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
    store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );

    call_func0( sh4_execute_instruction );
    load_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_EAX);
    } else {
        call_func1(xlat_get_code,R_EAX);
    }
    AND_imm8s_rptr( 0xFC, R_EAX );
    POP_r32(R_EBP);
    RET();
}
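/* Cycle-accounting sketch for exit_block_emu(): with block_start_pc =
 * 0x8C001000 and endpc = 0x8C001006 (illustrative addresses), the second
 * load_imm32 above computes ((0x6>>1)+1)*sh4_cpu_period = 4 periods - the
 * three translated slots plus the single-stepped instruction itself, which
 * sh4_execute_instruction does not account for.
 */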
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both branch and delayed instruction as a single unit (as
 * far as possible).
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or an illegal instruction).
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    /* If PC is not in the current icache - which usually means we're running
     * with MMU on and we've gone past the end of the page - we're almost
     * certainly in a delay slot, since sh4_translate_block is pretty careful
     * about this. Since we can't assume the page is present (and we can't
     * fault it in at this point), that case is handled by inlining a call to
     * sh4_execute_instruction instead (see exit_block_emu above), with a few
     * small repairs to cope with the different environment.
     */

    if( !sh4_x86.in_delay_slot ) {
        sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
    }
%%
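/* Everything after the %% marker is input for lxdream's instruction-decoder
 * generator rather than plain C: each "MNEMONIC operands {: ... :}" block
 * gives the translation action for one SH4 instruction form, with the
 * operand fields (Rm, Rn, imm, disp) bound by the generator. (Reading aid
 * inferred from the file's structure.)
 */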
/* ALU operations */
ADD Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADD #imm, Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( imm, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADC_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ADDV Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    AND_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:
    load_reg( R_EAX, 0 );
    AND_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE( R_EAX, R_EAX );
    POP_realigned_r32(R_ECX);
    AND_imm32_r32(imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
CMP/EQ Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:
    load_reg( R_EAX, 0 );
    CMP_imm8s_r32(imm, R_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
:}
CMP/PL Rn {:
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_ECX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    JE_rel8(13, target1);
    TEST_r8_r8( R_AH, R_AH ); // 2
    JE_rel8(9, target2);
    SHR_imm8_r32( 16, R_EAX ); // 3
    TEST_r8_r8( R_AL, R_AL ); // 2
    JE_rel8(2, target3);
    TEST_r8_r8( R_AH, R_AH ); // 2
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
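/* CMP/STR sets T when any of the four bytes of Rm and Rn are equal. The
 * XOR above zeroes exactly the matching bytes, and the chain of byte TESTs
 * falls through to SETE with ZF reflecting whether a zero byte was found.
 * E.g. Rm = 0x12345678 vs Rn = 0xAB34CDEF match in bits 16-23 (0x34), so
 * the TEST after the SHR sees zero and T is set. (Worked example only.)
 */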
DIV0S Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHR_imm8_r32( 31, R_EAX );
    SHR_imm8_r32( 31, R_ECX );
    store_spreg( R_EAX, R_M );
    store_spreg( R_ECX, R_Q );
    CMP_r32_r32( R_EAX, R_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:
    XOR_r32_r32( R_EAX, R_EAX );
    store_spreg( R_EAX, R_Q );
    store_spreg( R_EAX, R_M );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
DIV1 Rm, Rn {:
    load_spreg( R_ECX, R_M );
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    SETC_r8( R_DL ); // Q'
    CMP_sh4r_r32( R_Q, R_ECX );
    JE_rel8(5, mqequal);
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_rel8(3, end);
    JMP_TARGET(mqequal);
    SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_TARGET(end);
    store_reg( R_EAX, Rn ); // Done with Rn now
    SETC_r8(R_AL); // tmp1
    XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
    XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
    store_spreg( R_ECX, R_Q );
    XOR_imm8s_r32( 1, R_AL );   // T = !Q'
    MOVZX_r8_r32( R_AL, R_EAX );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
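/* DIV1 is one step of the SH4's non-restoring division: RCL rotates the
 * dividend in Rn left through T (pulling in the previous quotient bit and
 * exposing the top bit as Q'), then Rm is added or subtracted according to
 * whether Q == M from the preceding DIV0S/DIV0U. The resulting carry is
 * folded back into Q and T, so that a sequence of 32 ROTCL/DIV1 pairs
 * yields a full quotient, as in the SH4 manual's division idiom.
 * (Explanatory note only.)
 */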
DMULS.L Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    IMUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    store_reg( R_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVSX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTS.W Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVSX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.B Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVZX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.W Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVZX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
MAC.L @Rm+, @Rn+ {:
    if( Rm == Rn ) {
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        PUSH_realigned_r32( R_EAX );
        load_reg( R_EAX, Rn );
        ADD_imm8s_r32( 4, R_EAX );
        MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
        ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        load_reg( R_ECX, Rn );
        check_ralign32( R_ECX );
        PUSH_realigned_r32( R_EAX );
        MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
        MOV_r32_r32( R_ECX, R_EAX );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_LONG( R_EAX, R_EAX );
    POP_r32( R_ECX );
    PUSH_r32( R_EAX );
    MEM_READ_LONG( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );

    IMUL_r32( R_ECX );
    ADD_r32_sh4r( R_EAX, R_MACL );
    ADC_r32_sh4r( R_EDX, R_MACH );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32(R_ECX, R_ECX);
    JE_rel8( CALL_FUNC0_SIZE, nosat );
    call_func0( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
MAC.W @Rm+, @Rn+ {:
    if( Rm == Rn ) {
        load_reg( R_EAX, Rm );
        check_ralign16( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        PUSH_realigned_r32( R_EAX );
        load_reg( R_EAX, Rn );
        ADD_imm8s_r32( 2, R_EAX );
        MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( R_EAX, Rm );
        check_ralign16( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        load_reg( R_ECX, Rn );
        check_ralign16( R_ECX );
        PUSH_realigned_r32( R_EAX );
        MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
        MOV_r32_r32( R_ECX, R_EAX );
        ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
        ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_WORD( R_EAX, R_EAX );
    POP_r32( R_ECX );
    PUSH_r32( R_EAX );
    MEM_READ_WORD( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );
    IMUL_r32( R_ECX );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32( R_ECX, R_ECX );
    JE_rel8( 47, nosat );

    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    JNO_rel8( 51, end );            // 2
    load_imm32( R_EDX, 1 );         // 5
    store_spreg( R_EDX, R_MACH );   // 6
    JS_rel8( 13, positive );        // 2
    load_imm32( R_EAX, 0x80000000 );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8( 25, end2 );           // 2

    JMP_TARGET(positive);
    load_imm32( R_EAX, 0x7FFFFFFF );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8( 12, end3);            // 2

    JMP_TARGET(nosat);
    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    ADC_r32_sh4r( R_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
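/* The branchy tail above implements MAC.W's S=1 saturation mode: the
 * product is accumulated into MACL only, and on signed overflow MACL is
 * clamped to 0x80000000/0x7FFFFFFF with MACH set to 1 as the overflow
 * flag. With S=0 (the nosat path) the full 64-bit accumulate into
 * MACL:MACH runs instead. (Explanatory note, per the SH4 definition of
 * MAC.W.)
 */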
MOVT Rn {:
    load_spreg( R_EAX, R_T );
    store_reg( R_EAX, Rn );
:}
MUL.L Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    load_reg16s( R_EAX, Rm );
    load_reg16s( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:
    load_reg16u( R_EAX, Rm );
    load_reg16u( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    load_reg( R_EAX, Rm );
    NEG_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:
    load_reg( R_EAX, Rm );
    XOR_r32_r32( R_ECX, R_ECX );
    LDC_t();
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:
    load_reg( R_EAX, Rm );
    NOT_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    load_reg( R_EAX, 0 );
    OR_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE( R_EAX, R_EAX );
    POP_realigned_r32(R_ECX);
    OR_imm32_r32(imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:
    load_reg( R_EAX, Rn );
    ROL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:
    load_reg( R_EAX, Rn );
    ROR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    /* Annoyingly enough, not directly convertible */
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(16, doshl);

    NEG_r32( R_ECX );          // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8( 4, emptysar);     // 2
    SAR_r32_CL( R_EAX );       // 2
    JMP_rel8(10, end);         // 2

    JMP_TARGET(emptysar);
    SAR_imm8_r32(31, R_EAX );  // 3
    JMP_rel8(5, end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLD Rm, Rn {:
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(15, doshl);

    NEG_r32( R_ECX );          // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8( 4, emptyshr );
    SHR_r32_CL( R_EAX );       // 2
    JMP_rel8(9, end);          // 2

    JMP_TARGET(emptyshr);
    XOR_r32_r32( R_EAX, R_EAX );
    JMP_rel8(5, end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
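/* SHAD/SHLD semantics (why the branches above are needed): a positive Rm
 * shifts Rn left by Rm&0x1F; a negative Rm shifts right by (-Rm)&0x1F,
 * except that a right-shift count of 0 (e.g. Rm = -32) means "shift all the
 * way out": SHAD fills Rn with its sign bit (hence SAR 31) and SHLD clears
 * it (hence the XOR). x86 variable shifts mask the count to 5 bits and
 * would shift by zero, so the emptysar/emptyshr cases handle this
 * explicitly. (Explanatory note only.)
 */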
SHAL Rn {:
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHAR Rn {:
    load_reg( R_EAX, Rn );
    SAR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL Rn {:
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL2 Rn {:
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL8 Rn {:
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL16 Rn {:
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR Rn {:
    load_reg( R_EAX, Rn );
    SHR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLR2 Rn {:
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR8 Rn {:
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR16 Rn {:
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUB Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUBC Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SUBV Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
SWAP.B Rm, Rn {:
    load_reg( R_EAX, Rm );
    XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
    store_reg( R_EAX, Rn );
:}
SWAP.W Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    SHL_imm8_r32( 16, R_ECX );
    SHR_imm8_r32( 16, R_EAX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
TAS.B @Rn {:
    load_reg( R_EAX, Rn );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    SETE_t();
    OR_imm8_r8( 0x80, R_AL );
    POP_realigned_r32( R_ECX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
TST Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    TEST_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST #imm, R0 {:
    load_reg( R_EAX, 0 );
    TEST_imm32_r32( imm, R_EAX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    TEST_imm8_r8( imm, R_AL );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
XOR Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR #imm, R0 {:
    load_reg( R_EAX, 0 );
    XOR_imm32_r32( imm, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE(R_EAX, R_EAX);
    POP_realigned_r32(R_ECX);
    XOR_imm32_r32( imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
XTRCT Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    SHR_imm8_r32( 16, R_ECX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
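/* XTRCT extracts the middle 32 bits of the 64-bit concatenation Rm:Rn,
 * i.e. (Rm << 16) | (Rn >> 16), which is exactly the shift/OR pair above.
 * E.g. Rm = 0xAAAABBBB, Rn = 0xCCCCDDDD gives Rn = 0xBBBBCCCC. (Worked
 * example only.)
 */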
/* Data move instructions */
MOV Rm, Rn {:
    load_reg( R_EAX, Rm );
    store_reg( R_EAX, Rn );
:}
MOV #imm, Rn {:
    load_imm32( R_EAX, imm );
    store_reg( R_EAX, Rn );
:}
MOV.B Rm, @Rn {:
    load_reg( R_EAX, Rn );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @-Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm, Rn {:
    load_reg( R_EAX, Rm );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm+, Rn {:
    load_reg( R_EAX, Rm );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, Rm), R0 {:
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @Rn {:
    load_reg( R_EAX, Rn );
    check_walign32(R_EAX);
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @-Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -4, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm+, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, PC), Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
        if( IS_IN_ICACHE(target) ) {
            // If the target address is in the same page as the code, it's
            // pretty safe to just ref it directly and circumvent the whole
            // memory subsystem. (this is a big performance win)

            // FIXME: There's a corner-case that's not handled here when
            // the current code-page is in the ITLB but not in the UTLB.
            // (should generate a TLB miss although need to test SH4
            // behaviour to confirm) Unlikely to be anyone depending on this
            // behaviour though.
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOV_moff32_EAX( ptr );
        } else {
            // Note: we use sh4r.pc for the calc as we could be running at a
            // different virtual address than the translation was done with,
            // but we can safely assume that the low bits are the same.
            load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
            ADD_sh4r_r32( R_PC, R_EAX );
            MMU_TRANSLATE_READ( R_EAX );
            MEM_READ_LONG( R_EAX, R_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( R_EAX, Rn );
    }
:}
MOV.L @(disp, Rm), Rn {:
    load_reg( R_EAX, Rm );
    ADD_imm8s_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @Rn {:
    load_reg( R_EAX, Rn );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @-Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -2, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm+, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, PC), Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        // See comments for MOV.L @(disp, PC), Rn
        uint32_t target = pc + disp + 4;
        if( IS_IN_ICACHE(target) ) {
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOV_moff32_EAX( ptr );
            MOVSX_r16_r32( R_EAX, R_EAX );
        } else {
            load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
            ADD_sh4r_r32( R_PC, R_EAX );
            MMU_TRANSLATE_READ( R_EAX );
            MEM_READ_WORD( R_EAX, R_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( R_EAX, Rn );
    }
:}
MOV.W @(disp, Rm), R0 {:
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVA @(disp, PC), R0 {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
        ADD_sh4r_r32( R_PC, R_ECX );
        store_reg( R_ECX, 0 );
        sh4_x86.tstate = TSTATE_NONE;
    }
:}
MOVCA.L R0, @Rn {:
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Control transfer instructions */
BF disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        JT_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
        exit_block_rel(target, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
:}
BF/S disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        if( UNTRANSLATABLE(pc+2) ) {
            load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
            JT_rel8(6,nottaken);
            ADD_imm32_r32( disp, R_EAX );
            JMP_TARGET(nottaken);
            ADD_sh4r_r32( R_PC, R_EAX );
            store_spreg( R_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            sh4_x86.branch_taken = TRUE;
            return 2;
        } else {
            if( sh4_x86.tstate == TSTATE_NONE ) {
                CMP_imm8s_sh4r( 1, R_T );
                sh4_x86.tstate = TSTATE_E;
            }
            sh4vma_t target = disp + pc + 4;
            OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
            sh4_translate_instruction(pc+2);
            exit_block_rel( target, pc+4 );

            // not taken (delay slot is translated a second time for this path)
            *patch = (xlat_output - ((uint8_t *)patch)) - 4;
            sh4_translate_instruction(pc+2);
            return 4;
        }
    }
:}
BRA disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            load_spreg( R_EAX, R_PC );
            ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
            store_spreg( R_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_rel( disp + pc + 4, pc+4 );
            return 4;
        }
    }
:}
BRAF Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
        store_spreg( R_EAX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
:}
BSR disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        sh4_x86.tstate = TSTATE_NONE;
        if( UNTRANSLATABLE(pc+2) ) {
            ADD_imm32_r32( disp, R_EAX );
            store_spreg( R_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_rel( disp + pc + 4, pc+4 );
            return 4;
        }
    }
:}
BSRF Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
        store_spreg( R_EAX, R_NEW_PC );

        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
:}
BT disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        JF_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
        exit_block_rel(target, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
:}
BT/S disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        if( UNTRANSLATABLE(pc+2) ) {
            load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
            JF_rel8(6,nottaken);
            ADD_imm32_r32( disp, R_EAX );
            JMP_TARGET(nottaken);
            ADD_sh4r_r32( R_PC, R_EAX );
            store_spreg( R_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            sh4_x86.branch_taken = TRUE;
            return 2;
        } else {
            if( sh4_x86.tstate == TSTATE_NONE ) {
                CMP_imm8s_sh4r( 1, R_T );
                sh4_x86.tstate = TSTATE_E;
            }
            OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
            sh4_translate_instruction(pc+2);
            exit_block_rel( disp + pc + 4, pc+4 );
            // not taken (delay slot is translated a second time for this path)
            *patch = (xlat_output - ((uint8_t *)patch)) - 4;
            sh4_translate_instruction(pc+2);
            return 4;
        }
    }
:}
JMP @Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_reg( R_ECX, Rn );
        store_spreg( R_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
:}
JSR @Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        load_reg( R_ECX, Rn );
        store_spreg( R_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        sh4_x86.tstate = TSTATE_NONE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
:}
  1633 RTE {:  
  1634     if( sh4_x86.in_delay_slot ) {
  1635 	SLOTILLEGAL();
  1636     } else {
  1637 	check_priv();
  1638 	load_spreg( R_ECX, R_SPC );
  1639 	store_spreg( R_ECX, R_NEW_PC );
  1640 	load_spreg( R_EAX, R_SSR );
  1641 	call_func1( sh4_write_sr, R_EAX );
  1642 	sh4_x86.in_delay_slot = DELAY_PC;
  1643 	sh4_x86.priv_checked = FALSE;
  1644 	sh4_x86.fpuen_checked = FALSE;
  1645 	sh4_x86.tstate = TSTATE_NONE;
  1646 	sh4_x86.branch_taken = TRUE;
  1647 	if( UNTRANSLATABLE(pc+2) ) {
  1648 	    exit_block_emu(pc+2);
  1649 	    return 2;
  1650 	} else {
  1651 	    sh4_translate_instruction(pc+2);
  1652 	    exit_block_newpcset(pc+2);
  1653 	    return 4;
  1656 :}
  1657 RTS {:  
  1658     if( sh4_x86.in_delay_slot ) {
  1659 	SLOTILLEGAL();
  1660     } else {
  1661 	load_spreg( R_ECX, R_PR );
  1662 	store_spreg( R_ECX, R_NEW_PC );
  1663 	sh4_x86.in_delay_slot = DELAY_PC;
  1664 	sh4_x86.branch_taken = TRUE;
  1665 	if( UNTRANSLATABLE(pc+2) ) {
  1666 	    exit_block_emu(pc+2);
  1667 	    return 2;
  1668 	} else {
  1669 	    sh4_translate_instruction(pc+2);
  1670 	    exit_block_newpcset(pc+2);
  1671 	    return 4;
  1674 :}
  1675 TRAPA #imm {:  
  1676     if( sh4_x86.in_delay_slot ) {
  1677 	SLOTILLEGAL();
  1678     } else {
  1679 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1680 	ADD_r32_sh4r( R_ECX, R_PC );
  1681 	load_imm32( R_EAX, imm );
  1682 	call_func1( sh4_raise_trap, R_EAX );
  1683 	sh4_x86.tstate = TSTATE_NONE;
  1684 	exit_block_pcset(pc);
  1685 	sh4_x86.branch_taken = TRUE;
  1686 	return 2;
  1687     }
  1688 :}
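       /* Approximate TRAPA semantics, with sh4_raise_trap() assumed to do the
        * architectural SPC/SSR save and vector dispatch:
        *
        *     sh4r.pc += pc + 2 - block_start;   // commit PC before trapping
        *     sh4_raise_trap( imm );             // TRA = imm<<2, enter handler
        *
        * The block exits via exit_block_pcset() since the next PC is no
        * longer statically known. */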
  1689 UNDEF {:  
  1690     if( sh4_x86.in_delay_slot ) {
  1691 	SLOTILLEGAL();
  1692     } else {
  1693 	JMP_exc(EXC_ILLEGAL);
  1694 	return 2;
  1695     }
  1696 :}
  1698 CLRMAC {:  
  1699     XOR_r32_r32(R_EAX, R_EAX);
  1700     store_spreg( R_EAX, R_MACL );
  1701     store_spreg( R_EAX, R_MACH );
  1702     sh4_x86.tstate = TSTATE_NONE;
  1703 :}
  1704 CLRS {:
  1705     CLC();
  1706     SETC_sh4r(R_S);
  1707     sh4_x86.tstate = TSTATE_C;
  1708 :}
  1709 CLRT {:  
  1710     CLC();
  1711     SETC_t();
  1712     sh4_x86.tstate = TSTATE_C;
  1713 :}
  1714 SETS {:  
  1715     STC();
  1716     SETC_sh4r(R_S);
  1717     sh4_x86.tstate = TSTATE_C;
  1718 :}
  1719 SETT {:  
  1720     STC();
  1721     SETC_t();
  1722     sh4_x86.tstate = TSTATE_C;
  1723 :}
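       /* The four flag instructions above intentionally leave the host carry
        * flag equal to the bit just written (CLC/STC before SETC_*), so
        * recording tstate = TSTATE_C lets a following conditional branch test
        * the live x86 flags; e.g. SETT; BT disp can emit a bare jc without
        * re-reading sh4r.t. */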
  1725 /* Floating point moves */
  1726 FMOV FRm, FRn {:  
  1727     /* As horrible as this looks, it's actually covering 5 separate cases:
  1728      * 1. 32-bit fr-to-fr (PR=0)
  1729      * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )
  1730      * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )
  1731      * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
  1732      * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
  1733      */
  1734     check_fpuen();
  1735     load_spreg( R_ECX, R_FPSCR );
  1736     load_fr_bank( R_EDX );
  1737     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1738     JNE_rel8(8, doublesize);
  1739     load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
  1740     store_fr( R_EDX, R_EAX, FRn );
  1741     if( FRm&1 ) {
  1742 	JMP_rel8(24, end);
  1743 	JMP_TARGET(doublesize);
  1744 	load_xf_bank( R_ECX ); 
  1745 	load_fr( R_ECX, R_EAX, FRm-1 );
  1746 	if( FRn&1 ) {
  1747 	    load_fr( R_ECX, R_EDX, FRm );
  1748 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1749 	    store_fr( R_ECX, R_EDX, FRn );
  1750 	} else /* FRn&1 == 0 */ {
  1751 	    load_fr( R_ECX, R_ECX, FRm );
  1752 	    store_fr( R_EDX, R_EAX, FRn );
  1753 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1754 	}
  1755 	JMP_TARGET(end);
  1756     } else /* FRm&1 == 0 */ {
  1757 	if( FRn&1 ) {
  1758 	    JMP_rel8(24, end);
  1759 	    load_xf_bank( R_ECX );
  1760 	    load_fr( R_EDX, R_EAX, FRm );
  1761 	    load_fr( R_EDX, R_EDX, FRm+1 );
  1762 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1763 	    store_fr( R_ECX, R_EDX, FRn );
  1764 	    JMP_TARGET(end);
  1765 	} else /* FRn&1 == 0 */ {
  1766 	    JMP_rel8(12, end);
  1767 	    load_fr( R_EDX, R_EAX, FRm );
  1768 	    load_fr( R_EDX, R_ECX, FRm+1 );
  1769 	    store_fr( R_EDX, R_EAX, FRn );
  1770 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1771 	    JMP_TARGET(end);
  1772 	}
  1773     }
  1774     sh4_x86.tstate = TSTATE_NONE;
  1775 :}
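       /* Only FPSCR.SZ needs the runtime test-and-jump above; the odd/even
        * register cases are fixed by the opcode and resolved at translation
        * time, following the usual SH4 pairing:
        *
        *     DR2n = { FR[2n], FR[2n+1] }   (even encoding, current bank)
        *     XD2n = { XF[2n], XF[2n+1] }   (odd encoding selects the xf bank)
        */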
  1776 FMOV FRm, @Rn {: 
  1777     check_fpuen();
  1778     load_reg( R_EAX, Rn );
  1779     check_walign32( R_EAX );
  1780     MMU_TRANSLATE_WRITE( R_EAX );
  1781     load_spreg( R_EDX, R_FPSCR );
  1782     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1783     JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
  1784     load_fr_bank( R_EDX );
  1785     load_fr( R_EDX, R_ECX, FRm );
  1786     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1787     if( FRm&1 ) {
  1788 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1789 	JMP_TARGET(doublesize);
  1790 	load_xf_bank( R_EDX );
  1791 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1792 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1793 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1794 	JMP_TARGET(end);
  1795     } else {
  1796 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1797 	JMP_TARGET(doublesize);
  1798 	load_fr_bank( R_EDX );
  1799 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1800 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1801 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1802 	JMP_TARGET(end);
  1803     }
  1804     sh4_x86.tstate = TSTATE_NONE;
  1805 :}
  1806 FMOV @Rm, FRn {:  
  1807     check_fpuen();
  1808     load_reg( R_EAX, Rm );
  1809     check_ralign32( R_EAX );
  1810     MMU_TRANSLATE_READ( R_EAX );
  1811     load_spreg( R_EDX, R_FPSCR );
  1812     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1813     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1814     MEM_READ_LONG( R_EAX, R_EAX );
  1815     load_fr_bank( R_EDX );
  1816     store_fr( R_EDX, R_EAX, FRn );
  1817     if( FRn&1 ) {
  1818 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1819 	JMP_TARGET(doublesize);
  1820 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1821 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1822 	load_xf_bank( R_EDX );
  1823 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1824 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1825 	JMP_TARGET(end);
  1826     } else {
  1827 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1828 	JMP_TARGET(doublesize);
  1829 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1830 	load_fr_bank( R_EDX );
  1831 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1832 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1833 	JMP_TARGET(end);
  1834     }
  1835     sh4_x86.tstate = TSTATE_NONE;
  1836 :}
  1837 FMOV FRm, @-Rn {:  
  1838     check_fpuen();
  1839     load_reg( R_EAX, Rn );
  1840     check_walign32( R_EAX );
  1841     load_spreg( R_EDX, R_FPSCR );
  1842     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1843     JNE_rel8(15 + MEM_WRITE_SIZE + MMU_TRANSLATE_SIZE, doublesize);
  1844     ADD_imm8s_r32( -4, R_EAX );
  1845     MMU_TRANSLATE_WRITE( R_EAX );
  1846     load_fr_bank( R_EDX );
  1847     load_fr( R_EDX, R_ECX, FRm );
  1848     ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1849     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1850     if( FRm&1 ) {
  1851 	JMP_rel8( 25 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
  1852 	JMP_TARGET(doublesize);
  1853 	ADD_imm8s_r32(-8,R_EAX);
  1854 	MMU_TRANSLATE_WRITE( R_EAX );
  1855 	load_xf_bank( R_EDX );
  1856 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1857 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1858 	ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1859 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1860 	JMP_TARGET(end);
  1861     } else {
  1862 	JMP_rel8( 16 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
  1863 	JMP_TARGET(doublesize);
  1864 	ADD_imm8s_r32(-8,R_EAX);
  1865 	MMU_TRANSLATE_WRITE( R_EAX );
  1866 	load_fr_bank( R_EDX );
  1867 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1868 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1869 	ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1870 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1871 	JMP_TARGET(end);
  1872     }
  1873     sh4_x86.tstate = TSTATE_NONE;
  1874 :}
  1875 FMOV @Rm+, FRn {:
  1876     check_fpuen();
  1877     load_reg( R_EAX, Rm );
  1878     check_ralign32( R_EAX );
  1879     MMU_TRANSLATE_READ( R_EAX );
  1880     load_spreg( R_EDX, R_FPSCR );
  1881     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1882     JNE_rel8(12 + MEM_READ_SIZE, doublesize);
  1883     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1884     MEM_READ_LONG( R_EAX, R_EAX );
  1885     load_fr_bank( R_EDX );
  1886     store_fr( R_EDX, R_EAX, FRn );
  1887     if( FRn&1 ) {
  1888 	JMP_rel8(25 + MEM_READ_DOUBLE_SIZE, end);
  1889 	JMP_TARGET(doublesize);
  1890 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1891 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1892 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1893 	load_xf_bank( R_EDX );
  1894 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1895 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1896 	JMP_TARGET(end);
  1897     } else {
  1898 	JMP_rel8(13 + MEM_READ_DOUBLE_SIZE, end);
  1899 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1900 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1901 	load_fr_bank( R_EDX );
  1902 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1903 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1904 	JMP_TARGET(end);
  1905     }
  1906     sh4_x86.tstate = TSTATE_NONE;
  1907 :}
  1908 FMOV FRm, @(R0, Rn) {:  
  1909     check_fpuen();
  1910     load_reg( R_EAX, Rn );
  1911     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1912     check_walign32( R_EAX );
  1913     MMU_TRANSLATE_WRITE( R_EAX );
  1914     load_spreg( R_EDX, R_FPSCR );
  1915     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1916     JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
  1917     load_fr_bank( R_EDX );
  1918     load_fr( R_EDX, R_ECX, FRm );
  1919     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1920     if( FRm&1 ) {
  1921 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1922 	JMP_TARGET(doublesize);
  1923 	load_xf_bank( R_EDX );
  1924 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1925 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1926 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1927 	JMP_TARGET(end);
  1928     } else {
  1929 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1930 	JMP_TARGET(doublesize);
  1931 	load_fr_bank( R_EDX );
  1932 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1933 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1934 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1935 	JMP_TARGET(end);
  1936     }
  1937     sh4_x86.tstate = TSTATE_NONE;
  1938 :}
  1939 FMOV @(R0, Rm), FRn {:  
  1940     check_fpuen();
  1941     load_reg( R_EAX, Rm );
  1942     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1943     check_ralign32( R_EAX );
  1944     MMU_TRANSLATE_READ( R_EAX );
  1945     load_spreg( R_EDX, R_FPSCR );
  1946     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1947     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1948     MEM_READ_LONG( R_EAX, R_EAX );
  1949     load_fr_bank( R_EDX );
  1950     store_fr( R_EDX, R_EAX, FRn );
  1951     if( FRn&1 ) {
  1952 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1953 	JMP_TARGET(doublesize);
  1954 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1955 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1956 	load_xf_bank( R_EDX );
  1957 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1958 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1959 	JMP_TARGET(end);
  1960     } else {
  1961 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1962 	JMP_TARGET(doublesize);
  1963 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1964 	load_fr_bank( R_EDX );
  1965 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1966 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1967 	JMP_TARGET(end);
  1968     }
  1969     sh4_x86.tstate = TSTATE_NONE;
  1970 :}
  1971 FLDI0 FRn {:  /* IFF PR=0 */
  1972     check_fpuen();
  1973     load_spreg( R_ECX, R_FPSCR );
  1974     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1975     JNE_rel8(8, end);
  1976     XOR_r32_r32( R_EAX, R_EAX );
  1977     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1978     store_fr( R_ECX, R_EAX, FRn );
  1979     JMP_TARGET(end);
  1980     sh4_x86.tstate = TSTATE_NONE;
  1981 :}
  1982 FLDI1 FRn {:  /* IFF PR=0 */
  1983     check_fpuen();
  1984     load_spreg( R_ECX, R_FPSCR );
  1985     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1986     JNE_rel8(11, end);
  1987     load_imm32(R_EAX, 0x3F800000);
  1988     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1989     store_fr( R_ECX, R_EAX, FRn );
  1990     JMP_TARGET(end);
  1991     sh4_x86.tstate = TSTATE_NONE;
  1992 :}
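       /* FLDI0/FLDI1 store raw IEEE754 single-precision bit patterns rather
        * than going through the x87 stack: 0x00000000 is 0.0f and 0x3F800000
        * is 1.0f. Both encodings are undefined when PR=1, hence the guard. */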
  1994 FLOAT FPUL, FRn {:  
  1995     check_fpuen();
  1996     load_spreg( R_ECX, R_FPSCR );
  1997     load_spreg(R_EDX, REG_OFFSET(fr_bank));
  1998     FILD_sh4r(R_FPUL);
  1999     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2000     JNE_rel8(5, doubleprec);
  2001     pop_fr( R_EDX, FRn );
  2002     JMP_rel8(3, end);
  2003     JMP_TARGET(doubleprec);
  2004     pop_dr( R_EDX, FRn );
  2005     JMP_TARGET(end);
  2006     sh4_x86.tstate = TSTATE_NONE;
  2007 :}
  2008 FTRC FRm, FPUL {:  
  2009     check_fpuen();
  2010     load_spreg( R_ECX, R_FPSCR );
  2011     load_fr_bank( R_EDX );
  2012     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2013     JNE_rel8(5, doubleprec);
  2014     push_fr( R_EDX, FRm );
  2015     JMP_rel8(3, doop);
  2016     JMP_TARGET(doubleprec);
  2017     push_dr( R_EDX, FRm );
  2018     JMP_TARGET( doop );
  2019     load_imm32( R_ECX, (uint32_t)&max_int );
  2020     FILD_r32ind( R_ECX );
  2021     FCOMIP_st(1);
  2022     JNA_rel8( 32, sat );
  2023     load_imm32( R_ECX, (uint32_t)&min_int );  // 5
  2024     FILD_r32ind( R_ECX );           // 2
  2025     FCOMIP_st(1);                   // 2
  2026     JAE_rel8( 21, sat2 );            // 2
  2027     load_imm32( R_EAX, (uint32_t)&save_fcw );
  2028     FNSTCW_r32ind( R_EAX );
  2029     load_imm32( R_EDX, (uint32_t)&trunc_fcw );
  2030     FLDCW_r32ind( R_EDX );
  2031     FISTP_sh4r(R_FPUL);             // 3
  2032     FLDCW_r32ind( R_EAX );
  2033     JMP_rel8( 9, end );             // 2
  2035     JMP_TARGET(sat);
  2036     JMP_TARGET(sat2);
  2037     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  2038     store_spreg( R_ECX, R_FPUL );
  2039     FPOP_st();
  2040     JMP_TARGET(end);
  2041     sh4_x86.tstate = TSTATE_NONE;
  2042 :}
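       /* The FTRC sequence behaves roughly like the following C, using the
        * max_int/min_int/save_fcw/trunc_fcw globals referenced above:
        *
        *     double v = PR ? DRm : FRm;
        *     if( !(v < (double)max_int) )      sh4r.fpul = max_int;  // +sat/NaN
        *     else if( !(v > (double)min_int) ) sh4r.fpul = min_int;  // -sat
        *     else                              sh4r.fpul = (int32_t)v;
        *
        * The FNSTCW/FLDCW pair switches the x87 rounding mode to truncation
        * for the conversion (SH4 FTRC always rounds toward zero) and then
        * restores the saved control word. */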
  2043 FLDS FRm, FPUL {:  
  2044     check_fpuen();
  2045     load_fr_bank( R_ECX );
  2046     load_fr( R_ECX, R_EAX, FRm );
  2047     store_spreg( R_EAX, R_FPUL );
  2048     sh4_x86.tstate = TSTATE_NONE;
  2049 :}
  2050 FSTS FPUL, FRn {:  
  2051     check_fpuen();
  2052     load_fr_bank( R_ECX );
  2053     load_spreg( R_EAX, R_FPUL );
  2054     store_fr( R_ECX, R_EAX, FRn );
  2055     sh4_x86.tstate = TSTATE_NONE;
  2056 :}
  2057 FCNVDS FRm, FPUL {:  
  2058     check_fpuen();
  2059     load_spreg( R_ECX, R_FPSCR );
  2060     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2061     JE_rel8(9, end); // only when PR=1
  2062     load_fr_bank( R_ECX );
  2063     push_dr( R_ECX, FRm );
  2064     pop_fpul();
  2065     JMP_TARGET(end);
  2066     sh4_x86.tstate = TSTATE_NONE;
  2067 :}
  2068 FCNVSD FPUL, FRn {:  
  2069     check_fpuen();
  2070     load_spreg( R_ECX, R_FPSCR );
  2071     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2072     JE_rel8(9, end); // only when PR=1
  2073     load_fr_bank( R_ECX );
  2074     push_fpul();
  2075     pop_dr( R_ECX, FRn );
  2076     JMP_TARGET(end);
  2077     sh4_x86.tstate = TSTATE_NONE;
  2078 :}
  2080 /* Floating point instructions */
  2081 FABS FRn {:  
  2082     check_fpuen();
  2083     load_spreg( R_ECX, R_FPSCR );
  2084     load_fr_bank( R_EDX );
  2085     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2086     JNE_rel8(10, doubleprec);
  2087     push_fr(R_EDX, FRn); // 3
  2088     FABS_st0(); // 2
  2089     pop_fr( R_EDX, FRn); //3
  2090     JMP_rel8(8,end); // 2
  2091     JMP_TARGET(doubleprec);
  2092     push_dr(R_EDX, FRn);
  2093     FABS_st0();
  2094     pop_dr(R_EDX, FRn);
  2095     JMP_TARGET(end);
  2096     sh4_x86.tstate = TSTATE_NONE;
  2097 :}
  2098 FADD FRm, FRn {:  
  2099     check_fpuen();
  2100     load_spreg( R_ECX, R_FPSCR );
  2101     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2102     load_fr_bank( R_EDX );
  2103     JNE_rel8(13,doubleprec);
  2104     push_fr(R_EDX, FRm);
  2105     push_fr(R_EDX, FRn);
  2106     FADDP_st(1);
  2107     pop_fr(R_EDX, FRn);
  2108     JMP_rel8(11,end);
  2109     JMP_TARGET(doubleprec);
  2110     push_dr(R_EDX, FRm);
  2111     push_dr(R_EDX, FRn);
  2112     FADDP_st(1);
  2113     pop_dr(R_EDX, FRn);
  2114     JMP_TARGET(end);
  2115     sh4_x86.tstate = TSTATE_NONE;
  2116 :}
  2117 FDIV FRm, FRn {:  
  2118     check_fpuen();
  2119     load_spreg( R_ECX, R_FPSCR );
  2120     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2121     load_fr_bank( R_EDX );
  2122     JNE_rel8(13, doubleprec);
  2123     push_fr(R_EDX, FRn);
  2124     push_fr(R_EDX, FRm);
  2125     FDIVP_st(1);
  2126     pop_fr(R_EDX, FRn);
  2127     JMP_rel8(11, end);
  2128     JMP_TARGET(doubleprec);
  2129     push_dr(R_EDX, FRn);
  2130     push_dr(R_EDX, FRm);
  2131     FDIVP_st(1);
  2132     pop_dr(R_EDX, FRn);
  2133     JMP_TARGET(end);
  2134     sh4_x86.tstate = TSTATE_NONE;
  2135 :}
  2136 FMAC FR0, FRm, FRn {:  
  2137     check_fpuen();
  2138     load_spreg( R_ECX, R_FPSCR );
  2139     load_spreg( R_EDX, REG_OFFSET(fr_bank));
  2140     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2141     JNE_rel8(18, doubleprec);
  2142     push_fr( R_EDX, 0 );
  2143     push_fr( R_EDX, FRm );
  2144     FMULP_st(1);
  2145     push_fr( R_EDX, FRn );
  2146     FADDP_st(1);
  2147     pop_fr( R_EDX, FRn );
  2148     JMP_rel8(16, end);
  2149     JMP_TARGET(doubleprec);
  2150     push_dr( R_EDX, 0 );
  2151     push_dr( R_EDX, FRm );
  2152     FMULP_st(1);
  2153     push_dr( R_EDX, FRn );
  2154     FADDP_st(1);
  2155     pop_dr( R_EDX, FRn );
  2156     JMP_TARGET(end);
  2157     sh4_x86.tstate = TSTATE_NONE;
  2158 :}
  2160 FMUL FRm, FRn {:  
  2161     check_fpuen();
  2162     load_spreg( R_ECX, R_FPSCR );
  2163     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2164     load_fr_bank( R_EDX );
  2165     JNE_rel8(13, doubleprec);
  2166     push_fr(R_EDX, FRm);
  2167     push_fr(R_EDX, FRn);
  2168     FMULP_st(1);
  2169     pop_fr(R_EDX, FRn);
  2170     JMP_rel8(11, end);
  2171     JMP_TARGET(doubleprec);
  2172     push_dr(R_EDX, FRm);
  2173     push_dr(R_EDX, FRn);
  2174     FMULP_st(1);
  2175     pop_dr(R_EDX, FRn);
  2176     JMP_TARGET(end);
  2177     sh4_x86.tstate = TSTATE_NONE;
  2178 :}
  2179 FNEG FRn {:  
  2180     check_fpuen();
  2181     load_spreg( R_ECX, R_FPSCR );
  2182     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2183     load_fr_bank( R_EDX );
  2184     JNE_rel8(10, doubleprec);
  2185     push_fr(R_EDX, FRn);
  2186     FCHS_st0();
  2187     pop_fr(R_EDX, FRn);
  2188     JMP_rel8(8, end);
  2189     JMP_TARGET(doubleprec);
  2190     push_dr(R_EDX, FRn);
  2191     FCHS_st0();
  2192     pop_dr(R_EDX, FRn);
  2193     JMP_TARGET(end);
  2194     sh4_x86.tstate = TSTATE_NONE;
  2195 :}
  2196 FSRRA FRn {:  
  2197     check_fpuen();
  2198     load_spreg( R_ECX, R_FPSCR );
  2199     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2200     load_fr_bank( R_EDX );
  2201     JNE_rel8(12, end); // PR=0 only
  2202     FLD1_st0();
  2203     push_fr(R_EDX, FRn);
  2204     FSQRT_st0();
  2205     FDIVP_st(1);
  2206     pop_fr(R_EDX, FRn);
  2207     JMP_TARGET(end);
  2208     sh4_x86.tstate = TSTATE_NONE;
  2209 :}
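       /* FSRRA is computed exactly as FRn = 1.0 / sqrt(FRn) via fld1 / fsqrt /
        * fdivp, although the hardware instruction is only specified as an
        * approximation. PR=1 encodings fall through unchanged. */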
  2210 FSQRT FRn {:  
  2211     check_fpuen();
  2212     load_spreg( R_ECX, R_FPSCR );
  2213     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2214     load_fr_bank( R_EDX );
  2215     JNE_rel8(10, doubleprec);
  2216     push_fr(R_EDX, FRn);
  2217     FSQRT_st0();
  2218     pop_fr(R_EDX, FRn);
  2219     JMP_rel8(8, end);
  2220     JMP_TARGET(doubleprec);
  2221     push_dr(R_EDX, FRn);
  2222     FSQRT_st0();
  2223     pop_dr(R_EDX, FRn);
  2224     JMP_TARGET(end);
  2225     sh4_x86.tstate = TSTATE_NONE;
  2226 :}
  2227 FSUB FRm, FRn {:  
  2228     check_fpuen();
  2229     load_spreg( R_ECX, R_FPSCR );
  2230     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2231     load_fr_bank( R_EDX );
  2232     JNE_rel8(13, doubleprec);
  2233     push_fr(R_EDX, FRn);
  2234     push_fr(R_EDX, FRm);
  2235     FSUBP_st(1);
  2236     pop_fr(R_EDX, FRn);
  2237     JMP_rel8(11, end);
  2238     JMP_TARGET(doubleprec);
  2239     push_dr(R_EDX, FRn);
  2240     push_dr(R_EDX, FRm);
  2241     FSUBP_st(1);
  2242     pop_dr(R_EDX, FRn);
  2243     JMP_TARGET(end);
  2244     sh4_x86.tstate = TSTATE_NONE;
  2245 :}
  2247 FCMP/EQ FRm, FRn {:  
  2248     check_fpuen();
  2249     load_spreg( R_ECX, R_FPSCR );
  2250     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2251     load_fr_bank( R_EDX );
  2252     JNE_rel8(8, doubleprec);
  2253     push_fr(R_EDX, FRm);
  2254     push_fr(R_EDX, FRn);
  2255     JMP_rel8(6, end);
  2256     JMP_TARGET(doubleprec);
  2257     push_dr(R_EDX, FRm);
  2258     push_dr(R_EDX, FRn);
  2259     JMP_TARGET(end);
  2260     FCOMIP_st(1);
  2261     SETE_t();
  2262     FPOP_st();
  2263     sh4_x86.tstate = TSTATE_NONE;
  2264 :}
  2265 FCMP/GT FRm, FRn {:  
  2266     check_fpuen();
  2267     load_spreg( R_ECX, R_FPSCR );
  2268     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2269     load_fr_bank( R_EDX );
  2270     JNE_rel8(8, doubleprec);
  2271     push_fr(R_EDX, FRm);
  2272     push_fr(R_EDX, FRn);
  2273     JMP_rel8(6, end);
  2274     JMP_TARGET(doubleprec);
  2275     push_dr(R_EDX, FRm);
  2276     push_dr(R_EDX, FRn);
  2277     JMP_TARGET(end);
  2278     FCOMIP_st(1);
  2279     SETA_t();
  2280     FPOP_st();
  2281     sh4_x86.tstate = TSTATE_NONE;
  2282 :}
  2284 FSCA FPUL, FRn {:  
  2285     check_fpuen();
  2286     load_spreg( R_ECX, R_FPSCR );
  2287     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2288     JNE_rel8( CALL_FUNC2_SIZE + 9, doubleprec );
  2289     load_fr_bank( R_ECX );
  2290     ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
  2291     load_spreg( R_EDX, R_FPUL );
  2292     call_func2( sh4_fsca, R_EDX, R_ECX );
  2293     JMP_TARGET(doubleprec);
  2294     sh4_x86.tstate = TSTATE_NONE;
  2295 :}
  2296 FIPR FVm, FVn {:  
  2297     check_fpuen();
  2298     load_spreg( R_ECX, R_FPSCR );
  2299     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2300     JNE_rel8(44, doubleprec);
  2302     load_fr_bank( R_ECX );
  2303     push_fr( R_ECX, FVm<<2 );
  2304     push_fr( R_ECX, FVn<<2 );
  2305     FMULP_st(1);
  2306     push_fr( R_ECX, (FVm<<2)+1);
  2307     push_fr( R_ECX, (FVn<<2)+1);
  2308     FMULP_st(1);
  2309     FADDP_st(1);
  2310     push_fr( R_ECX, (FVm<<2)+2);
  2311     push_fr( R_ECX, (FVn<<2)+2);
  2312     FMULP_st(1);
  2313     FADDP_st(1);
  2314     push_fr( R_ECX, (FVm<<2)+3);
  2315     push_fr( R_ECX, (FVn<<2)+3);
  2316     FMULP_st(1);
  2317     FADDP_st(1);
  2318     pop_fr( R_ECX, (FVn<<2)+3);
  2319     JMP_TARGET(doubleprec);
  2320     sh4_x86.tstate = TSTATE_NONE;
  2321 :}
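       /* FIPR accumulates the single-precision dot product entirely on the
        * x87 stack, keeping the running sum resident between terms:
        *
        *     FR[4n+3] = FVm[0]*FVn[0] + FVm[1]*FVn[1]
        *              + FVm[2]*FVn[2] + FVm[3]*FVn[3]
        */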
  2322 FTRV XMTRX, FVn {:  
  2323     check_fpuen();
  2324     load_spreg( R_ECX, R_FPSCR );
  2325     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2326     JNE_rel8( 18 + CALL_FUNC2_SIZE, doubleprec );
  2327     load_fr_bank( R_EDX );                 // 3
  2328     ADD_imm8s_r32( FVn<<4, R_EDX );        // 3
  2329     load_xf_bank( R_ECX );                 // 12
  2330     call_func2( sh4_ftrv, R_EDX, R_ECX );  // 12
  2331     JMP_TARGET(doubleprec);
  2332     sh4_x86.tstate = TSTATE_NONE;
  2333 :}
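       /* FTRV defers to the sh4_ftrv() helper, passing the vector address
        * (fr_bank + FVn*16) and the opposite bank holding XMTRX; per the SH4
        * spec the result is the matrix-vector product FVn = XMTRX * FVn. */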
  2335 FRCHG {:  
  2336     check_fpuen();
  2337     load_spreg( R_ECX, R_FPSCR );
  2338     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2339     store_spreg( R_ECX, R_FPSCR );
  2340     update_fr_bank( R_ECX );
  2341     sh4_x86.tstate = TSTATE_NONE;
  2342 :}
  2343 FSCHG {:  
  2344     check_fpuen();
  2345     load_spreg( R_ECX, R_FPSCR );
  2346     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2347     store_spreg( R_ECX, R_FPSCR );
  2348     sh4_x86.tstate = TSTATE_NONE;
  2349 :}
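       /* FRCHG toggles FPSCR.FR, which swaps the fr/xf register banks, so the
        * cached bank pointer must be refreshed via update_fr_bank(); FSCHG
        * only flips the transfer-size bit, which each FMOV re-reads from
        * FPSCR at runtime anyway. */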
  2351 /* Processor control instructions */
  2352 LDC Rm, SR {:
  2353     if( sh4_x86.in_delay_slot ) {
  2354 	SLOTILLEGAL();
  2355     } else {
  2356 	check_priv();
  2357 	load_reg( R_EAX, Rm );
  2358 	call_func1( sh4_write_sr, R_EAX );
  2359 	sh4_x86.priv_checked = FALSE;
  2360 	sh4_x86.fpuen_checked = FALSE;
  2361 	sh4_x86.tstate = TSTATE_NONE;
  2362     }
  2363 :}
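       /* Writing SR may change privilege mode, register bank and the FPU
        * disable bit, so after sh4_write_sr() the cached priv_checked /
        * fpuen_checked state is dropped and the next dependent instruction
        * re-tests. LDC.L @Rm+, SR below does the same. */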
  2364 LDC Rm, GBR {: 
  2365     load_reg( R_EAX, Rm );
  2366     store_spreg( R_EAX, R_GBR );
  2367 :}
  2368 LDC Rm, VBR {:  
  2369     check_priv();
  2370     load_reg( R_EAX, Rm );
  2371     store_spreg( R_EAX, R_VBR );
  2372     sh4_x86.tstate = TSTATE_NONE;
  2373 :}
  2374 LDC Rm, SSR {:  
  2375     check_priv();
  2376     load_reg( R_EAX, Rm );
  2377     store_spreg( R_EAX, R_SSR );
  2378     sh4_x86.tstate = TSTATE_NONE;
  2379 :}
  2380 LDC Rm, SGR {:  
  2381     check_priv();
  2382     load_reg( R_EAX, Rm );
  2383     store_spreg( R_EAX, R_SGR );
  2384     sh4_x86.tstate = TSTATE_NONE;
  2385 :}
  2386 LDC Rm, SPC {:  
  2387     check_priv();
  2388     load_reg( R_EAX, Rm );
  2389     store_spreg( R_EAX, R_SPC );
  2390     sh4_x86.tstate = TSTATE_NONE;
  2391 :}
  2392 LDC Rm, DBR {:  
  2393     check_priv();
  2394     load_reg( R_EAX, Rm );
  2395     store_spreg( R_EAX, R_DBR );
  2396     sh4_x86.tstate = TSTATE_NONE;
  2397 :}
  2398 LDC Rm, Rn_BANK {:  
  2399     check_priv();
  2400     load_reg( R_EAX, Rm );
  2401     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2402     sh4_x86.tstate = TSTATE_NONE;
  2403 :}
  2404 LDC.L @Rm+, GBR {:  
  2405     load_reg( R_EAX, Rm );
  2406     check_ralign32( R_EAX );
  2407     MMU_TRANSLATE_READ( R_EAX );
  2408     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2409     MEM_READ_LONG( R_EAX, R_EAX );
  2410     store_spreg( R_EAX, R_GBR );
  2411     sh4_x86.tstate = TSTATE_NONE;
  2412 :}
  2413 LDC.L @Rm+, SR {:
  2414     if( sh4_x86.in_delay_slot ) {
  2415 	SLOTILLEGAL();
  2416     } else {
  2417 	check_priv();
  2418 	load_reg( R_EAX, Rm );
  2419 	check_ralign32( R_EAX );
  2420 	MMU_TRANSLATE_READ( R_EAX );
  2421 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2422 	MEM_READ_LONG( R_EAX, R_EAX );
  2423 	call_func1( sh4_write_sr, R_EAX );
  2424 	sh4_x86.priv_checked = FALSE;
  2425 	sh4_x86.fpuen_checked = FALSE;
  2426 	sh4_x86.tstate = TSTATE_NONE;
  2427     }
  2428 :}
  2429 LDC.L @Rm+, VBR {:  
  2430     check_priv();
  2431     load_reg( R_EAX, Rm );
  2432     check_ralign32( R_EAX );
  2433     MMU_TRANSLATE_READ( R_EAX );
  2434     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2435     MEM_READ_LONG( R_EAX, R_EAX );
  2436     store_spreg( R_EAX, R_VBR );
  2437     sh4_x86.tstate = TSTATE_NONE;
  2438 :}
  2439 LDC.L @Rm+, SSR {:
  2440     check_priv();
  2441     load_reg( R_EAX, Rm );
  2442     check_ralign32( R_EAX );
  2443     MMU_TRANSLATE_READ( R_EAX );
  2444     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2445     MEM_READ_LONG( R_EAX, R_EAX );
  2446     store_spreg( R_EAX, R_SSR );
  2447     sh4_x86.tstate = TSTATE_NONE;
  2448 :}
  2449 LDC.L @Rm+, SGR {:  
  2450     check_priv();
  2451     load_reg( R_EAX, Rm );
  2452     check_ralign32( R_EAX );
  2453     MMU_TRANSLATE_READ( R_EAX );
  2454     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2455     MEM_READ_LONG( R_EAX, R_EAX );
  2456     store_spreg( R_EAX, R_SGR );
  2457     sh4_x86.tstate = TSTATE_NONE;
  2458 :}
  2459 LDC.L @Rm+, SPC {:  
  2460     check_priv();
  2461     load_reg( R_EAX, Rm );
  2462     check_ralign32( R_EAX );
  2463     MMU_TRANSLATE_READ( R_EAX );
  2464     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2465     MEM_READ_LONG( R_EAX, R_EAX );
  2466     store_spreg( R_EAX, R_SPC );
  2467     sh4_x86.tstate = TSTATE_NONE;
  2468 :}
  2469 LDC.L @Rm+, DBR {:  
  2470     check_priv();
  2471     load_reg( R_EAX, Rm );
  2472     check_ralign32( R_EAX );
  2473     MMU_TRANSLATE_READ( R_EAX );
  2474     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2475     MEM_READ_LONG( R_EAX, R_EAX );
  2476     store_spreg( R_EAX, R_DBR );
  2477     sh4_x86.tstate = TSTATE_NONE;
  2478 :}
  2479 LDC.L @Rm+, Rn_BANK {:  
  2480     check_priv();
  2481     load_reg( R_EAX, Rm );
  2482     check_ralign32( R_EAX );
  2483     MMU_TRANSLATE_READ( R_EAX );
  2484     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2485     MEM_READ_LONG( R_EAX, R_EAX );
  2486     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2487     sh4_x86.tstate = TSTATE_NONE;
  2488 :}
  2489 LDS Rm, FPSCR {:  
  2490     load_reg( R_EAX, Rm );
  2491     store_spreg( R_EAX, R_FPSCR );
  2492     update_fr_bank( R_EAX );
  2493     sh4_x86.tstate = TSTATE_NONE;
  2494 :}
  2495 LDS.L @Rm+, FPSCR {:  
  2496     load_reg( R_EAX, Rm );
  2497     check_ralign32( R_EAX );
  2498     MMU_TRANSLATE_READ( R_EAX );
  2499     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2500     MEM_READ_LONG( R_EAX, R_EAX );
  2501     store_spreg( R_EAX, R_FPSCR );
  2502     update_fr_bank( R_EAX );
  2503     sh4_x86.tstate = TSTATE_NONE;
  2504 :}
  2505 LDS Rm, FPUL {:  
  2506     load_reg( R_EAX, Rm );
  2507     store_spreg( R_EAX, R_FPUL );
  2508 :}
  2509 LDS.L @Rm+, FPUL {:  
  2510     load_reg( R_EAX, Rm );
  2511     check_ralign32( R_EAX );
  2512     MMU_TRANSLATE_READ( R_EAX );
  2513     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2514     MEM_READ_LONG( R_EAX, R_EAX );
  2515     store_spreg( R_EAX, R_FPUL );
  2516     sh4_x86.tstate = TSTATE_NONE;
  2517 :}
  2518 LDS Rm, MACH {: 
  2519     load_reg( R_EAX, Rm );
  2520     store_spreg( R_EAX, R_MACH );
  2521 :}
  2522 LDS.L @Rm+, MACH {:  
  2523     load_reg( R_EAX, Rm );
  2524     check_ralign32( R_EAX );
  2525     MMU_TRANSLATE_READ( R_EAX );
  2526     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2527     MEM_READ_LONG( R_EAX, R_EAX );
  2528     store_spreg( R_EAX, R_MACH );
  2529     sh4_x86.tstate = TSTATE_NONE;
  2530 :}
  2531 LDS Rm, MACL {:  
  2532     load_reg( R_EAX, Rm );
  2533     store_spreg( R_EAX, R_MACL );
  2534 :}
  2535 LDS.L @Rm+, MACL {:  
  2536     load_reg( R_EAX, Rm );
  2537     check_ralign32( R_EAX );
  2538     MMU_TRANSLATE_READ( R_EAX );
  2539     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2540     MEM_READ_LONG( R_EAX, R_EAX );
  2541     store_spreg( R_EAX, R_MACL );
  2542     sh4_x86.tstate = TSTATE_NONE;
  2543 :}
  2544 LDS Rm, PR {:  
  2545     load_reg( R_EAX, Rm );
  2546     store_spreg( R_EAX, R_PR );
  2547 :}
  2548 LDS.L @Rm+, PR {:  
  2549     load_reg( R_EAX, Rm );
  2550     check_ralign32( R_EAX );
  2551     MMU_TRANSLATE_READ( R_EAX );
  2552     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2553     MEM_READ_LONG( R_EAX, R_EAX );
  2554     store_spreg( R_EAX, R_PR );
  2555     sh4_x86.tstate = TSTATE_NONE;
  2556 :}
  2557 LDTLB {:  
  2558     call_func0( MMU_ldtlb );
  2559 :}
  2560 OCBI @Rn {:  :}
  2561 OCBP @Rn {:  :}
  2562 OCBWB @Rn {:  :}
  2563 PREF @Rn {:
  2564     load_reg( R_EAX, Rn );
  2565     MOV_r32_r32( R_EAX, R_ECX );
  2566     AND_imm32_r32( 0xFC000000, R_EAX );
  2567     CMP_imm32_r32( 0xE0000000, R_EAX );
  2568     JNE_rel8(8+CALL_FUNC1_SIZE, end);
  2569     call_func1( sh4_flush_store_queue, R_ECX );
  2570     TEST_r32_r32( R_EAX, R_EAX );
  2571     JE_exc(-1);
  2572     JMP_TARGET(end);
  2573     sh4_x86.tstate = TSTATE_NONE;
  2574 :}
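       /* Only store-queue addresses do real work here; the masked compare is
        * equivalent to
        *
        *     if( (addr & 0xFC000000) == 0xE0000000 )  // 0xE0000000-0xE3FFFFFF
        *         sh4_flush_store_queue( addr );       // 0 => raise exception
        *
        * while any other address is treated as a plain prefetch hint. */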
  2575 SLEEP {: 
  2576     check_priv();
  2577     call_func0( sh4_sleep );
  2578     sh4_x86.tstate = TSTATE_NONE;
  2579     sh4_x86.in_delay_slot = DELAY_NONE;
  2580     return 2;
  2581 :}
  2582 STC SR, Rn {:
  2583     check_priv();
  2584     call_func0(sh4_read_sr);
  2585     store_reg( R_EAX, Rn );
  2586     sh4_x86.tstate = TSTATE_NONE;
  2587 :}
  2588 STC GBR, Rn {:  
  2589     load_spreg( R_EAX, R_GBR );
  2590     store_reg( R_EAX, Rn );
  2591 :}
  2592 STC VBR, Rn {:  
  2593     check_priv();
  2594     load_spreg( R_EAX, R_VBR );
  2595     store_reg( R_EAX, Rn );
  2596     sh4_x86.tstate = TSTATE_NONE;
  2597 :}
  2598 STC SSR, Rn {:  
  2599     check_priv();
  2600     load_spreg( R_EAX, R_SSR );
  2601     store_reg( R_EAX, Rn );
  2602     sh4_x86.tstate = TSTATE_NONE;
  2603 :}
  2604 STC SPC, Rn {:  
  2605     check_priv();
  2606     load_spreg( R_EAX, R_SPC );
  2607     store_reg( R_EAX, Rn );
  2608     sh4_x86.tstate = TSTATE_NONE;
  2609 :}
  2610 STC SGR, Rn {:  
  2611     check_priv();
  2612     load_spreg( R_EAX, R_SGR );
  2613     store_reg( R_EAX, Rn );
  2614     sh4_x86.tstate = TSTATE_NONE;
  2615 :}
  2616 STC DBR, Rn {:  
  2617     check_priv();
  2618     load_spreg( R_EAX, R_DBR );
  2619     store_reg( R_EAX, Rn );
  2620     sh4_x86.tstate = TSTATE_NONE;
  2621 :}
  2622 STC Rm_BANK, Rn {:
  2623     check_priv();
  2624     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2625     store_reg( R_EAX, Rn );
  2626     sh4_x86.tstate = TSTATE_NONE;
  2627 :}
  2628 STC.L SR, @-Rn {:
  2629     check_priv();
  2630     load_reg( R_EAX, Rn );
  2631     check_walign32( R_EAX );
  2632     ADD_imm8s_r32( -4, R_EAX );
  2633     MMU_TRANSLATE_WRITE( R_EAX );
  2634     PUSH_realigned_r32( R_EAX );
  2635     call_func0( sh4_read_sr );
  2636     POP_realigned_r32( R_ECX );
  2637     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2638     MEM_WRITE_LONG( R_ECX, R_EAX );
  2639     sh4_x86.tstate = TSTATE_NONE;
  2640 :}
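       /* sh4_read_sr() is called while the translated store address is live
        * in EAX, hence the PUSH_realigned_r32/POP_realigned_r32 pair, which
        * presumably preserves the value while keeping the stack-alignment
        * bookkeeping consistent across the call. */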
  2641 STC.L VBR, @-Rn {:  
  2642     check_priv();
  2643     load_reg( R_EAX, Rn );
  2644     check_walign32( R_EAX );
  2645     ADD_imm8s_r32( -4, R_EAX );
  2646     MMU_TRANSLATE_WRITE( R_EAX );
  2647     load_spreg( R_EDX, R_VBR );
  2648     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2649     MEM_WRITE_LONG( R_EAX, R_EDX );
  2650     sh4_x86.tstate = TSTATE_NONE;
  2651 :}
  2652 STC.L SSR, @-Rn {:  
  2653     check_priv();
  2654     load_reg( R_EAX, Rn );
  2655     check_walign32( R_EAX );
  2656     ADD_imm8s_r32( -4, R_EAX );
  2657     MMU_TRANSLATE_WRITE( R_EAX );
  2658     load_spreg( R_EDX, R_SSR );
  2659     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2660     MEM_WRITE_LONG( R_EAX, R_EDX );
  2661     sh4_x86.tstate = TSTATE_NONE;
  2662 :}
  2663 STC.L SPC, @-Rn {:
  2664     check_priv();
  2665     load_reg( R_EAX, Rn );
  2666     check_walign32( R_EAX );
  2667     ADD_imm8s_r32( -4, R_EAX );
  2668     MMU_TRANSLATE_WRITE( R_EAX );
  2669     load_spreg( R_EDX, R_SPC );
  2670     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2671     MEM_WRITE_LONG( R_EAX, R_EDX );
  2672     sh4_x86.tstate = TSTATE_NONE;
  2673 :}
  2674 STC.L SGR, @-Rn {:  
  2675     check_priv();
  2676     load_reg( R_EAX, Rn );
  2677     check_walign32( R_EAX );
  2678     ADD_imm8s_r32( -4, R_EAX );
  2679     MMU_TRANSLATE_WRITE( R_EAX );
  2680     load_spreg( R_EDX, R_SGR );
  2681     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2682     MEM_WRITE_LONG( R_EAX, R_EDX );
  2683     sh4_x86.tstate = TSTATE_NONE;
  2684 :}
  2685 STC.L DBR, @-Rn {:  
  2686     check_priv();
  2687     load_reg( R_EAX, Rn );
  2688     check_walign32( R_EAX );
  2689     ADD_imm8s_r32( -4, R_EAX );
  2690     MMU_TRANSLATE_WRITE( R_EAX );
  2691     load_spreg( R_EDX, R_DBR );
  2692     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2693     MEM_WRITE_LONG( R_EAX, R_EDX );
  2694     sh4_x86.tstate = TSTATE_NONE;
  2695 :}
  2696 STC.L Rm_BANK, @-Rn {:  
  2697     check_priv();
  2698     load_reg( R_EAX, Rn );
  2699     check_walign32( R_EAX );
  2700     ADD_imm8s_r32( -4, R_EAX );
  2701     MMU_TRANSLATE_WRITE( R_EAX );
  2702     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2703     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2704     MEM_WRITE_LONG( R_EAX, R_EDX );
  2705     sh4_x86.tstate = TSTATE_NONE;
  2706 :}
  2707 STC.L GBR, @-Rn {:  
  2708     load_reg( R_EAX, Rn );
  2709     check_walign32( R_EAX );
  2710     ADD_imm8s_r32( -4, R_EAX );
  2711     MMU_TRANSLATE_WRITE( R_EAX );
  2712     load_spreg( R_EDX, R_GBR );
  2713     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2714     MEM_WRITE_LONG( R_EAX, R_EDX );
  2715     sh4_x86.tstate = TSTATE_NONE;
  2716 :}
  2717 STS FPSCR, Rn {:  
  2718     load_spreg( R_EAX, R_FPSCR );
  2719     store_reg( R_EAX, Rn );
  2720 :}
  2721 STS.L FPSCR, @-Rn {:  
  2722     load_reg( R_EAX, Rn );
  2723     check_walign32( R_EAX );
  2724     ADD_imm8s_r32( -4, R_EAX );
  2725     MMU_TRANSLATE_WRITE( R_EAX );
  2726     load_spreg( R_EDX, R_FPSCR );
  2727     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2728     MEM_WRITE_LONG( R_EAX, R_EDX );
  2729     sh4_x86.tstate = TSTATE_NONE;
  2730 :}
  2731 STS FPUL, Rn {:  
  2732     load_spreg( R_EAX, R_FPUL );
  2733     store_reg( R_EAX, Rn );
  2734 :}
  2735 STS.L FPUL, @-Rn {:  
  2736     load_reg( R_EAX, Rn );
  2737     check_walign32( R_EAX );
  2738     ADD_imm8s_r32( -4, R_EAX );
  2739     MMU_TRANSLATE_WRITE( R_EAX );
  2740     load_spreg( R_EDX, R_FPUL );
  2741     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2742     MEM_WRITE_LONG( R_EAX, R_EDX );
  2743     sh4_x86.tstate = TSTATE_NONE;
  2744 :}
  2745 STS MACH, Rn {:  
  2746     load_spreg( R_EAX, R_MACH );
  2747     store_reg( R_EAX, Rn );
  2748 :}
  2749 STS.L MACH, @-Rn {:  
  2750     load_reg( R_EAX, Rn );
  2751     check_walign32( R_EAX );
  2752     ADD_imm8s_r32( -4, R_EAX );
  2753     MMU_TRANSLATE_WRITE( R_EAX );
  2754     load_spreg( R_EDX, R_MACH );
  2755     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2756     MEM_WRITE_LONG( R_EAX, R_EDX );
  2757     sh4_x86.tstate = TSTATE_NONE;
  2758 :}
  2759 STS MACL, Rn {:  
  2760     load_spreg( R_EAX, R_MACL );
  2761     store_reg( R_EAX, Rn );
  2762 :}
  2763 STS.L MACL, @-Rn {:  
  2764     load_reg( R_EAX, Rn );
  2765     check_walign32( R_EAX );
  2766     ADD_imm8s_r32( -4, R_EAX );
  2767     MMU_TRANSLATE_WRITE( R_EAX );
  2768     load_spreg( R_EDX, R_MACL );
  2769     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2770     MEM_WRITE_LONG( R_EAX, R_EDX );
  2771     sh4_x86.tstate = TSTATE_NONE;
  2772 :}
  2773 STS PR, Rn {:  
  2774     load_spreg( R_EAX, R_PR );
  2775     store_reg( R_EAX, Rn );
  2776 :}
  2777 STS.L PR, @-Rn {:  
  2778     load_reg( R_EAX, Rn );
  2779     check_walign32( R_EAX );
  2780     ADD_imm8s_r32( -4, R_EAX );
  2781     MMU_TRANSLATE_WRITE( R_EAX );
  2782     load_spreg( R_EDX, R_PR );
  2783     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2784     MEM_WRITE_LONG( R_EAX, R_EDX );
  2785     sh4_x86.tstate = TSTATE_NONE;
  2786 :}
  2788 NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
  2789 %%
  2790     sh4_x86.in_delay_slot = DELAY_NONE;
  2791     return 0;