Search
lxdream.org :: lxdream/src/sh4/sh4x86.in
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 527:14c9489f647e
prev 526:ba3da45b5754
next 532:43653e748030
author nkeynes
date Sun Nov 18 11:12:44 2007 +0000 (12 years ago)
permissions -rw-r--r--
last change x86-64 translator work-in-progress
view annotate diff log raw
     1 /**
     2  * $Id: sh4x86.in,v 1.20 2007-11-08 11:54:16 nkeynes Exp $
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/xltcache.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4trans.h"
    31 #include "sh4/sh4mmio.h"
    32 #include "sh4/x86op.h"
    33 #include "clock.h"
    35 #define DEFAULT_BACKPATCH_SIZE 4096
/** 
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    gboolean in_delay_slot;    /* true while translating a branch delay slot */
    gboolean priv_checked; /* true if we've already checked the cpu mode. */
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    uint32_t block_start_pc;   /* SH4 PC of the first instruction in the block */
    int tstate;                /* x86 condition code currently mirroring sh4r.t, or TSTATE_NONE */

    /* Allocated memory for the (block-wide) back-patch list */
    uint32_t **backpatch_list; /* entries point at emitted rel32 fields to fix up */
    uint32_t backpatch_posn;   /* number of entries currently in backpatch_list */
    uint32_t backpatch_size;   /* capacity of backpatch_list, in entries */
};
#define TSTATE_NONE -1
/* When tstate != TSTATE_NONE it holds the x86 condition-code nibble (as used
 * in the 0x70+cc Jcc opcodes below) that currently reflects sh4r.t in the
 * live x86 flags, letting conditional branches skip re-testing sh4r.t. */
#define TSTATE_O    0
#define TSTATE_C    2
#define TSTATE_E    4
#define TSTATE_NE   5
#define TSTATE_G    0xF
#define TSTATE_GE   0xD
#define TSTATE_A    7
#define TSTATE_AE   3

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+sh4_x86.tstate); OP(rel8); \
    MARK_JMP(rel8,label)
/** Branch if T is clear (either in the current cflags or in sh4r.t)
 * (the ^1 inverts the condition code: Jcc opcodes pair cc/!cc) */
#define JF_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+ (sh4_x86.tstate^1)); OP(rel8); \
    MARK_JMP(rel8, label)

/* Byte offsets of the exception exit stubs emitted by
 * sh4_translate_end_block(); each stub is 7 bytes (5-byte load/push of the
 * exception code + 2-byte jmp), hence the spacing of 7. */
#define EXIT_DATA_ADDR_READ 0
#define EXIT_DATA_ADDR_WRITE 7
#define EXIT_ILLEGAL 14
#define EXIT_SLOT_ILLEGAL 21
#define EXIT_FPU_DISABLED 28
#define EXIT_SLOT_FPU_DISABLED 35
/* Per-block translation state; reset by sh4_translate_begin_block() */
static struct sh4_x86_state sh4_x86;

/* Saturation bounds - presumably used as memory operands by the FP/FTRC
 * emitters (not referenced in this part of the file) */
static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
    92 void sh4_x86_init()
    93 {
    94     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    95     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(uint32_t *);
    96 }
    99 static void sh4_x86_add_backpatch( uint8_t *ptr )
   100 {
   101     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   102 	sh4_x86.backpatch_size <<= 1;
   103 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, sh4_x86.backpatch_size * sizeof(uint32_t *) );
   104 	assert( sh4_x86.backpatch_list != NULL );
   105     }
   106     sh4_x86.backpatch_list[sh4_x86.backpatch_posn++] = (uint32_t *)ptr;
   107 }
/**
 * Convert every recorded rel32 field from an EXIT_* offset into a proper
 * x86 relative displacement targeting reloc_base + that offset.
 */
static void sh4_x86_do_backpatch( uint8_t *reloc_base )
{
    unsigned int i;
    for( i=0; i<sh4_x86.backpatch_posn; i++ ) {
	/* rel32 operands are relative to the end of the 4-byte field, hence -4 */
	*sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]) - 4);
    }
}
   117 /**
   118  * Emit an instruction to load an SH4 reg into a real register
   119  */
   120 static inline void load_reg( int x86reg, int sh4reg ) 
   121 {
   122     /* mov [bp+n], reg */
   123     OP(0x8B);
   124     OP(0x45 + (x86reg<<3));
   125     OP(REG_OFFSET(r[sh4reg]));
   126 }
   128 static inline void load_reg16s( int x86reg, int sh4reg )
   129 {
   130     OP(0x0F);
   131     OP(0xBF);
   132     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   133 }
/**
 * Emit a zero-extending 16-bit load (movzx, 0F B7) of SH4 register
 * sh4reg's low half-word into x86 register x86reg.
 */
static inline void load_reg16u( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xB7);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));

}

/* Load/store an arbitrary sh4r field (by byte offset) to/from an x86 reg */
#define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
#define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   145 /**
   146  * Emit an instruction to load an immediate value into a register
   147  */
   148 static inline void load_imm32( int x86reg, uint32_t value ) {
   149     /* mov #value, reg */
   150     OP(0xB8 + x86reg);
   151     OP32(value);
   152 }
   154 /**
   155  * Load an immediate 64-bit quantity (note: x86-64 only)
   156  */
   157 static inline void load_imm64( int x86reg, uint32_t value ) {
   158     /* mov #value, reg */
   159     REXW();
   160     OP(0xB8 + x86reg);
   161     OP64(value);
   162 }
   165 /**
   166  * Emit an instruction to store an SH4 reg (RN)
   167  */
   168 void static inline store_reg( int x86reg, int sh4reg ) {
   169     /* mov reg, [bp+n] */
   170     OP(0x89);
   171     OP(0x45 + (x86reg<<3));
   172     OP(REG_OFFSET(r[sh4reg]));
   173 }
   175 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
   177 /**
   178  * Load an FR register (single-precision floating point) into an integer x86
   179  * register (eg for register-to-register moves)
   180  */
   181 void static inline load_fr( int bankreg, int x86reg, int frm )
   182 {
   183     OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
   184 }
   186 /**
   187  * Store an FR register (single-precision floating point) into an integer x86
   188  * register (eg for register-to-register moves)
   189  */
   190 void static inline store_fr( int bankreg, int x86reg, int frn )
   191 {
   192     OP(0x89);  OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
   193 }
/**
 * Load a pointer to the back fp bank into the specified x86 register. The
 * bankreg must have been previously loaded with FPSCR.
 * NB: 12 bytes
 */
static inline void load_xf_bank( int bankreg )
{
    /* Select the bank that FPSCR.FR does NOT point at: invert FPSCR,
     * isolate bit 21 and scale it to the 64-byte bank stride */
    NOT_r32( bankreg );
    SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
    AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
    OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
}
/**
 * Update the fr_bank pointer based on the current fpscr value.
 * Same bank-selection arithmetic as load_xf_bank but without the NOT, so
 * it resolves the *front* bank, and the result is written back to
 * sh4r.fr_bank. fpscrreg must hold FPSCR on entry (clobbered).
 */
static inline void update_fr_bank( int fpscrreg )
{
    SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
    AND_imm8s_r32( 0x40, fpscrreg );    // Complete extraction
    OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
    store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
}
/**
 * Push FPUL (as a 32-bit float) onto the FPU stack
 */
static inline void push_fpul( )
{
    OP(0xD9); OP(0x45); OP(R_FPUL);   /* FLD dword [ebp+R_FPUL] */
}

/**
 * Pop FPUL (as a 32-bit float) from the FPU stack
 */
static inline void pop_fpul( )
{
    OP(0xD9); OP(0x5D); OP(R_FPUL);   /* FSTP dword [ebp+R_FPUL] */
}

/**
 * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
 * with the location of the current fp bank.
 */
static inline void push_fr( int bankreg, int frm ) 
{
    OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
}

/**
 * Pop a 32-bit float from the FPU stack and store it back into the fp bank, 
 * with bankreg previously loaded with the location of the current fp bank.
 * NOTE(review): ModRM reg field 3 (0x58+) encodes FSTP (store-and-pop),
 * which matches "pop" semantics even though the old comment said FST.
 */
static inline void pop_fr( int bankreg, int frm )
{
    OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FSTP.S [bankreg + frm^1*4]
}

/**
 * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
 * with the location of the current fp bank.
 */
static inline void push_dr( int bankreg, int frm )
{
    OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
}

/**
 * Pop a 64-bit double from the FPU stack back into the fp bank (FSTP.D).
 */
static inline void pop_dr( int bankreg, int frm )
{
    OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FSTP.D [bankreg + frm*4]
}
#if SH4_TRANSLATOR == TARGET_X86_64
/* X86-64 has different calling conventions... arguments are passed in
 * registers (EDI, ESI, ...) rather than on the stack. */
/**
 * Emit an indirect call to the C function at ptr.
 * Note: clobbers EAX to make the indirect call - this isn't usually
 * a problem since the callee will usually clobber it anyway.
 * Size: 12 bytes
 */
#define CALL_FUNC0_SIZE 12
static inline void call_func0( void *ptr )
{
    load_imm64(R_EAX, (uint64_t)ptr);  /* absolute address into RAX */
    CALL_r32(R_EAX);
}
#define CALL_FUNC1_SIZE 14
/** Call ptr with one argument: first integer arg goes in EDI per the
 * x86-64 calling convention. Clobbers EAX (and caller-save regs). */
static inline void call_func1( void *ptr, int arg1 )
{
    MOV_r32_r32(arg1, R_EDI);
    call_func0(ptr);
}
#define CALL_FUNC2_SIZE 16
/** Call ptr with two arguments in EDI/ESI.
 * NOTE(review): if arg2 is R_EDI it is clobbered by the first move before
 * being read - callers must avoid that register pairing; confirm no call
 * site does this. */
static inline void call_func2( void *ptr, int arg1, int arg2 )
{
    MOV_r32_r32(arg1, R_EDI);
    MOV_r32_r32(arg2, R_ESI);
    call_func0(ptr);
}
#define MEM_WRITE_DOUBLE_SIZE 39
/**
 * Write a double (64-bit) value into memory, with the first word in arg2a, and
 * the second in arg2b
 */
static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
{
/* Disabled single-call implementation via sh4_write_quad, kept for reference:
    MOV_r32_r32( addr, R_EDI );
    MOV_r32_r32( arg2b, R_ESI );
    REXW(); SHL_imm8_r32( 32, R_ESI );
    REXW(); MOVZX_r16_r32( arg2a, arg2a );
    REXW(); OR_r32_r32( arg2a, R_ESI );
    call_func0(sh4_write_quad);
*/
    /* Emitted as two 32-bit writes; addr and arg2b are saved on the stack
     * across the first call since the callee may clobber them */
    PUSH_r32(arg2b);
    PUSH_r32(addr);
    call_func2(sh4_write_long, addr, arg2a);
    POP_r32(addr);
    POP_r32(arg2b);
    ADD_imm8s_r32(4, addr);
    call_func2(sh4_write_long, addr, arg2b);
}
#define MEM_READ_DOUBLE_SIZE 35
/**
 * Read a double (64-bit) value from memory, writing the first word into arg2a
 * and the second into arg2b. The addr must not be in EAX
 */
static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
{
/* Disabled single-call implementation via sh4_read_quad, kept for reference:
    MOV_r32_r32( addr, R_EDI );
    call_func0(sh4_read_quad);
    REXW(); MOV_r32_r32( R_EAX, arg2a );
    REXW(); MOV_r32_r32( R_EAX, arg2b );
    REXW(); SHR_imm8_r32( 32, arg2b );
*/
    PUSH_r32(addr);                 /* preserve addr across the first call */
    call_func1(sh4_read_long, addr);
    POP_r32(R_EDI);                 /* saved addr straight into the arg register */
    PUSH_r32(R_EAX);                /* save the first word */
    ADD_imm8s_r32(4, R_EDI);
    call_func0(sh4_read_long);      /* arg already in EDI */
    MOV_r32_r32(R_EAX, arg2b);      /* second word (this is why addr must not be EAX) */
    POP_r32(arg2a);                 /* recover first word */
}
#define EXIT_BLOCK_SIZE 35
/**
 * Exit the block to an absolute PC
 */
void exit_block( sh4addr_t pc, sh4addr_t endpc )
{
    /* Write the branch target back to sh4r.pc */
    load_imm32( R_ECX, pc );                            // 5
    store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
    /* Fetch the translation-cache entry for pc; mask the low two (flag)
     * bits to recover the code pointer - presumably consumed by the
     * dispatch loop via EAX after RET; confirm against sh4trans.c */
    REXW(); MOV_moff32_EAX( xlat_get_lut_entry(pc) );
    REXW(); AND_imm8s_r32( 0xFC, R_EAX ); // 3
    /* Charge the SH4 cycles elapsed in this block */
    load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    POP_r32(R_EBP);
    RET();
}
/**
 * Write the block trailer (exception handling block)
 */
void sh4_translate_end_block( sh4addr_t pc ) {
    if( sh4_x86.branch_taken == FALSE ) {
	// Didn't exit unconditionally already, so write the termination here
	exit_block( pc, pc );
    }
    if( sh4_x86.backpatch_posn != 0 ) {
	uint8_t *end_ptr = xlat_output;
	// Exception termination. Jump block for various exception codes:
	// six 7-byte stubs (5-byte load + 2-byte jmp) at offsets matching the
	// EXIT_* constants; each loads the exception code into EDI (first
	// x86-64 argument) and falls/jumps into the common raise sequence.
	load_imm32( R_EDI, EXC_DATA_ADDR_READ );
	JMP_rel8( 33, target1 );
	load_imm32( R_EDI, EXC_DATA_ADDR_WRITE );
	JMP_rel8( 26, target2 );
	load_imm32( R_EDI, EXC_ILLEGAL );
	JMP_rel8( 19, target3 );
	load_imm32( R_EDI, EXC_SLOT_ILLEGAL ); 
	JMP_rel8( 12, target4 );
	load_imm32( R_EDI, EXC_FPU_DISABLED ); 
	JMP_rel8( 5, target5 );
	load_imm32( R_EDI, EXC_SLOT_FPU_DISABLED );
	// target
	JMP_TARGET(target1);
	JMP_TARGET(target2);
	JMP_TARGET(target3);
	JMP_TARGET(target4);
	JMP_TARGET(target5);
	// Raise exception. EDX holds the faulting instruction's offset from
	// the block start in SH4 words (loaded by precheck() at the branch site).
	load_spreg( R_ECX, REG_OFFSET(pc) );
	ADD_r32_r32( R_EDX, R_ECX );
	ADD_r32_r32( R_EDX, R_ECX );    // pc += 2 * word offset
	store_spreg( R_ECX, REG_OFFSET(pc) );
	MOV_moff32_EAX( &sh4_cpu_period );
	MUL_r32( R_EDX );               // cycles = period * word offset
	ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );

	call_func0( sh4_raise_exception );
	// Chain to the translated code for the exception vector
	load_spreg( R_EAX, REG_OFFSET(pc) );
	call_func1(xlat_get_code,R_EAX);
	POP_r32(R_EBP);
	RET();

	// Point the recorded JE_exit/JMP_exit branches at the stubs above
	sh4_x86_do_backpatch( end_ptr );
    }
}
#else /* SH4_TRANSLATOR == TARGET_X86 */

/**
 * Emit an indirect call to the C function at ptr (32-bit cdecl).
 * Note: clobbers EAX to make the indirect call - this isn't usually
 * a problem since the callee will usually clobber it anyway.
 */
#define CALL_FUNC0_SIZE 7
static inline void call_func0( void *ptr )
{
    load_imm32(R_EAX, (uint32_t)ptr);
    CALL_r32(R_EAX);
}
#define CALL_FUNC1_SIZE 11
/** Call ptr with one stack argument; caller (us) pops it afterwards
 * per the cdecl convention. */
static inline void call_func1( void *ptr, int arg1 )
{
    PUSH_r32(arg1);
    call_func0(ptr);
    ADD_imm8s_r32( 4, R_ESP );   /* discard the argument */
}
#define CALL_FUNC2_SIZE 12
/** Call ptr with two stack arguments (pushed right-to-left, cdecl). */
static inline void call_func2( void *ptr, int arg1, int arg2 )
{
    PUSH_r32(arg2);
    PUSH_r32(arg1);
    call_func0(ptr);
    ADD_imm8s_r32( 8, R_ESP );   /* discard both arguments */
}
/**
 * Write a double (64-bit) value into memory, with the first word in arg2a, and
 * the second in arg2b
 * NB: 30 bytes
 * Both argument pairs are pushed up front: the inner pair (addr, arg2a)
 * feeds the first sh4_write_long call, and after popping it the outer pair
 * (addr+4, arg2b) is in position for the second call.
 */
#define MEM_WRITE_DOUBLE_SIZE 30
static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
{
    ADD_imm8s_r32( 4, addr );
    PUSH_r32(arg2b);
    PUSH_r32(addr);              /* outer frame: (addr+4, arg2b) */
    ADD_imm8s_r32( -4, addr );
    PUSH_r32(arg2a);
    PUSH_r32(addr);              /* inner frame: (addr, arg2a) */
    call_func0(sh4_write_long);
    ADD_imm8s_r32( 8, R_ESP );   /* pop inner frame */
    call_func0(sh4_write_long);
    ADD_imm8s_r32( 8, R_ESP );   /* pop outer frame */
}
/**
 * Read a double (64-bit) value from memory, writing the first word into arg2a
 * and the second into arg2b. The addr must not be in EAX
 * NB: 27 bytes
 */
#define MEM_READ_DOUBLE_SIZE 27
static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
{
    PUSH_r32(addr);              /* argument for the first call */
    call_func0(sh4_read_long);
    POP_r32(addr);               /* recover addr (callee may clobber regs) */
    PUSH_r32(R_EAX);             /* save the first word */
    ADD_imm8s_r32( 4, addr );
    PUSH_r32(addr);              /* argument for the second call */
    call_func0(sh4_read_long);
    ADD_imm8s_r32( 4, R_ESP );   /* discard the pushed addr */
    MOV_r32_r32( R_EAX, arg2b ); /* second word (hence addr must not be EAX) */
    POP_r32(arg2a);              /* recover the first word */
}
#define EXIT_BLOCK_SIZE 29
/**
 * Exit the block to an absolute PC
 */
void exit_block( sh4addr_t pc, sh4addr_t endpc )
{
    /* Write the branch target back to sh4r.pc */
    load_imm32( R_ECX, pc );                            // 5
    store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
    /* Look up the translation-cache entry for pc and mask off the low two
     * (flag) bits to recover the code pointer, left in EAX for the caller */
    MOV_moff32_EAX( xlat_get_lut_entry(pc) ); // 5
    AND_imm8s_r32( 0xFC, R_EAX ); // 3
    /* Charge the SH4 cycles elapsed in this block */
    load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    POP_r32(R_EBP);
    RET();
}
/**
 * Write the block trailer (exception handling block)
 */
void sh4_translate_end_block( sh4addr_t pc ) {
    if( sh4_x86.branch_taken == FALSE ) {
	// Didn't exit unconditionally already, so write the termination here
	exit_block( pc, pc );
    }
    if( sh4_x86.backpatch_posn != 0 ) {
	uint8_t *end_ptr = xlat_output;
	// Exception termination. Jump block for various exception codes:
	// six 7-byte stubs (5-byte push + 2-byte jmp) at offsets matching the
	// EXIT_* constants; each pushes the exception code as the cdecl
	// argument for sh4_raise_exception below.
	PUSH_imm32( EXC_DATA_ADDR_READ );
	JMP_rel8( 33, target1 );
	PUSH_imm32( EXC_DATA_ADDR_WRITE );
	JMP_rel8( 26, target2 );
	PUSH_imm32( EXC_ILLEGAL );
	JMP_rel8( 19, target3 );
	PUSH_imm32( EXC_SLOT_ILLEGAL ); 
	JMP_rel8( 12, target4 );
	PUSH_imm32( EXC_FPU_DISABLED ); 
	JMP_rel8( 5, target5 );
	PUSH_imm32( EXC_SLOT_FPU_DISABLED );
	// target
	JMP_TARGET(target1);
	JMP_TARGET(target2);
	JMP_TARGET(target3);
	JMP_TARGET(target4);
	JMP_TARGET(target5);
	// Raise exception. EDX holds the faulting instruction's offset from
	// the block start in SH4 words (loaded by precheck() at the branch site).
	load_spreg( R_ECX, REG_OFFSET(pc) );
	ADD_r32_r32( R_EDX, R_ECX );
	ADD_r32_r32( R_EDX, R_ECX );    // pc += 2 * word offset
	store_spreg( R_ECX, REG_OFFSET(pc) );
	MOV_moff32_EAX( &sh4_cpu_period );
	MUL_r32( R_EDX );               // cycles = period * word offset
	ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );

	call_func0( sh4_raise_exception );
	ADD_imm8s_r32( 4, R_ESP );      // pop the pushed exception code
	// Chain to the translated code for the exception vector
	load_spreg( R_EAX, REG_OFFSET(pc) );
	call_func1(xlat_get_code,R_EAX);
	POP_r32(R_EBP);
	RET();

	// Point the recorded JE_exit/JMP_exit branches at the stubs above
	sh4_x86_do_backpatch( end_ptr );
    }
}
   541 #endif
/* Exception checks - Note that all exception checks will clobber EAX */

/* Load EDX with the current instruction's offset from the block start, in
 * SH4 words (for a delay slot, the offset of the owning branch, hence -2).
 * The exception epilogue uses EDX both to recompute the faulting PC and to
 * charge elapsed cycles, so this must be emitted before any *_exit check. */
#define precheck() load_imm32(R_EDX, (pc-sh4_x86.block_start_pc-(sh4_x86.in_delay_slot?2:0))>>1)

/* Emit a privileged-mode (SR.MD) check, exiting with an illegal /
 * slot-illegal exception when in user mode. Emitted at most once per block
 * (priv_checked latch). Clobbers EAX. */
#define check_priv( ) \
    if( !sh4_x86.priv_checked ) { \
	sh4_x86.priv_checked = TRUE;\
	precheck();\
	load_spreg( R_EAX, R_SR );\
	AND_imm32_r32( SR_MD, R_EAX );\
	if( sh4_x86.in_delay_slot ) {\
	    JE_exit( EXIT_SLOT_ILLEGAL );\
	} else {\
	    JE_exit( EXIT_ILLEGAL );\
	}\
    }\
/* Same privilege check as the check_priv() macro, but without emitting
 * precheck() - for use where EDX has already been set up by the caller.
 * Clobbers EAX. */
static void check_priv_no_precheck()
{
    if( !sh4_x86.priv_checked ) {
	sh4_x86.priv_checked = TRUE;
	load_spreg( R_EAX, R_SR );
	AND_imm32_r32( SR_MD, R_EAX );
	if( sh4_x86.in_delay_slot ) {
	    JE_exit( EXIT_SLOT_ILLEGAL );
	} else {
	    JE_exit( EXIT_ILLEGAL );
	}
    }
}
/* Emit an FPU-enabled (SR.FD clear) check, exiting with an fpu-disabled /
 * slot-fpu-disabled exception when the FPU is disabled. Emitted at most
 * once per block (fpuen_checked latch). Clobbers EAX. */
#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
	sh4_x86.fpuen_checked = TRUE;\
	precheck();\
	load_spreg( R_EAX, R_SR );\
	AND_imm32_r32( SR_FD, R_EAX );\
	if( sh4_x86.in_delay_slot ) {\
	    JNE_exit(EXIT_SLOT_FPU_DISABLED);\
	} else {\
	    JNE_exit(EXIT_FPU_DISABLED);\
	}\
    }
/* Same FPU-enabled check as the check_fpuen() macro, but without emitting
 * precheck() - for use where EDX has already been set up by the caller.
 * Clobbers EAX. */
static void check_fpuen_no_precheck()
{
    if( !sh4_x86.fpuen_checked ) {
	sh4_x86.fpuen_checked = TRUE;
	load_spreg( R_EAX, R_SR );
	AND_imm32_r32( SR_FD, R_EAX );
	if( sh4_x86.in_delay_slot ) {
	    JNE_exit(EXIT_SLOT_FPU_DISABLED);
	} else {
	    JNE_exit(EXIT_FPU_DISABLED);
	}
    }

}
/* Alignment checks: exit with a data-address-error exception if the
 * effective address in x86reg is not 16- or 32-bit aligned. precheck()
 * must already have been emitted to set up EDX. */

/* 16-bit read alignment */
static void check_ralign16( int x86reg )
{
    TEST_imm32_r32( 0x00000001, x86reg );
    JNE_exit(EXIT_DATA_ADDR_READ);
}

/* 16-bit write alignment */
static void check_walign16( int x86reg )
{
    TEST_imm32_r32( 0x00000001, x86reg );
    JNE_exit(EXIT_DATA_ADDR_WRITE);
}

/* 32-bit read alignment */
static void check_ralign32( int x86reg )
{
    TEST_imm32_r32( 0x00000003, x86reg );
    JNE_exit(EXIT_DATA_ADDR_READ);
}

/* 32-bit write alignment */
static void check_walign32( int x86reg )
{
    TEST_imm32_r32( 0x00000003, x86reg );
    JNE_exit(EXIT_DATA_ADDR_WRITE);
}
/* Undefined/illegal instruction - currently emits nothing.
 * NOTE(review): should this raise EXIT_ILLEGAL? Confirm intent. */
#define UNDEF()
/* Move a call result from EAX into value_reg (no-op when it already is EAX) */
#define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
/* Memory accessors: call into the SH4 memory core with the address in
 * addr_reg; read results come back in EAX and are moved to value_reg.
 * These are C calls, so caller-save registers may be clobbered. */
#define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)

/* Emit code raising a slot-illegal exception and terminate the current
 * instruction rule (used for instructions illegal in a delay slot) */
#define SLOTILLEGAL() precheck(); JMP_exit(EXIT_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
/**
 * Emit the 'start of block' assembly. Sets up the stack frame and save
 * SI/DI as required
 */
void sh4_translate_begin_block( sh4addr_t pc ) 
{
    PUSH_r32(R_EBP);
    /* mov &sh4r, ebp - all SH4 register accesses are EBP-relative from here.
     * NOTE(review): the (uint32_t) cast truncates the pointer; this looks
     * wrong for the x86-64 target (cf. load_imm64) - confirm against the
     * x86-64 work-in-progress. */
    load_imm32( R_EBP, (uint32_t)&sh4r );

    /* Reset all per-block translation state */
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.priv_checked = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tstate = TSTATE_NONE;
}
   657 /**
   658  * Exit the block with sh4r.pc already written
   659  * Bytes: 15
   660  */
   661 void exit_block_pcset( pc )
   662 {
   663     load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
   664     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );    // 6
   665     load_spreg( R_EAX, REG_OFFSET(pc) );
   666     call_func1(xlat_get_code,R_EAX);
   667     POP_r32(R_EBP);
   668     RET();
   669 }
   671 extern uint16_t *sh4_icache;
   672 extern uint32_t sh4_icache_addr;
   674 /**
   675  * Translate a single instruction. Delayed branches are handled specially
   676  * by translating both branch and delayed instruction as a single unit (as
   677  * 
   678  *
   679  * @return true if the instruction marks the end of a basic block
   680  * (eg a branch or 
   681  */
   682 uint32_t sh4_translate_instruction( sh4addr_t pc )
   683 {
   684     uint32_t ir;
   685     /* Read instruction */
   686     uint32_t pageaddr = pc >> 12;
   687     if( sh4_icache != NULL && pageaddr == sh4_icache_addr ) {
   688 	ir = sh4_icache[(pc&0xFFF)>>1];
   689     } else {
   690 	sh4_icache = (uint16_t *)mem_get_page(pc);
   691 	if( ((uintptr_t)sh4_icache) < MAX_IO_REGIONS ) {
   692 	    /* If someone's actually been so daft as to try to execute out of an IO
   693 	     * region, fallback on the full-blown memory read
   694 	     */
   695 	    sh4_icache = NULL;
   696 	    ir = sh4_read_word(pc);
   697 	} else {
   698 	    sh4_icache_addr = pageaddr;
   699 	    ir = sh4_icache[(pc&0xFFF)>>1];
   700 	}
   701     }
   703 %%
   704 /* ALU operations */
   705 ADD Rm, Rn {:
   706     load_reg( R_EAX, Rm );
   707     load_reg( R_ECX, Rn );
   708     ADD_r32_r32( R_EAX, R_ECX );
   709     store_reg( R_ECX, Rn );
   710     sh4_x86.tstate = TSTATE_NONE;
   711 :}
   712 ADD #imm, Rn {:  
   713     load_reg( R_EAX, Rn );
   714     ADD_imm8s_r32( imm, R_EAX );
   715     store_reg( R_EAX, Rn );
   716     sh4_x86.tstate = TSTATE_NONE;
   717 :}
   718 ADDC Rm, Rn {:
   719     if( sh4_x86.tstate != TSTATE_C ) {
   720 	LDC_t();
   721     }
   722     load_reg( R_EAX, Rm );
   723     load_reg( R_ECX, Rn );
   724     ADC_r32_r32( R_EAX, R_ECX );
   725     store_reg( R_ECX, Rn );
   726     SETC_t();
   727     sh4_x86.tstate = TSTATE_C;
   728 :}
   729 ADDV Rm, Rn {:
   730     load_reg( R_EAX, Rm );
   731     load_reg( R_ECX, Rn );
   732     ADD_r32_r32( R_EAX, R_ECX );
   733     store_reg( R_ECX, Rn );
   734     SETO_t();
   735     sh4_x86.tstate = TSTATE_O;
   736 :}
   737 AND Rm, Rn {:
   738     load_reg( R_EAX, Rm );
   739     load_reg( R_ECX, Rn );
   740     AND_r32_r32( R_EAX, R_ECX );
   741     store_reg( R_ECX, Rn );
   742     sh4_x86.tstate = TSTATE_NONE;
   743 :}
   744 AND #imm, R0 {:  
   745     load_reg( R_EAX, 0 );
   746     AND_imm32_r32(imm, R_EAX); 
   747     store_reg( R_EAX, 0 );
   748     sh4_x86.tstate = TSTATE_NONE;
   749 :}
   750 AND.B #imm, @(R0, GBR) {: 
   751     load_reg( R_EAX, 0 );
   752     load_spreg( R_ECX, R_GBR );
   753     ADD_r32_r32( R_EAX, R_ECX );
   754     PUSH_r32(R_ECX);
   755     MEM_READ_BYTE( R_ECX, R_EAX );
   756     POP_r32(R_ECX);
   757     AND_imm32_r32(imm, R_EAX );
   758     MEM_WRITE_BYTE( R_ECX, R_EAX );
   759     sh4_x86.tstate = TSTATE_NONE;
   760 :}
   761 CMP/EQ Rm, Rn {:  
   762     load_reg( R_EAX, Rm );
   763     load_reg( R_ECX, Rn );
   764     CMP_r32_r32( R_EAX, R_ECX );
   765     SETE_t();
   766     sh4_x86.tstate = TSTATE_E;
   767 :}
   768 CMP/EQ #imm, R0 {:  
   769     load_reg( R_EAX, 0 );
   770     CMP_imm8s_r32(imm, R_EAX);
   771     SETE_t();
   772     sh4_x86.tstate = TSTATE_E;
   773 :}
   774 CMP/GE Rm, Rn {:  
   775     load_reg( R_EAX, Rm );
   776     load_reg( R_ECX, Rn );
   777     CMP_r32_r32( R_EAX, R_ECX );
   778     SETGE_t();
   779     sh4_x86.tstate = TSTATE_GE;
   780 :}
   781 CMP/GT Rm, Rn {: 
   782     load_reg( R_EAX, Rm );
   783     load_reg( R_ECX, Rn );
   784     CMP_r32_r32( R_EAX, R_ECX );
   785     SETG_t();
   786     sh4_x86.tstate = TSTATE_G;
   787 :}
   788 CMP/HI Rm, Rn {:  
   789     load_reg( R_EAX, Rm );
   790     load_reg( R_ECX, Rn );
   791     CMP_r32_r32( R_EAX, R_ECX );
   792     SETA_t();
   793     sh4_x86.tstate = TSTATE_A;
   794 :}
   795 CMP/HS Rm, Rn {: 
   796     load_reg( R_EAX, Rm );
   797     load_reg( R_ECX, Rn );
   798     CMP_r32_r32( R_EAX, R_ECX );
   799     SETAE_t();
   800     sh4_x86.tstate = TSTATE_AE;
   801  :}
   802 CMP/PL Rn {: 
   803     load_reg( R_EAX, Rn );
   804     CMP_imm8s_r32( 0, R_EAX );
   805     SETG_t();
   806     sh4_x86.tstate = TSTATE_G;
   807 :}
   808 CMP/PZ Rn {:  
   809     load_reg( R_EAX, Rn );
   810     CMP_imm8s_r32( 0, R_EAX );
   811     SETGE_t();
   812     sh4_x86.tstate = TSTATE_GE;
   813 :}
   814 CMP/STR Rm, Rn {:  
   815     load_reg( R_EAX, Rm );
   816     load_reg( R_ECX, Rn );
   817     XOR_r32_r32( R_ECX, R_EAX );
   818     TEST_r8_r8( R_AL, R_AL );
   819     JE_rel8(13, target1);
   820     TEST_r8_r8( R_AH, R_AH ); // 2
   821     JE_rel8(9, target2);
   822     SHR_imm8_r32( 16, R_EAX ); // 3
   823     TEST_r8_r8( R_AL, R_AL ); // 2
   824     JE_rel8(2, target3);
   825     TEST_r8_r8( R_AH, R_AH ); // 2
   826     JMP_TARGET(target1);
   827     JMP_TARGET(target2);
   828     JMP_TARGET(target3);
   829     SETE_t();
   830     sh4_x86.tstate = TSTATE_E;
   831 :}
   832 DIV0S Rm, Rn {:
   833     load_reg( R_EAX, Rm );
   834     load_reg( R_ECX, Rn );
   835     SHR_imm8_r32( 31, R_EAX );
   836     SHR_imm8_r32( 31, R_ECX );
   837     store_spreg( R_EAX, R_M );
   838     store_spreg( R_ECX, R_Q );
   839     CMP_r32_r32( R_EAX, R_ECX );
   840     SETNE_t();
   841     sh4_x86.tstate = TSTATE_NE;
   842 :}
   843 DIV0U {:  
   844     XOR_r32_r32( R_EAX, R_EAX );
   845     store_spreg( R_EAX, R_Q );
   846     store_spreg( R_EAX, R_M );
   847     store_spreg( R_EAX, R_T );
   848     sh4_x86.tstate = TSTATE_C; // works for DIV1
   849 :}
   850 DIV1 Rm, Rn {:
   851     load_spreg( R_ECX, R_M );
   852     load_reg( R_EAX, Rn );
   853     if( sh4_x86.tstate != TSTATE_C ) {
   854 	LDC_t();
   855     }
   856     RCL1_r32( R_EAX );
   857     SETC_r8( R_DL ); // Q'
   858     CMP_sh4r_r32( R_Q, R_ECX );
   859     JE_rel8(5, mqequal);
   860     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   861     JMP_rel8(3, end);
   862     JMP_TARGET(mqequal);
   863     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   864     JMP_TARGET(end);
   865     store_reg( R_EAX, Rn ); // Done with Rn now
   866     SETC_r8(R_AL); // tmp1
   867     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   868     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   869     store_spreg( R_ECX, R_Q );
   870     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   871     MOVZX_r8_r32( R_AL, R_EAX );
   872     store_spreg( R_EAX, R_T );
   873     sh4_x86.tstate = TSTATE_NONE;
   874 :}
   875 DMULS.L Rm, Rn {:  
   876     load_reg( R_EAX, Rm );
   877     load_reg( R_ECX, Rn );
   878     IMUL_r32(R_ECX);
   879     store_spreg( R_EDX, R_MACH );
   880     store_spreg( R_EAX, R_MACL );
   881     sh4_x86.tstate = TSTATE_NONE;
   882 :}
   883 DMULU.L Rm, Rn {:  
   884     load_reg( R_EAX, Rm );
   885     load_reg( R_ECX, Rn );
   886     MUL_r32(R_ECX);
   887     store_spreg( R_EDX, R_MACH );
   888     store_spreg( R_EAX, R_MACL );    
   889     sh4_x86.tstate = TSTATE_NONE;
   890 :}
   891 DT Rn {:  
   892     load_reg( R_EAX, Rn );
   893     ADD_imm8s_r32( -1, R_EAX );
   894     store_reg( R_EAX, Rn );
   895     SETE_t();
   896     sh4_x86.tstate = TSTATE_E;
   897 :}
   898 EXTS.B Rm, Rn {:  
   899     load_reg( R_EAX, Rm );
   900     MOVSX_r8_r32( R_EAX, R_EAX );
   901     store_reg( R_EAX, Rn );
   902 :}
   903 EXTS.W Rm, Rn {:  
   904     load_reg( R_EAX, Rm );
   905     MOVSX_r16_r32( R_EAX, R_EAX );
   906     store_reg( R_EAX, Rn );
   907 :}
   908 EXTU.B Rm, Rn {:  
   909     load_reg( R_EAX, Rm );
   910     MOVZX_r8_r32( R_EAX, R_EAX );
   911     store_reg( R_EAX, Rn );
   912 :}
   913 EXTU.W Rm, Rn {:  
   914     load_reg( R_EAX, Rm );
   915     MOVZX_r16_r32( R_EAX, R_EAX );
   916     store_reg( R_EAX, Rn );
   917 :}
   918 MAC.L @Rm+, @Rn+ {:  
   919     load_reg( R_ECX, Rm );
   920     precheck();
   921     check_ralign32( R_ECX );
   922     load_reg( R_ECX, Rn );
   923     check_ralign32( R_ECX );
   924     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   925     MEM_READ_LONG( R_ECX, R_EAX );
   926     PUSH_r32( R_EAX );
   927     load_reg( R_ECX, Rm );
   928     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   929     MEM_READ_LONG( R_ECX, R_EAX );
   930     POP_r32( R_ECX );
   931     IMUL_r32( R_ECX );
   932     ADD_r32_sh4r( R_EAX, R_MACL );
   933     ADC_r32_sh4r( R_EDX, R_MACH );
   935     load_spreg( R_ECX, R_S );
   936     TEST_r32_r32(R_ECX, R_ECX);
   937     JE_rel8( CALL_FUNC0_SIZE, nosat );
   938     call_func0( signsat48 );
   939     JMP_TARGET( nosat );
   940     sh4_x86.tstate = TSTATE_NONE;
   941 :}
   942 MAC.W @Rm+, @Rn+ {:  
    /* MAC += (signed)@Rm+ * (signed)@Rn+ (16-bit operands, pointers
     * post-incremented by 2). When S is set the result saturates to
     * 32 bits (0x80000000/0x7FFFFFFF) and MACH is set to 1 on overflow.
     * The trailing // numbers are hand-counted byte sizes of the
     * emitted x86 code; the rel8 offsets below must match them exactly,
     * so do not reorder these statements. */
   943     load_reg( R_ECX, Rm );
   944     precheck();
   945     check_ralign16( R_ECX );
   946     load_reg( R_ECX, Rn );
   947     check_ralign16( R_ECX );
   948     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   949     MEM_READ_WORD( R_ECX, R_EAX );
   950     PUSH_r32( R_EAX );
   951     load_reg( R_ECX, Rm );
   952     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   953     MEM_READ_WORD( R_ECX, R_EAX );
   954     POP_r32( R_ECX );
   955     IMUL_r32( R_ECX );
   957     load_spreg( R_ECX, R_S );
   958     TEST_r32_r32( R_ECX, R_ECX );
   959     JE_rel8( 47, nosat );
   961     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   962     JNO_rel8( 51, end );            // 2
   963     load_imm32( R_EDX, 1 );         // 5
   964     store_spreg( R_EDX, R_MACH );   // 6
   965     JS_rel8( 13, positive );        // 2
   966     load_imm32( R_EAX, 0x80000000 );// 5
   967     store_spreg( R_EAX, R_MACL );   // 6
   968     JMP_rel8( 25, end2 );           // 2
   970     JMP_TARGET(positive);
   971     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   972     store_spreg( R_EAX, R_MACL );   // 6
   973     JMP_rel8( 12, end3);            // 2
   975     JMP_TARGET(nosat);
   976     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   977     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   978     JMP_TARGET(end);
   979     JMP_TARGET(end2);
   980     JMP_TARGET(end3);
   981     sh4_x86.tstate = TSTATE_NONE;
   982 :}
   983 MOVT Rn {:  
    /* Rn = T (0 or 1). */
   984     load_spreg( R_EAX, R_T );
   985     store_reg( R_EAX, Rn );
   986 :}
   987 MUL.L Rm, Rn {:  
    /* MACL = low 32 bits of Rm * Rn (MACH untouched). */
   988     load_reg( R_EAX, Rm );
   989     load_reg( R_ECX, Rn );
   990     MUL_r32( R_ECX );
   991     store_spreg( R_EAX, R_MACL );
   992     sh4_x86.tstate = TSTATE_NONE;
   993 :}
   994 MULS.W Rm, Rn {:
    /* MACL = (signed 16-bit)Rm * (signed 16-bit)Rn. */
   995     load_reg16s( R_EAX, Rm );
   996     load_reg16s( R_ECX, Rn );
   997     MUL_r32( R_ECX );
   998     store_spreg( R_EAX, R_MACL );
   999     sh4_x86.tstate = TSTATE_NONE;
  1000 :}
  1001 MULU.W Rm, Rn {:  
    /* MACL = (unsigned 16-bit)Rm * (unsigned 16-bit)Rn. */
  1002     load_reg16u( R_EAX, Rm );
  1003     load_reg16u( R_ECX, Rn );
  1004     MUL_r32( R_ECX );
  1005     store_spreg( R_EAX, R_MACL );
  1006     sh4_x86.tstate = TSTATE_NONE;
  1007 :}
  1008 NEG Rm, Rn {:
    /* Rn = 0 - Rm. */
  1009     load_reg( R_EAX, Rm );
  1010     NEG_r32( R_EAX );
  1011     store_reg( R_EAX, Rn );
  1012     sh4_x86.tstate = TSTATE_NONE;
  1013 :}
  1014 NEGC Rm, Rn {:  
    /* Rn = 0 - Rm - T; T = borrow out. */
  1015     load_reg( R_EAX, Rm );
  1016     XOR_r32_r32( R_ECX, R_ECX );
  1017     LDC_t();
  1018     SBB_r32_r32( R_EAX, R_ECX );
  1019     store_reg( R_ECX, Rn );
  1020     SETC_t();
  1021     sh4_x86.tstate = TSTATE_C;
  1022 :}
  1023 NOT Rm, Rn {:  
    /* Rn = ~Rm. */
  1024     load_reg( R_EAX, Rm );
  1025     NOT_r32( R_EAX );
  1026     store_reg( R_EAX, Rn );
  1027     sh4_x86.tstate = TSTATE_NONE;
  1028 :}
  1029 OR Rm, Rn {:  
    /* Rn |= Rm. */
  1030     load_reg( R_EAX, Rm );
  1031     load_reg( R_ECX, Rn );
  1032     OR_r32_r32( R_EAX, R_ECX );
  1033     store_reg( R_ECX, Rn );
  1034     sh4_x86.tstate = TSTATE_NONE;
  1035 :}
  1036 OR #imm, R0 {:
    /* R0 |= imm (zero-extended 8-bit immediate). */
  1037     load_reg( R_EAX, 0 );
  1038     OR_imm32_r32(imm, R_EAX);
  1039     store_reg( R_EAX, 0 );
  1040     sh4_x86.tstate = TSTATE_NONE;
  1041 :}
  1042 OR.B #imm, @(R0, GBR) {:  
    /* Read-modify-write: mem[R0+GBR] |= imm. The address is preserved
     * across the memory read with PUSH/POP since the read may clobber
     * ECX. */
  1043     load_reg( R_EAX, 0 );
  1044     load_spreg( R_ECX, R_GBR );
  1045     ADD_r32_r32( R_EAX, R_ECX );
  1046     PUSH_r32(R_ECX);
  1047     MEM_READ_BYTE( R_ECX, R_EAX );
  1048     POP_r32(R_ECX);
  1049     OR_imm32_r32(imm, R_EAX );
  1050     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1051     sh4_x86.tstate = TSTATE_NONE;
  1052 :}
  1053 ROTCL Rn {:
    /* Rotate Rn left through carry: T -> lsb, old msb -> T. */
  1054     load_reg( R_EAX, Rn );
    /* Only reload the x86 carry flag from T when it isn't already
     * live from the previous instruction. */
  1055     if( sh4_x86.tstate != TSTATE_C ) {
  1056 	LDC_t();
  1057     }  /* FIX: closing brace restored (line lost in this capture) */
  1058     RCL1_r32( R_EAX );
  1059     store_reg( R_EAX, Rn );
  1060     SETC_t();
  1061     sh4_x86.tstate = TSTATE_C;
  1062 :}
  1063 ROTCR Rn {:  
    /* Rotate Rn right through carry: T -> msb, old lsb -> T. */
  1064     load_reg( R_EAX, Rn );
  1065     if( sh4_x86.tstate != TSTATE_C ) {
  1066 	LDC_t();
  1067     }  /* FIX: closing brace restored (line lost in this capture) */
  1068     RCR1_r32( R_EAX );
  1069     store_reg( R_EAX, Rn );
  1070     SETC_t();
  1071     sh4_x86.tstate = TSTATE_C;
  1072 :}
  1073 ROTL Rn {:  
    /* Rotate Rn left by one; T = bit rotated out. */
  1074     load_reg( R_EAX, Rn );
  1075     ROL1_r32( R_EAX );
  1076     store_reg( R_EAX, Rn );
  1077     SETC_t();
  1078     sh4_x86.tstate = TSTATE_C;
  1079 :}
  1080 ROTR Rn {:  
    /* Rotate Rn right by one; T = bit rotated out. */
  1081     load_reg( R_EAX, Rn );
  1082     ROR1_r32( R_EAX );
  1083     store_reg( R_EAX, Rn );
  1084     SETC_t();
  1085     sh4_x86.tstate = TSTATE_C;
  1086 :}
  1087 SHAD Rm, Rn {:
  1088     /* Annoyingly enough, not directly convertible */
    /* Dynamic arithmetic shift: Rm >= 0 => Rn <<= (Rm&31);
     * Rm < 0 => Rn >>= arithmetic by ((-Rm)&31), where a count of 0
     * means shift-by-32, i.e. fill with the sign bit (SAR 31).
     * The // numbers are hand-counted emitted-code byte sizes that the
     * rel8 offsets depend on - do not reorder. */
  1089     load_reg( R_EAX, Rn );
  1090     load_reg( R_ECX, Rm );
  1091     CMP_imm32_r32( 0, R_ECX );
  1092     JGE_rel8(16, doshl);
  1094     NEG_r32( R_ECX );      // 2
  1095     AND_imm8_r8( 0x1F, R_CL ); // 3
  1096     JE_rel8( 4, emptysar);     // 2
  1097     SAR_r32_CL( R_EAX );       // 2
  1098     JMP_rel8(10, end);          // 2
  1100     JMP_TARGET(emptysar);
  1101     SAR_imm8_r32(31, R_EAX );  // 3
  1102     JMP_rel8(5, end2);
  1104     JMP_TARGET(doshl);
  1105     AND_imm8_r8( 0x1F, R_CL ); // 3
  1106     SHL_r32_CL( R_EAX );       // 2
  1107     JMP_TARGET(end);
  1108     JMP_TARGET(end2);
  1109     store_reg( R_EAX, Rn );
  1110     sh4_x86.tstate = TSTATE_NONE;
  1111 :}
  1112 SHLD Rm, Rn {:  
    /* Dynamic logical shift: Rm >= 0 => Rn <<= (Rm&31);
     * Rm < 0 => Rn >>= logical by ((-Rm)&31), where a count of 0 means
     * shift-by-32, i.e. result 0 (the XOR). Hand-counted rel8 offsets -
     * do not reorder. */
  1113     load_reg( R_EAX, Rn );
  1114     load_reg( R_ECX, Rm );
  1115     CMP_imm32_r32( 0, R_ECX );
  1116     JGE_rel8(15, doshl);
  1118     NEG_r32( R_ECX );      // 2
  1119     AND_imm8_r8( 0x1F, R_CL ); // 3
  1120     JE_rel8( 4, emptyshr );
  1121     SHR_r32_CL( R_EAX );       // 2
  1122     JMP_rel8(9, end);          // 2
  1124     JMP_TARGET(emptyshr);
  1125     XOR_r32_r32( R_EAX, R_EAX );
  1126     JMP_rel8(5, end2);
  1128     JMP_TARGET(doshl);
  1129     AND_imm8_r8( 0x1F, R_CL ); // 3
  1130     SHL_r32_CL( R_EAX );       // 2
  1131     JMP_TARGET(end);
  1132     JMP_TARGET(end2);
  1133     store_reg( R_EAX, Rn );
  1134     sh4_x86.tstate = TSTATE_NONE;
  1135 :}
  1136 SHAL Rn {: 
    /* Arithmetic shift left by one; T = bit shifted out.
     * (Identical encoding effect to SHLL on x86: SHL.) */
  1137     load_reg( R_EAX, Rn );
  1138     SHL1_r32( R_EAX );
  1139     SETC_t();
  1140     store_reg( R_EAX, Rn );
  1141     sh4_x86.tstate = TSTATE_C;
  1142 :}
  1143 SHAR Rn {:  
    /* Arithmetic shift right by one; T = bit shifted out. */
  1144     load_reg( R_EAX, Rn );
  1145     SAR1_r32( R_EAX );
  1146     SETC_t();
  1147     store_reg( R_EAX, Rn );
  1148     sh4_x86.tstate = TSTATE_C;
  1149 :}
  1150 SHLL Rn {:  
    /* Logical shift left by one; T = bit shifted out. */
  1151     load_reg( R_EAX, Rn );
  1152     SHL1_r32( R_EAX );
  1153     SETC_t();
  1154     store_reg( R_EAX, Rn );
  1155     sh4_x86.tstate = TSTATE_C;
  1156 :}
  1157 SHLL2 Rn {:
    /* Rn <<= 2 (no T update). */
  1158     load_reg( R_EAX, Rn );
  1159     SHL_imm8_r32( 2, R_EAX );
  1160     store_reg( R_EAX, Rn );
  1161     sh4_x86.tstate = TSTATE_NONE;
  1162 :}
  1163 SHLL8 Rn {:  
    /* Rn <<= 8 (no T update). */
  1164     load_reg( R_EAX, Rn );
  1165     SHL_imm8_r32( 8, R_EAX );
  1166     store_reg( R_EAX, Rn );
  1167     sh4_x86.tstate = TSTATE_NONE;
  1168 :}
  1169 SHLL16 Rn {:  
    /* Rn <<= 16 (no T update). */
  1170     load_reg( R_EAX, Rn );
  1171     SHL_imm8_r32( 16, R_EAX );
  1172     store_reg( R_EAX, Rn );
  1173     sh4_x86.tstate = TSTATE_NONE;
  1174 :}
  1175 SHLR Rn {:  
    /* Logical shift right by one; T = bit shifted out. */
  1176     load_reg( R_EAX, Rn );
  1177     SHR1_r32( R_EAX );
  1178     SETC_t();
  1179     store_reg( R_EAX, Rn );
  1180     sh4_x86.tstate = TSTATE_C;
  1181 :}
  1182 SHLR2 Rn {:  
    /* Rn >>= 2 logical (no T update). */
  1183     load_reg( R_EAX, Rn );
  1184     SHR_imm8_r32( 2, R_EAX );
  1185     store_reg( R_EAX, Rn );
  1186     sh4_x86.tstate = TSTATE_NONE;
  1187 :}
  1188 SHLR8 Rn {:  
    /* Rn >>= 8 logical (no T update). */
  1189     load_reg( R_EAX, Rn );
  1190     SHR_imm8_r32( 8, R_EAX );
  1191     store_reg( R_EAX, Rn );
  1192     sh4_x86.tstate = TSTATE_NONE;
  1193 :}
  1194 SHLR16 Rn {:  
    /* Rn >>= 16 logical (no T update). */
  1195     load_reg( R_EAX, Rn );
  1196     SHR_imm8_r32( 16, R_EAX );
  1197     store_reg( R_EAX, Rn );
  1198     sh4_x86.tstate = TSTATE_NONE;
  1199 :}
  1200 SUB Rm, Rn {:  
    /* Rn -= Rm. */
  1201     load_reg( R_EAX, Rm );
  1202     load_reg( R_ECX, Rn );
  1203     SUB_r32_r32( R_EAX, R_ECX );
  1204     store_reg( R_ECX, Rn );
  1205     sh4_x86.tstate = TSTATE_NONE;
  1206 :}
  1207 SUBC Rm, Rn {:  
    /* Rn = Rn - Rm - T; T = borrow out. */
  1208     load_reg( R_EAX, Rm );
  1209     load_reg( R_ECX, Rn );
    /* Only reload the x86 carry flag from T when it isn't already live. */
  1210     if( sh4_x86.tstate != TSTATE_C ) {
  1211 	LDC_t();
  1212     }  /* FIX: closing brace restored (line lost in this capture) */
  1213     SBB_r32_r32( R_EAX, R_ECX );
  1214     store_reg( R_ECX, Rn );
  1215     SETC_t();
  1216     sh4_x86.tstate = TSTATE_C;
  1217 :}
  1218 SUBV Rm, Rn {:  
    /* Rn -= Rm; T = signed overflow (underflow) flag. */
  1219     load_reg( R_EAX, Rm );
  1220     load_reg( R_ECX, Rn );
  1221     SUB_r32_r32( R_EAX, R_ECX );
  1222     store_reg( R_ECX, Rn );
  1223     SETO_t();
  1224     sh4_x86.tstate = TSTATE_O;
  1225 :}
  1226 SWAP.B Rm, Rn {:  
    /* Rn = Rm with its low two bytes swapped; upper 16 bits unchanged. */
  1227     load_reg( R_EAX, Rm );
  1228     XCHG_r8_r8( R_AL, R_AH );
  1229     store_reg( R_EAX, Rn );
  1230 :}
  1231 SWAP.W Rm, Rn {:  
    /* Rn = Rm with its 16-bit halves exchanged. */
  1232     load_reg( R_EAX, Rm );
  1233     MOV_r32_r32( R_EAX, R_ECX );
  1234     SHL_imm8_r32( 16, R_ECX );
  1235     SHR_imm8_r32( 16, R_EAX );
  1236     OR_r32_r32( R_EAX, R_ECX );
  1237     store_reg( R_ECX, Rn );
  1238     sh4_x86.tstate = TSTATE_NONE;
  1239 :}
  1240 TAS.B @Rn {:  
    /* Test-and-set: T = (mem[Rn] == 0), then mem[Rn] |= 0x80. */
  1241     load_reg( R_ECX, Rn );
  1242     MEM_READ_BYTE( R_ECX, R_EAX );
  1243     TEST_r8_r8( R_AL, R_AL );
  1244     SETE_t();
  1245     OR_imm8_r8( 0x80, R_AL );
    /* Reload the address - the memory read may clobber ECX
     * (cf. the PUSH/POP guarding in OR.B above). */
  1246     load_reg( R_ECX, Rn );
  1247     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1248     sh4_x86.tstate = TSTATE_NONE;
  1249 :}
  1250 TST Rm, Rn {:  
    /* T = ((Rm & Rn) == 0). */
  1251     load_reg( R_EAX, Rm );
  1252     load_reg( R_ECX, Rn );
  1253     TEST_r32_r32( R_EAX, R_ECX );
  1254     SETE_t();
  1255     sh4_x86.tstate = TSTATE_E;
  1256 :}
  1257 TST #imm, R0 {:  
    /* T = ((R0 & imm) == 0). */
  1258     load_reg( R_EAX, 0 );
  1259     TEST_imm32_r32( imm, R_EAX );
  1260     SETE_t();
  1261     sh4_x86.tstate = TSTATE_E;
  1262 :}
  1263 TST.B #imm, @(R0, GBR) {:  
    /* T = ((mem[R0+GBR] & imm) == 0). */
  1264     load_reg( R_EAX, 0);
    /* FIX: GBR is a system register (a byte offset into the sh4r
     * struct), not a GPR index - it must be loaded with load_spreg,
     * exactly as the sibling OR.B/XOR.B rules do. The previous
     * load_reg( R_ECX, R_GBR ) indexed the GPR array with that offset
     * and read the wrong value. */
  1265     load_spreg( R_ECX, R_GBR );
  1266     ADD_r32_r32( R_EAX, R_ECX );
  1267     MEM_READ_BYTE( R_ECX, R_EAX );
  1268     TEST_imm8_r8( imm, R_AL );
  1269     SETE_t();
  1270     sh4_x86.tstate = TSTATE_E;
  1271 :}
  1272 XOR Rm, Rn {:  
    /* Rn ^= Rm. */
  1273     load_reg( R_EAX, Rm );
  1274     load_reg( R_ECX, Rn );
  1275     XOR_r32_r32( R_EAX, R_ECX );
  1276     store_reg( R_ECX, Rn );
  1277     sh4_x86.tstate = TSTATE_NONE;
  1278 :}
  1279 XOR #imm, R0 {:  
    /* R0 ^= imm. */
  1280     load_reg( R_EAX, 0 );
  1281     XOR_imm32_r32( imm, R_EAX );
  1282     store_reg( R_EAX, 0 );
  1283     sh4_x86.tstate = TSTATE_NONE;
  1284 :}
  1285 XOR.B #imm, @(R0, GBR) {:  
    /* Read-modify-write: mem[R0+GBR] ^= imm; PUSH/POP preserves the
     * address across the memory read. */
  1286     load_reg( R_EAX, 0 );
  1287     load_spreg( R_ECX, R_GBR );
  1288     ADD_r32_r32( R_EAX, R_ECX );
  1289     PUSH_r32(R_ECX);
  1290     MEM_READ_BYTE(R_ECX, R_EAX);
  1291     POP_r32(R_ECX);
  1292     XOR_imm32_r32( imm, R_EAX );
  1293     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1294     sh4_x86.tstate = TSTATE_NONE;
  1295 :}
  1296 XTRCT Rm, Rn {:
    /* Rn = (Rm << 16) | (Rn >> 16) - middle 32 bits of the 64-bit
     * concatenation Rm:Rn. */
  1297     load_reg( R_EAX, Rm );
  1298     load_reg( R_ECX, Rn );
  1299     SHL_imm8_r32( 16, R_EAX );
  1300     SHR_imm8_r32( 16, R_ECX );
  1301     OR_r32_r32( R_EAX, R_ECX );
  1302     store_reg( R_ECX, Rn );
  1303     sh4_x86.tstate = TSTATE_NONE;
  1304 :}
  1306 /* Data move instructions */
  1307 MOV Rm, Rn {:  
    /* Rn = Rm. */
  1308     load_reg( R_EAX, Rm );
  1309     store_reg( R_EAX, Rn );
  1310 :}
  1311 MOV #imm, Rn {:  
    /* Rn = sign-extended 8-bit immediate. */
  1312     load_imm32( R_EAX, imm );
  1313     store_reg( R_EAX, Rn );
  1314 :}
  1315 MOV.B Rm, @Rn {:  
    /* Byte store: mem[Rn] = low byte of Rm. Byte accesses need no
     * alignment check. */
  1316     load_reg( R_EAX, Rm );
  1317     load_reg( R_ECX, Rn );
  1318     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1319     sh4_x86.tstate = TSTATE_NONE;
  1320 :}
  1321 MOV.B Rm, @-Rn {:  
    /* Pre-decrement byte store: Rn -= 1, then mem[Rn] = Rm. */
  1322     load_reg( R_EAX, Rm );
  1323     load_reg( R_ECX, Rn );
  1324     ADD_imm8s_r32( -1, R_ECX );
  1325     store_reg( R_ECX, Rn );
  1326     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1327     sh4_x86.tstate = TSTATE_NONE;
  1328 :}
  1329 MOV.B Rm, @(R0, Rn) {:  
    /* mem[R0+Rn] = Rm. */
  1330     load_reg( R_EAX, 0 );
  1331     load_reg( R_ECX, Rn );
  1332     ADD_r32_r32( R_EAX, R_ECX );
  1333     load_reg( R_EAX, Rm );
  1334     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1335     sh4_x86.tstate = TSTATE_NONE;
  1336 :}
  1337 MOV.B R0, @(disp, GBR) {:  
    /* mem[GBR+disp] = R0. */
  1338     load_reg( R_EAX, 0 );
  1339     load_spreg( R_ECX, R_GBR );
  1340     ADD_imm32_r32( disp, R_ECX );
  1341     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1342     sh4_x86.tstate = TSTATE_NONE;
  1343 :}
  1344 MOV.B R0, @(disp, Rn) {:  
    /* mem[Rn+disp] = R0. */
  1345     load_reg( R_EAX, 0 );
  1346     load_reg( R_ECX, Rn );
  1347     ADD_imm32_r32( disp, R_ECX );
  1348     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1349     sh4_x86.tstate = TSTATE_NONE;
  1350 :}
  1351 MOV.B @Rm, Rn {:  
    /* Byte load; SH4 MOV.B sign-extends - presumably handled inside
     * MEM_READ_BYTE (verify against its definition). */
  1352     load_reg( R_ECX, Rm );
  1353     MEM_READ_BYTE( R_ECX, R_EAX );
  1354     store_reg( R_EAX, Rn );
  1355     sh4_x86.tstate = TSTATE_NONE;
  1356 :}
  1357 MOV.B @Rm+, Rn {:  
    /* Post-increment load: the original address is kept in ECX while
     * the incremented value is written back to Rm before the read. */
  1358     load_reg( R_ECX, Rm );
  1359     MOV_r32_r32( R_ECX, R_EAX );
  1360     ADD_imm8s_r32( 1, R_EAX );
  1361     store_reg( R_EAX, Rm );
  1362     MEM_READ_BYTE( R_ECX, R_EAX );
  1363     store_reg( R_EAX, Rn );
  1364     sh4_x86.tstate = TSTATE_NONE;
  1365 :}
  1366 MOV.B @(R0, Rm), Rn {:  
    /* Rn = mem[R0+Rm]. */
  1367     load_reg( R_EAX, 0 );
  1368     load_reg( R_ECX, Rm );
  1369     ADD_r32_r32( R_EAX, R_ECX );
  1370     MEM_READ_BYTE( R_ECX, R_EAX );
  1371     store_reg( R_EAX, Rn );
  1372     sh4_x86.tstate = TSTATE_NONE;
  1373 :}
  1374 MOV.B @(disp, GBR), R0 {:  
    /* R0 = mem[GBR+disp]. */
  1375     load_spreg( R_ECX, R_GBR );
  1376     ADD_imm32_r32( disp, R_ECX );
  1377     MEM_READ_BYTE( R_ECX, R_EAX );
  1378     store_reg( R_EAX, 0 );
  1379     sh4_x86.tstate = TSTATE_NONE;
  1380 :}
  1381 MOV.B @(disp, Rm), R0 {:  
    /* R0 = mem[Rm+disp]. */
  1382     load_reg( R_ECX, Rm );
  1383     ADD_imm32_r32( disp, R_ECX );
  1384     MEM_READ_BYTE( R_ECX, R_EAX );
  1385     store_reg( R_EAX, 0 );
  1386     sh4_x86.tstate = TSTATE_NONE;
  1387 :}
  1388 MOV.L Rm, @Rn {:
    /* 32-bit store: mem[Rn] = Rm. Long accesses must be 4-byte
     * aligned: precheck() + check_walign32/check_ralign32 raise an
     * address error exception otherwise. */
  1389     load_reg( R_EAX, Rm );
  1390     load_reg( R_ECX, Rn );
  1391     precheck();
  1392     check_walign32(R_ECX);
  1393     MEM_WRITE_LONG( R_ECX, R_EAX );
  1394     sh4_x86.tstate = TSTATE_NONE;
  1395 :}
  1396 MOV.L Rm, @-Rn {:  
    /* Pre-decrement store: Rn -= 4, then mem[Rn] = Rm.
     * (Alignment is checked on the pre-decrement address.) */
  1397     load_reg( R_EAX, Rm );
  1398     load_reg( R_ECX, Rn );
  1399     precheck();
  1400     check_walign32( R_ECX );
  1401     ADD_imm8s_r32( -4, R_ECX );
  1402     store_reg( R_ECX, Rn );
  1403     MEM_WRITE_LONG( R_ECX, R_EAX );
  1404     sh4_x86.tstate = TSTATE_NONE;
  1405 :}
  1406 MOV.L Rm, @(R0, Rn) {:  
    /* mem[R0+Rn] = Rm. */
  1407     load_reg( R_EAX, 0 );
  1408     load_reg( R_ECX, Rn );
  1409     ADD_r32_r32( R_EAX, R_ECX );
  1410     precheck();
  1411     check_walign32( R_ECX );
  1412     load_reg( R_EAX, Rm );
  1413     MEM_WRITE_LONG( R_ECX, R_EAX );
  1414     sh4_x86.tstate = TSTATE_NONE;
  1415 :}
  1416 MOV.L R0, @(disp, GBR) {:  
    /* mem[GBR+disp] = R0. */
  1417     load_spreg( R_ECX, R_GBR );
  1418     load_reg( R_EAX, 0 );
  1419     ADD_imm32_r32( disp, R_ECX );
  1420     precheck();
  1421     check_walign32( R_ECX );
  1422     MEM_WRITE_LONG( R_ECX, R_EAX );
  1423     sh4_x86.tstate = TSTATE_NONE;
  1424 :}
  1425 MOV.L Rm, @(disp, Rn) {:  
    /* mem[Rn+disp] = Rm. */
  1426     load_reg( R_ECX, Rn );
  1427     load_reg( R_EAX, Rm );
  1428     ADD_imm32_r32( disp, R_ECX );
  1429     precheck();
  1430     check_walign32( R_ECX );
  1431     MEM_WRITE_LONG( R_ECX, R_EAX );
  1432     sh4_x86.tstate = TSTATE_NONE;
  1433 :}
  1434 MOV.L @Rm, Rn {:  
    /* Rn = mem[Rm]. */
  1435     load_reg( R_ECX, Rm );
  1436     precheck();
  1437     check_ralign32( R_ECX );
  1438     MEM_READ_LONG( R_ECX, R_EAX );
  1439     store_reg( R_EAX, Rn );
  1440     sh4_x86.tstate = TSTATE_NONE;
  1441 :}
  1442 MOV.L @Rm+, Rn {:  
    /* Post-increment load: Rn = mem[Rm]; Rm += 4. */
  1443     load_reg( R_EAX, Rm );
  1444     precheck();
  1445     check_ralign32( R_EAX );
  1446     MOV_r32_r32( R_EAX, R_ECX );
  1447     ADD_imm8s_r32( 4, R_EAX );
  1448     store_reg( R_EAX, Rm );
  1449     MEM_READ_LONG( R_ECX, R_EAX );
  1450     store_reg( R_EAX, Rn );
  1451     sh4_x86.tstate = TSTATE_NONE;
  1452 :}
  1453 MOV.L @(R0, Rm), Rn {:  
    /* Rn = mem[R0+Rm]. */
  1454     load_reg( R_EAX, 0 );
  1455     load_reg( R_ECX, Rm );
  1456     ADD_r32_r32( R_EAX, R_ECX );
  1457     precheck();
  1458     check_ralign32( R_ECX );
  1459     MEM_READ_LONG( R_ECX, R_EAX );
  1460     store_reg( R_EAX, Rn );
  1461     sh4_x86.tstate = TSTATE_NONE;
  1462 :}
  1463 MOV.L @(disp, GBR), R0 {:
    /* R0 = mem[GBR+disp]. */
  1464     load_spreg( R_ECX, R_GBR );
  1465     ADD_imm32_r32( disp, R_ECX );
  1466     precheck();
  1467     check_ralign32( R_ECX );
  1468     MEM_READ_LONG( R_ECX, R_EAX );
  1469     store_reg( R_EAX, 0 );
  1470     sh4_x86.tstate = TSTATE_NONE;
  1471 :}
  1472 MOV.L @(disp, PC), Rn {:  
    /* PC-relative 32-bit load (constant pool). When the target lies in
     * a directly-mapped region, read it through the host pointer at
     * translation time; otherwise emit a normal translated read.
     * Illegal in a delay slot. */
  1473     if( sh4_x86.in_delay_slot ) {
  1474 	SLOTILLEGAL();
  1475     } else {
  1476 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1477 	sh4ptr_t ptr = mem_get_region(target);
  1478 	if( ptr != NULL ) {
  1479 	    MOV_moff32_EAX( ptr );
  1480 	} else {
  1481 	    load_imm32( R_ECX, target );
  1482 	    MEM_READ_LONG( R_ECX, R_EAX );
  1483 	}  /* FIX: closing brace restored (line lost in this capture) */
  1484 	store_reg( R_EAX, Rn );
  1485 	sh4_x86.tstate = TSTATE_NONE;
  1486     }  /* FIX: closing brace restored (line lost in this capture) */
  1487 :}
  1488 MOV.L @(disp, Rm), Rn {:  
    /* Rn = mem[Rm+disp] (disp pre-scaled by the decoder). */
  1489     load_reg( R_ECX, Rm );
  1490     ADD_imm8s_r32( disp, R_ECX );
  1491     precheck();
  1492     check_ralign32( R_ECX );
  1493     MEM_READ_LONG( R_ECX, R_EAX );
  1494     store_reg( R_EAX, Rn );
  1495     sh4_x86.tstate = TSTATE_NONE;
  1496 :}
  1497 MOV.W Rm, @Rn {:  
    /* 16-bit store: mem[Rn] = low word of Rm. Word accesses must be
     * 2-byte aligned (check_walign16/check_ralign16 + precheck). */
  1498     load_reg( R_ECX, Rn );
  1499     precheck();
  1500     check_walign16( R_ECX );
  1501     load_reg( R_EAX, Rm );
  1502     MEM_WRITE_WORD( R_ECX, R_EAX );
  1503     sh4_x86.tstate = TSTATE_NONE;
  1504 :}
  1505 MOV.W Rm, @-Rn {:  
    /* Pre-decrement word store: Rn -= 2, then mem[Rn] = Rm. */
  1506     load_reg( R_ECX, Rn );
  1507     precheck();
  1508     check_walign16( R_ECX );
  1509     load_reg( R_EAX, Rm );
  1510     ADD_imm8s_r32( -2, R_ECX );
  1511     store_reg( R_ECX, Rn );
  1512     MEM_WRITE_WORD( R_ECX, R_EAX );
  1513     sh4_x86.tstate = TSTATE_NONE;
  1514 :}
  1515 MOV.W Rm, @(R0, Rn) {:  
    /* mem[R0+Rn] = Rm. */
  1516     load_reg( R_EAX, 0 );
  1517     load_reg( R_ECX, Rn );
  1518     ADD_r32_r32( R_EAX, R_ECX );
  1519     precheck();
  1520     check_walign16( R_ECX );
  1521     load_reg( R_EAX, Rm );
  1522     MEM_WRITE_WORD( R_ECX, R_EAX );
  1523     sh4_x86.tstate = TSTATE_NONE;
  1524 :}
  1525 MOV.W R0, @(disp, GBR) {:  
    /* mem[GBR+disp] = R0. */
  1526     load_spreg( R_ECX, R_GBR );
  1527     load_reg( R_EAX, 0 );
  1528     ADD_imm32_r32( disp, R_ECX );
  1529     precheck();
  1530     check_walign16( R_ECX );
  1531     MEM_WRITE_WORD( R_ECX, R_EAX );
  1532     sh4_x86.tstate = TSTATE_NONE;
  1533 :}
  1534 MOV.W R0, @(disp, Rn) {:  
    /* mem[Rn+disp] = R0. */
  1535     load_reg( R_ECX, Rn );
  1536     load_reg( R_EAX, 0 );
  1537     ADD_imm32_r32( disp, R_ECX );
  1538     precheck();
  1539     check_walign16( R_ECX );
  1540     MEM_WRITE_WORD( R_ECX, R_EAX );
  1541     sh4_x86.tstate = TSTATE_NONE;
  1542 :}
  1543 MOV.W @Rm, Rn {:  
    /* Rn = mem[Rm] (word). */
  1544     load_reg( R_ECX, Rm );
  1545     precheck();
  1546     check_ralign16( R_ECX );
  1547     MEM_READ_WORD( R_ECX, R_EAX );
  1548     store_reg( R_EAX, Rn );
  1549     sh4_x86.tstate = TSTATE_NONE;
  1550 :}
  1551 MOV.W @Rm+, Rn {:  
    /* Post-increment word load: Rn = mem[Rm]; Rm += 2. */
  1552     load_reg( R_EAX, Rm );
  1553     precheck();
  1554     check_ralign16( R_EAX );
  1555     MOV_r32_r32( R_EAX, R_ECX );
  1556     ADD_imm8s_r32( 2, R_EAX );
  1557     store_reg( R_EAX, Rm );
  1558     MEM_READ_WORD( R_ECX, R_EAX );
  1559     store_reg( R_EAX, Rn );
  1560     sh4_x86.tstate = TSTATE_NONE;
  1561 :}
  1562 MOV.W @(R0, Rm), Rn {:  
    /* Rn = mem[R0+Rm]. */
  1563     load_reg( R_EAX, 0 );
  1564     load_reg( R_ECX, Rm );
  1565     ADD_r32_r32( R_EAX, R_ECX );
  1566     precheck();
  1567     check_ralign16( R_ECX );
  1568     MEM_READ_WORD( R_ECX, R_EAX );
  1569     store_reg( R_EAX, Rn );
  1570     sh4_x86.tstate = TSTATE_NONE;
  1571 :}
  1572 MOV.W @(disp, GBR), R0 {:  
    /* R0 = mem[GBR+disp]. */
  1573     load_spreg( R_ECX, R_GBR );
  1574     ADD_imm32_r32( disp, R_ECX );
  1575     precheck();
  1576     check_ralign16( R_ECX );
  1577     MEM_READ_WORD( R_ECX, R_EAX );
  1578     store_reg( R_EAX, 0 );
  1579     sh4_x86.tstate = TSTATE_NONE;
  1580 :}
  1581 MOV.W @(disp, PC), Rn {:  
    /* PC-relative 16-bit load; illegal in a delay slot. */
  1582     if( sh4_x86.in_delay_slot ) {
  1583 	SLOTILLEGAL();
  1584     } else {
  1585 	load_imm32( R_ECX, pc + disp + 4 );
  1586 	MEM_READ_WORD( R_ECX, R_EAX );
  1587 	store_reg( R_EAX, Rn );
  1588 	sh4_x86.tstate = TSTATE_NONE;
  1589     }  /* FIX: closing brace restored (line lost in this capture) */
  1590 :}
  1591 MOV.W @(disp, Rm), R0 {:  
    /* R0 = mem[Rm+disp] (word). */
  1592     load_reg( R_ECX, Rm );
  1593     ADD_imm32_r32( disp, R_ECX );
  1594     precheck();
  1595     check_ralign16( R_ECX );
  1596     MEM_READ_WORD( R_ECX, R_EAX );
  1597     store_reg( R_EAX, 0 );
  1598     sh4_x86.tstate = TSTATE_NONE;
  1599 :}
  1600 MOVA @(disp, PC), R0 {:  
    /* R0 = (PC & ~3) + disp + 4 - effective-address calculation only;
     * illegal in a delay slot. */
  1601     if( sh4_x86.in_delay_slot ) {
  1602 	SLOTILLEGAL();
  1603     } else {
  1604 	load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
  1605 	store_reg( R_ECX, 0 );
  1606     }  /* FIX: closing brace restored (line lost in this capture) */
  1607 :}
  1608 MOVCA.L R0, @Rn {:  
    /* Store-with-cache-allocate; translated as a plain aligned
     * long store (no cache emulation). */
  1609     load_reg( R_EAX, 0 );
  1610     load_reg( R_ECX, Rn );
  1611     precheck();
  1612     check_walign32( R_ECX );
  1613     MEM_WRITE_LONG( R_ECX, R_EAX );
  1614     sh4_x86.tstate = TSTATE_NONE;
  1615 :}
  1617 /* Control transfer instructions */
  1618 BF disp {:
    /* Branch (no delay slot) when T == 0; falls through otherwise. */
  1619     if( sh4_x86.in_delay_slot ) {
  1620 	SLOTILLEGAL();
  1621     } else {
  1622 	JT_rel8( EXIT_BLOCK_SIZE, nottaken );
  1623 	exit_block( disp + pc + 4, pc+2 );
  1624 	JMP_TARGET(nottaken);
  1625 	return 2;
  1626     }  /* FIX: closing brace restored (line lost in this capture) */
  1627 :}
  1628 BF/S disp {:
    /* Delayed branch when T == 0. The delay-slot instruction is
     * translated twice - once on the taken path and once on the
     * not-taken path - with a patched rel32 conditional jump between. */
  1629     if( sh4_x86.in_delay_slot ) {
  1630 	SLOTILLEGAL();
  1631     } else {
  1632 	sh4_x86.in_delay_slot = TRUE;
  1633 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1634 	    CMP_imm8s_sh4r( 1, R_T );
  1635 	    sh4_x86.tstate = TSTATE_E;
  1636 	}  /* FIX: closing brace restored (line lost in this capture) */
  1637 	OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JNE rel32
  1638 	sh4_translate_instruction(pc+2);
  1639 	exit_block( disp + pc + 4, pc+4 );
  1640 	// not taken
  1641 	*patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1642 	sh4_translate_instruction(pc+2);
  1643 	return 4;
  1644     }  /* FIX: closing brace restored (line lost in this capture) */
  1645 :}
  1646 BRA disp {:  
    /* Unconditional delayed branch to pc+4+disp. */
  1647     if( sh4_x86.in_delay_slot ) {
  1648 	SLOTILLEGAL();
  1649     } else {
  1650 	sh4_x86.in_delay_slot = TRUE;
  1651 	sh4_translate_instruction( pc + 2 );
  1652 	exit_block( disp + pc + 4, pc+4 );
  1653 	sh4_x86.branch_taken = TRUE;
  1654 	return 4;
  1655     }  /* FIX: closing brace restored (line lost in this capture) */
  1656 :}
  1657 BRAF Rn {:  
    /* Delayed branch to pc+4+Rn (register-relative, target unknown at
     * translation time, so the new PC is stored and the block exits
     * via exit_block_pcset). */
  1658     if( sh4_x86.in_delay_slot ) {
  1659 	SLOTILLEGAL();
  1660     } else {
  1661 	load_reg( R_EAX, Rn );
  1662 	ADD_imm32_r32( pc + 4, R_EAX );
  1663 	store_spreg( R_EAX, REG_OFFSET(pc) );
  1664 	sh4_x86.in_delay_slot = TRUE;
  1665 	sh4_x86.tstate = TSTATE_NONE;
  1666 	sh4_translate_instruction( pc + 2 );
  1667 	exit_block_pcset(pc+2);
  1668 	sh4_x86.branch_taken = TRUE;
  1669 	return 4;
  1670     }  /* FIX: closing brace restored (line lost in this capture) */
  1671 :}
  1672 BSR disp {:  
    /* Delayed subroutine call: PR = pc+4, branch to pc+4+disp. */
  1673     if( sh4_x86.in_delay_slot ) {
  1674 	SLOTILLEGAL();
  1675     } else {
  1676 	load_imm32( R_EAX, pc + 4 );
  1677 	store_spreg( R_EAX, R_PR );
  1678 	sh4_x86.in_delay_slot = TRUE;
  1679 	sh4_translate_instruction( pc + 2 );
  1680 	exit_block( disp + pc + 4, pc+4 );
  1681 	sh4_x86.branch_taken = TRUE;
  1682 	return 4;
  1683     }  /* FIX: closing brace restored (line lost in this capture) */
  1684 :}
  1685 BSRF Rn {:  
    /* Delayed subroutine call: PR = pc+4, branch to pc+4+Rn. */
  1686     if( sh4_x86.in_delay_slot ) {
  1687 	SLOTILLEGAL();
  1688     } else {
  1689 	load_imm32( R_ECX, pc + 4 );
  1690 	store_spreg( R_ECX, R_PR );
  1691 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_ECX );
  1692 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1693 	sh4_x86.in_delay_slot = TRUE;
  1694 	sh4_x86.tstate = TSTATE_NONE;
  1695 	sh4_translate_instruction( pc + 2 );
  1696 	exit_block_pcset(pc+2);
  1697 	sh4_x86.branch_taken = TRUE;
  1698 	return 4;
  1699     }  /* FIX: closing brace restored (line lost in this capture) */
  1700 :}
  1701 BT disp {:
    /* Branch (no delay slot) when T == 1. */
  1702     if( sh4_x86.in_delay_slot ) {
  1703 	SLOTILLEGAL();
  1704     } else {
  1705 	JF_rel8( EXIT_BLOCK_SIZE, nottaken );
  1706 	exit_block( disp + pc + 4, pc+2 );
  1707 	JMP_TARGET(nottaken);
  1708 	return 2;
  1709     }  /* FIX: closing brace restored (line lost in this capture) */
  1710 :}
  1711 BT/S disp {:
    /* Delayed branch when T == 1; mirrors BF/S with the condition
     * inverted (tstate^1). */
  1712     if( sh4_x86.in_delay_slot ) {
  1713 	SLOTILLEGAL();
  1714     } else {
  1715 	sh4_x86.in_delay_slot = TRUE;
  1716 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1717 	    CMP_imm8s_sh4r( 1, R_T );
  1718 	    sh4_x86.tstate = TSTATE_E;
  1719 	}  /* FIX: closing brace restored (line lost in this capture) */
  1720 	OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JE rel32
  1721 	sh4_translate_instruction(pc+2);
  1722 	exit_block( disp + pc + 4, pc+4 );
  1723 	// not taken
  1724 	*patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1725 	sh4_translate_instruction(pc+2);
  1726 	return 4;
  1727     }  /* FIX: closing brace restored (line lost in this capture) */
  1728 :}
  1729 JMP @Rn {:  
    /* Delayed indirect jump to Rn. */
  1730     if( sh4_x86.in_delay_slot ) {
  1731 	SLOTILLEGAL();
  1732     } else {
  1733 	load_reg( R_ECX, Rn );
  1734 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1735 	sh4_x86.in_delay_slot = TRUE;
  1736 	sh4_translate_instruction(pc+2);
  1737 	exit_block_pcset(pc+2);
  1738 	sh4_x86.branch_taken = TRUE;
  1739 	return 4;
  1740     }  /* FIX: closing brace restored (line lost in this capture) */
  1741 :}
  1742 JSR @Rn {:  
    /* Delayed indirect subroutine call: PR = pc+4, jump to Rn. */
  1743     if( sh4_x86.in_delay_slot ) {
  1744 	SLOTILLEGAL();
  1745     } else {
  1746 	load_imm32( R_EAX, pc + 4 );
  1747 	store_spreg( R_EAX, R_PR );
  1748 	load_reg( R_ECX, Rn );
  1749 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1750 	sh4_x86.in_delay_slot = TRUE;
  1751 	sh4_translate_instruction(pc+2);
  1752 	exit_block_pcset(pc+2);
  1753 	sh4_x86.branch_taken = TRUE;
  1754 	return 4;
  1755     }  /* FIX: closing brace restored (line lost in this capture) */
  1756 :}
  1757 RTE {:  
    /* Return from exception: PC = SPC, SR = SSR (privileged; delayed).
     * Restoring SR invalidates the cached priv/fpuen checks. */
  1758     if( sh4_x86.in_delay_slot ) {
  1759 	SLOTILLEGAL();
  1760     } else {
  1761 	check_priv();
  1762 	load_spreg( R_ECX, R_SPC );
  1763 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1764 	load_spreg( R_EAX, R_SSR );
  1765 	call_func1( sh4_write_sr, R_EAX );
  1766 	sh4_x86.in_delay_slot = TRUE;
  1767 	sh4_x86.priv_checked = FALSE;
  1768 	sh4_x86.fpuen_checked = FALSE;
  1769 	sh4_x86.tstate = TSTATE_NONE;
  1770 	sh4_translate_instruction(pc+2);
  1771 	exit_block_pcset(pc+2);
  1772 	sh4_x86.branch_taken = TRUE;
  1773 	return 4;
  1774     }  /* FIX: closing brace restored (line lost in this capture) */
  1775 :}
  1776 RTS {:  
    /* Return from subroutine: delayed jump to PR. */
  1777     if( sh4_x86.in_delay_slot ) {
  1778 	SLOTILLEGAL();
  1779     } else {
  1780 	load_spreg( R_ECX, R_PR );
  1781 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1782 	sh4_x86.in_delay_slot = TRUE;
  1783 	sh4_translate_instruction(pc+2);
  1784 	exit_block_pcset(pc+2);
  1785 	sh4_x86.branch_taken = TRUE;
  1786 	return 4;
  1787     }  /* FIX: closing brace restored (line lost in this capture) */
  1788 :}
  1789 TRAPA #imm {:  
    /* Software trap: raise exception via sh4_raise_trap and exit the
     * block. The ADD to ESP pops the argument pushed by call_func1. */
  1790     if( sh4_x86.in_delay_slot ) {
  1791 	SLOTILLEGAL();
  1792     } else {
  1793 	load_imm32( R_EAX, imm );
  1794 	call_func1( sh4_raise_trap, R_EAX );
  1795 	ADD_imm8s_r32( 4, R_ESP );
  1796 	sh4_x86.tstate = TSTATE_NONE;
  1797 	exit_block_pcset(pc);
  1798 	sh4_x86.branch_taken = TRUE;
  1799 	return 2;
  1800     }  /* FIX: closing brace restored (line lost in this capture) */
  1801 :}
  1802 UNDEF {:  
    /* Undefined opcode: exit the block with an illegal-instruction
     * status. */
  1803     if( sh4_x86.in_delay_slot ) {
  1804 	SLOTILLEGAL();
  1805     } else {
  1806 	precheck();
  1807 	JMP_exit(EXIT_ILLEGAL);
  1808 	return 2;
  1809     }  /* FIX: closing brace restored (line lost in this capture) */
  1810 :}
  1812 CLRMAC {:  
    /* MACH = MACL = 0. */
  1813     XOR_r32_r32(R_EAX, R_EAX);
  1814     store_spreg( R_EAX, R_MACL );
  1815     store_spreg( R_EAX, R_MACH );
  1816     sh4_x86.tstate = TSTATE_NONE;
  1817 :}
  1818 CLRS {:
    /* S = 0, set via the host carry flag. */
  1819     CLC();
  1820     SETC_sh4r(R_S);
  1821     sh4_x86.tstate = TSTATE_C;
  1822 :}
  1823 CLRT {:  
    /* T = 0. */
  1824     CLC();
  1825     SETC_t();
  1826     sh4_x86.tstate = TSTATE_C;
  1827 :}
  1828 SETS {:  
    /* S = 1. */
  1829     STC();
  1830     SETC_sh4r(R_S);
  1831     sh4_x86.tstate = TSTATE_C;
  1832 :}
  1833 SETT {:  
    /* T = 1. */
  1834     STC();
  1835     SETC_t();
  1836     sh4_x86.tstate = TSTATE_C;
  1837 :}
  1839 /* Floating point moves */
  1840 FMOV FRm, FRn {:  
  1841     /* As horrible as this looks, it's actually covering 5 separate cases:
  1842      * 1. 32-bit fr-to-fr (PR=0)
  1843      * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )
  1844      * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )
  1845      * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
  1846      * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
  1847      */
    /* FPSCR.SZ selects single vs double-size moves at runtime; the
     * rel8 offsets are hand-counted emitted-code sizes - do not
     * reorder statements. */
  1848     check_fpuen();
  1849     load_spreg( R_ECX, R_FPSCR );
  1850     load_fr_bank( R_EDX );
  1851     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1852     JNE_rel8(8, doublesize);
  1853     load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
  1854     store_fr( R_EDX, R_EAX, FRn );
  1855     if( FRm&1 ) {
  1856 	JMP_rel8(24, end);
  1857 	JMP_TARGET(doublesize);
  1858 	load_xf_bank( R_ECX ); 
  1859 	load_fr( R_ECX, R_EAX, FRm-1 );
  1860 	if( FRn&1 ) {
  1861 	    load_fr( R_ECX, R_EDX, FRm );
  1862 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1863 	    store_fr( R_ECX, R_EDX, FRn );
  1864 	} else /* FRn&1 == 0 */ {
  1865 	    load_fr( R_ECX, R_ECX, FRm );
  1866 	    store_fr( R_EDX, R_EAX, FRn );
  1867 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1868 	}  /* FIX: closing brace restored (line lost in this capture) */
  1869 	JMP_TARGET(end);
  1870     } else /* FRm&1 == 0 */ {
  1871 	if( FRn&1 ) {
  1872 	    JMP_rel8(24, end);
  1873 	    load_xf_bank( R_ECX );
  1874 	    load_fr( R_EDX, R_EAX, FRm );
  1875 	    load_fr( R_EDX, R_EDX, FRm+1 );
  1876 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1877 	    store_fr( R_ECX, R_EDX, FRn );
  1878 	    JMP_TARGET(end);
  1879 	} else /* FRn&1 == 0 */ {
  1880 	    JMP_rel8(12, end);
  1881 	    load_fr( R_EDX, R_EAX, FRm );
  1882 	    load_fr( R_EDX, R_ECX, FRm+1 );
  1883 	    store_fr( R_EDX, R_EAX, FRn );
  1884 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1885 	    JMP_TARGET(end);
  1886 	}  /* FIX: closing brace restored (line lost in this capture) */
  1887     }  /* FIX: closing brace restored (line lost in this capture) */
  1888     sh4_x86.tstate = TSTATE_NONE;
  1889 :}
  1890 FMOV FRm, @Rn {: 
    /* Store FRm (or the DR/XD pair when FPSCR.SZ is set) to mem[Rn].
     * Offsets are hand-counted emitted-code sizes - do not reorder. */
  1891     precheck();
  1892     check_fpuen_no_precheck();
  1893     load_reg( R_ECX, Rn );
  1894     check_walign32( R_ECX );
  1895     load_spreg( R_EDX, R_FPSCR );
  1896     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1897     JNE_rel8(8 + CALL_FUNC2_SIZE, doublesize);
  1898     load_fr_bank( R_EDX );
  1899     load_fr( R_EDX, R_EAX, FRm );
  1900     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1901     if( FRm&1 ) {
  1902 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1903 	JMP_TARGET(doublesize);
  1904 	load_xf_bank( R_EDX );
  1905 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1906 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1907 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1908 	JMP_TARGET(end);
  1909     } else {
  1910 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1911 	JMP_TARGET(doublesize);
  1912 	load_fr_bank( R_EDX );
  1913 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1914 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1915 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1916 	JMP_TARGET(end);
  1917     }  /* FIX: closing brace restored (line lost in this capture) */
  1918     sh4_x86.tstate = TSTATE_NONE;
  1919 :}
  1920 FMOV @Rm, FRn {:  
    /* Load FRn (or the DR/XD pair when FPSCR.SZ is set) from mem[Rm].
     * Offsets are hand-counted emitted-code sizes - do not reorder. */
  1921     precheck();
  1922     check_fpuen_no_precheck();
  1923     load_reg( R_ECX, Rm );
  1924     check_ralign32( R_ECX );
  1925     load_spreg( R_EDX, R_FPSCR );
  1926     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1927     JNE_rel8(8 + CALL_FUNC1_SIZE, doublesize);
  1928     MEM_READ_LONG( R_ECX, R_EAX );
  1929     load_fr_bank( R_EDX );
  1930     store_fr( R_EDX, R_EAX, FRn );
  1931     if( FRn&1 ) {
  1932 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1933 	JMP_TARGET(doublesize);
  1934 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1935 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1936 	load_xf_bank( R_EDX );
  1937 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1938 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1939 	JMP_TARGET(end);
  1940     } else {
  1941 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1942 	JMP_TARGET(doublesize);
  1943 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1944 	load_fr_bank( R_EDX );
  1945 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1946 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1947 	JMP_TARGET(end);
  1948     }  /* FIX: closing brace restored (line lost in this capture) */
  1949     sh4_x86.tstate = TSTATE_NONE;
  1950 :}
  1951 FMOV FRm, @-Rn {:  
    /* Pre-decrement store: Rn -= 4 (single) or 8 (double/pair), then
     * store FRm / the register pair. Offsets are hand-counted emitted
     * code sizes - do not reorder. */
  1952     precheck();
  1953     check_fpuen_no_precheck();
  1954     load_reg( R_ECX, Rn );
  1955     check_walign32( R_ECX );
  1956     load_spreg( R_EDX, R_FPSCR );
  1957     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1958     JNE_rel8(14 + CALL_FUNC2_SIZE, doublesize);
  1959     load_fr_bank( R_EDX );
  1960     load_fr( R_EDX, R_EAX, FRm );
  1961     ADD_imm8s_r32(-4,R_ECX);
  1962     store_reg( R_ECX, Rn );
  1963     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1964     if( FRm&1 ) {
  1965 	JMP_rel8( 24 + MEM_WRITE_DOUBLE_SIZE, end );
  1966 	JMP_TARGET(doublesize);
  1967 	load_xf_bank( R_EDX );
  1968 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1969 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1970 	ADD_imm8s_r32(-8,R_ECX);
  1971 	store_reg( R_ECX, Rn );
  1972 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1973 	JMP_TARGET(end);
  1974     } else {
  1975 	JMP_rel8( 15 + MEM_WRITE_DOUBLE_SIZE, end );
  1976 	JMP_TARGET(doublesize);
  1977 	load_fr_bank( R_EDX );
  1978 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1979 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1980 	ADD_imm8s_r32(-8,R_ECX);
  1981 	store_reg( R_ECX, Rn );
  1982 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1983 	JMP_TARGET(end);
  1984     }  /* FIX: closing brace restored (line lost in this capture) */
  1985     sh4_x86.tstate = TSTATE_NONE;
  1986 :}
  1987 FMOV @Rm+, FRn {:
  1988     precheck();
  1989     check_fpuen_no_precheck();
  1990     load_reg( R_ECX, Rm );
  1991     check_ralign32( R_ECX );
  1992     MOV_r32_r32( R_ECX, R_EAX );
  1993     load_spreg( R_EDX, R_FPSCR );
  1994     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1995     JNE_rel8(14 + CALL_FUNC1_SIZE, doublesize);
  1996     ADD_imm8s_r32( 4, R_EAX );
  1997     store_reg( R_EAX, Rm );
  1998     MEM_READ_LONG( R_ECX, R_EAX );
  1999     load_fr_bank( R_EDX );
  2000     store_fr( R_EDX, R_EAX, FRn );
  2001     if( FRn&1 ) {
  2002 	JMP_rel8(27 + MEM_READ_DOUBLE_SIZE, end);
  2003 	JMP_TARGET(doublesize);
  2004 	ADD_imm8s_r32( 8, R_EAX );
  2005 	store_reg(R_EAX, Rm);
  2006 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  2007 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  2008 	load_xf_bank( R_EDX );
  2009 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  2010 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  2011 	JMP_TARGET(end);
  2012     } else {
  2013 	JMP_rel8(15 + MEM_READ_DOUBLE_SIZE, end);
  2014 	ADD_imm8s_r32( 8, R_EAX );
  2015 	store_reg(R_EAX, Rm);
  2016 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  2017 	load_fr_bank( R_EDX );
  2018 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  2019 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  2020 	JMP_TARGET(end);
  2022     sh4_x86.tstate = TSTATE_NONE;
  2023 :}
  2024 FMOV FRm, @(R0, Rn) {:  
  2025     precheck();
  2026     check_fpuen_no_precheck();
  2027     load_reg( R_ECX, Rn );
  2028     ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX );
  2029     check_walign32( R_ECX );
  2030     load_spreg( R_EDX, R_FPSCR );
  2031     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  2032     JNE_rel8(8 + CALL_FUNC2_SIZE, doublesize);
  2033     load_fr_bank( R_EDX );
  2034     load_fr( R_EDX, R_EAX, FRm );
  2035     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  2036     if( FRm&1 ) {
  2037 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  2038 	JMP_TARGET(doublesize);
  2039 	load_xf_bank( R_EDX );
  2040 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  2041 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  2042 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  2043 	JMP_TARGET(end);
  2044     } else {
  2045 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  2046 	JMP_TARGET(doublesize);
  2047 	load_fr_bank( R_EDX );
  2048 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  2049 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  2050 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  2051 	JMP_TARGET(end);
  2053     sh4_x86.tstate = TSTATE_NONE;
  2054 :}
  2055 FMOV @(R0, Rm), FRn {:  
  2056     precheck();
  2057     check_fpuen_no_precheck();
  2058     load_reg( R_ECX, Rm );
  2059     ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX );
  2060     check_ralign32( R_ECX );
  2061     load_spreg( R_EDX, R_FPSCR );
  2062     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  2063     JNE_rel8(8 + CALL_FUNC1_SIZE, doublesize);
  2064     MEM_READ_LONG( R_ECX, R_EAX );
  2065     load_fr_bank( R_EDX );
  2066     store_fr( R_EDX, R_EAX, FRn );
  2067     if( FRn&1 ) {
  2068 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  2069 	JMP_TARGET(doublesize);
  2070 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  2071 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  2072 	load_xf_bank( R_EDX );
  2073 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  2074 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  2075 	JMP_TARGET(end);
  2076     } else {
  2077 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  2078 	JMP_TARGET(doublesize);
  2079 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  2080 	load_fr_bank( R_EDX );
  2081 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  2082 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  2083 	JMP_TARGET(end);
  2085     sh4_x86.tstate = TSTATE_NONE;
  2086 :}
FLDI0 FRn {:  /* IFF PR=0 */
    /* Load +0.0f into FRn; a no-op when FPSCR.PR=1 (the JNE skips the
     * 8 bytes of store code). */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(8, end);
    XOR_r32_r32( R_EAX, R_EAX );  // EAX = 0 == bit pattern of +0.0f
    load_spreg( R_ECX, REG_OFFSET(fr_bank) );
    store_fr( R_ECX, R_EAX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDI1 FRn {:  /* IFF PR=0 */
    /* Load +1.0f into FRn; a no-op when FPSCR.PR=1 (the JNE skips the
     * 11 bytes of load+store code). */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(11, end);
    load_imm32(R_EAX, 0x3F800000);  // IEEE-754 single-precision 1.0f
    load_spreg( R_ECX, REG_OFFSET(fr_bank) );
    store_fr( R_ECX, R_EAX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FLOAT FPUL, FRn {:  
    /* Convert the 32-bit integer in FPUL to floating point: FILD pushes it
     * on the x87 stack, then it is popped into FRn (PR=0, single) or
     * DRn (PR=1, double). */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_spreg(R_EDX, REG_OFFSET(fr_bank));
    FILD_sh4r(R_FPUL);
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(5, doubleprec);
    pop_fr( R_EDX, FRn );
    JMP_rel8(3, end);
    JMP_TARGET(doubleprec);
    pop_dr( R_EDX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FTRC FRm, FPUL {:  
    /* Convert FRm (PR=0) or DRm (PR=1) to a 32-bit integer in FPUL,
     * truncating toward zero by temporarily loading trunc_fcw into the x87
     * control word, and saturating out-of-range values to max_int/min_int. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_fr_bank( R_EDX );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(5, doubleprec);
    push_fr( R_EDX, FRm );
    JMP_rel8(3, doop);
    JMP_TARGET(doubleprec);
    push_dr( R_EDX, FRm );
    JMP_TARGET( doop );
    /* NOTE(review): the (uint32_t)&ptr casts below truncate pointers on a
     * 64-bit host -- these need reworking for the x86-64 build mentioned in
     * the changeset description. */
    load_imm32( R_ECX, (uint32_t)&max_int );
    FILD_r32ind( R_ECX );
    FCOMIP_st(1);
    JNA_rel8( 32, sat );            // value >= max_int: saturate high
    load_imm32( R_ECX, (uint32_t)&min_int );  // 5
    FILD_r32ind( R_ECX );           // 2
    FCOMIP_st(1);                   // 2
    JAE_rel8( 21, sat2 );            // 2  value <= min_int: saturate low
    load_imm32( R_EAX, (uint32_t)&save_fcw );
    FNSTCW_r32ind( R_EAX );         // save current FPU control word
    load_imm32( R_EDX, (uint32_t)&trunc_fcw );
    FLDCW_r32ind( R_EDX );          // select truncate-toward-zero rounding
    FISTP_sh4r(R_FPUL);             // 3
    FLDCW_r32ind( R_EAX );          // restore saved control word
    JMP_rel8( 9, end );             // 2
    JMP_TARGET(sat);
    JMP_TARGET(sat2);
    MOV_r32ind_r32( R_ECX, R_ECX ); // 2  ECX = *max_int or *min_int
    store_spreg( R_ECX, R_FPUL );
    FPOP_st();                      // discard the unconverted x87 value
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FLDS: raw 32-bit move FRm -> FPUL (no conversion). */
FLDS FRm, FPUL {:  
    check_fpuen();
    load_fr_bank( R_ECX );
    load_fr( R_ECX, R_EAX, FRm );
    store_spreg( R_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FSTS: raw 32-bit move FPUL -> FRn (no conversion). */
FSTS FPUL, FRn {:  
    check_fpuen();
    load_fr_bank( R_ECX );
    load_spreg( R_EAX, R_FPUL );
    store_fr( R_ECX, R_EAX, FRn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FCNVDS: narrow double DRm to single precision in FPUL. Executes only
 * when FPSCR.PR=1; otherwise the JE skips straight to end. */
FCNVDS FRm, FPUL {:  
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JE_rel8(9, end); // only when PR=1
    load_fr_bank( R_ECX );
    push_dr( R_ECX, FRm );
    pop_fpul();
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FCNVSD: widen single in FPUL to double DRn. Executes only when PR=1. */
FCNVSD FPUL, FRn {:  
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JE_rel8(9, end); // only when PR=1
    load_fr_bank( R_ECX );
    push_fpul();
    pop_dr( R_ECX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
  2196 /* Floating point instructions */
FABS FRn {:  
    /* FRn = |FRn| (PR=0) or DRn = |DRn| (PR=1), via x87 FABS. The rel8
     * displacements are the byte sizes of the sequences they skip (see the
     * per-line size comments). */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_fr_bank( R_EDX );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(10, doubleprec);
    push_fr(R_EDX, FRn); // 3
    FABS_st0(); // 2
    pop_fr( R_EDX, FRn); //3
    JMP_rel8(8,end); // 2
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    FABS_st0();
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FADD: FRn += FRm (or DRn += DRm when PR=1), via the x87 stack. */
FADD FRm, FRn {:  
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(13,doubleprec);
    push_fr(R_EDX, FRm);
    push_fr(R_EDX, FRn);
    FADDP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_rel8(11,end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRm);
    push_dr(R_EDX, FRn);
    FADDP_st(1);
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FDIV: FRn /= FRm (or DRn /= DRm when PR=1). Note the operands are pushed
 * in the opposite order to FADD so FDIVP computes FRn/FRm. */
FDIV FRm, FRn {:  
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(13, doubleprec);
    push_fr(R_EDX, FRn);
    push_fr(R_EDX, FRm);
    FDIVP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_rel8(11, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    push_dr(R_EDX, FRm);
    FDIVP_st(1);
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FMAC FR0, FRm, FRn {:  
    /* Fused multiply-accumulate: FRn += FR0 * FRm (double variant when
     * PR=1), computed on the x87 stack. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_spreg( R_EDX, REG_OFFSET(fr_bank));
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(18, doubleprec);
    push_fr( R_EDX, 0 );       // FR0
    push_fr( R_EDX, FRm );
    FMULP_st(1);               // ST0 = FR0 * FRm
    push_fr( R_EDX, FRn );
    FADDP_st(1);               // ST0 += FRn
    pop_fr( R_EDX, FRn );
    JMP_rel8(16, end);
    JMP_TARGET(doubleprec);
    push_dr( R_EDX, 0 );
    push_dr( R_EDX, FRm );
    FMULP_st(1);
    push_dr( R_EDX, FRn );
    FADDP_st(1);
    pop_dr( R_EDX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FMUL: FRn *= FRm (or DRn *= DRm when PR=1). */
FMUL FRm, FRn {:  
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(13, doubleprec);
    push_fr(R_EDX, FRm);
    push_fr(R_EDX, FRn);
    FMULP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_rel8(11, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRm);
    push_dr(R_EDX, FRn);
    FMULP_st(1);
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FNEG: FRn = -FRn (or DRn = -DRn when PR=1), via x87 FCHS. */
FNEG FRn {:  
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(10, doubleprec);
    push_fr(R_EDX, FRn);
    FCHS_st0();
    pop_fr(R_EDX, FRn);
    JMP_rel8(8, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    FCHS_st0();
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FSRRA: FRn = 1/sqrt(FRn). Single precision only -- when PR=1 the JNE
 * skips the whole computation (12 bytes). */
FSRRA FRn {:  
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(12, end); // PR=0 only
    FLD1_st0();              // ST0 = 1.0
    push_fr(R_EDX, FRn);
    FSQRT_st0();             // ST0 = sqrt(FRn)
    FDIVP_st(1);             // ST0 = 1.0 / sqrt(FRn)
    pop_fr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FSQRT: FRn = sqrt(FRn) (or DRn when PR=1). */
FSQRT FRn {:  
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(10, doubleprec);
    push_fr(R_EDX, FRn);
    FSQRT_st0();
    pop_fr(R_EDX, FRn);
    JMP_rel8(8, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    FSQRT_st0();
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FSUB FRm, FRn {:  
    /* FRn -= FRm (or DRn -= DRm when PR=1). FRn is pushed first so FSUBP
     * computes FRn - FRm. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(13, doubleprec);
    push_fr(R_EDX, FRn);
    push_fr(R_EDX, FRm);
    FSUBP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_rel8(11, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    push_dr(R_EDX, FRm);
    FSUBP_st(1);
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FCMP/EQ: T = (FRn == FRm). Both branches only push the operands; the
 * shared FCOMIP/SETE tail after 'end' performs the compare. */
FCMP/EQ FRm, FRn {:  
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(8, doubleprec);
    push_fr(R_EDX, FRm);
    push_fr(R_EDX, FRn);
    JMP_rel8(6, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRm);
    push_dr(R_EDX, FRn);
    JMP_TARGET(end);
    FCOMIP_st(1);
    SETE_t();
    FPOP_st();  // pop the remaining operand off the x87 stack
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FCMP/GT: T = (FRn > FRm), same structure with SETA for the result. */
FCMP/GT FRm, FRn {:  
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(8, doubleprec);
    push_fr(R_EDX, FRm);
    push_fr(R_EDX, FRn);
    JMP_rel8(6, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRm);
    push_dr(R_EDX, FRn);
    JMP_TARGET(end);
    FCOMIP_st(1);
    SETA_t();
    FPOP_st();
    sh4_x86.tstate = TSTATE_NONE;
:}
FSCA FPUL, FRn {:  
    /* Sine/cosine approximation: calls the sh4_fsca helper with the FPUL
     * angle and a pointer to the destination FR pair (FRn rounded down to
     * the even register). Skipped entirely when PR=1. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8( CALL_FUNC2_SIZE + 9, doubleprec );
    load_fr_bank( R_ECX );
    ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );  // &fr_bank[FRn & ~1]
    load_spreg( R_EDX, R_FPUL );
    call_func2( sh4_fsca, R_EDX, R_ECX );
    JMP_TARGET(doubleprec);
    sh4_x86.tstate = TSTATE_NONE;
:}
FIPR FVm, FVn {:  
    /* Inner product of 4-element vectors: FVn[3] = FVm . FVn, accumulated
     * pairwise on the x87 stack. Skipped (44 bytes) when PR=1. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(44, doubleprec);
    load_fr_bank( R_ECX );
    push_fr( R_ECX, FVm<<2 );
    push_fr( R_ECX, FVn<<2 );
    FMULP_st(1);                  // m[0]*n[0]
    push_fr( R_ECX, (FVm<<2)+1);
    push_fr( R_ECX, (FVn<<2)+1);
    FMULP_st(1);
    FADDP_st(1);                  // + m[1]*n[1]
    push_fr( R_ECX, (FVm<<2)+2);
    push_fr( R_ECX, (FVn<<2)+2);
    FMULP_st(1);
    FADDP_st(1);                  // + m[2]*n[2]
    push_fr( R_ECX, (FVm<<2)+3);
    push_fr( R_ECX, (FVn<<2)+3);
    FMULP_st(1);
    FADDP_st(1);                  // + m[3]*n[3]
    pop_fr( R_ECX, (FVn<<2)+3);   // result into FVn[3]
    JMP_TARGET(doubleprec);
    sh4_x86.tstate = TSTATE_NONE;
:}
FTRV XMTRX, FVn {:  
    /* Matrix-vector transform: FVn = XMTRX * FVn, via the sh4_ftrv helper
     * (vector pointer in EDX, XMTRX bank pointer in ECX). Skipped when
     * PR=1. Trailing comments are the emitted byte sizes. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8( 18 + CALL_FUNC2_SIZE, doubleprec );
    load_fr_bank( R_EDX );                 // 3
    ADD_imm8s_r32( FVn<<4, R_EDX );        // 3
    load_xf_bank( R_ECX );                 // 12
    call_func2( sh4_ftrv, R_EDX, R_ECX );  // 12
    JMP_TARGET(doubleprec);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FRCHG: toggle FPSCR.FR (active FP register bank) and refresh the cached
 * fr_bank pointer via update_fr_bank(). */
FRCHG {:  
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    XOR_imm32_r32( FPSCR_FR, R_ECX );
    store_spreg( R_ECX, R_FPSCR );
    update_fr_bank( R_ECX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FSCHG: toggle FPSCR.SZ (single/pair FMOV transfer size). No bank update
 * needed -- SZ does not affect fr_bank. */
FSCHG {:  
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    XOR_imm32_r32( FPSCR_SZ, R_ECX );
    store_spreg( R_ECX, R_FPSCR );
    sh4_x86.tstate = TSTATE_NONE;
:}
  2467 /* Processor control instructions */
  2468 LDC Rm, SR {:
  2469     if( sh4_x86.in_delay_slot ) {
  2470 	SLOTILLEGAL();
  2471     } else {
  2472 	check_priv();
  2473 	load_reg( R_EAX, Rm );
  2474 	call_func1( sh4_write_sr, R_EAX );
  2475 	sh4_x86.priv_checked = FALSE;
  2476 	sh4_x86.fpuen_checked = FALSE;
  2477 	sh4_x86.tstate = TSTATE_NONE;
  2479 :}
/* LDC Rm, <ctrl-reg>: copy general register Rm into a control register.
 * GBR is writable from user mode; all the others call check_priv() first. */
LDC Rm, GBR {: 
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_GBR );
:}
LDC Rm, VBR {:  
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SSR {:  
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SGR {:  
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SPC {:  
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, DBR {:  
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Destination is the inactive-bank register r_bank[n]. */
LDC Rm, Rn_BANK {:  
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, GBR {:  
    /* GBR <- mem[Rm]; Rm += 4. User-mode accessible, so only the read
     * alignment is checked. */
    load_reg( R_EAX, Rm );
    precheck();
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );  // keep original address for the load
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_GBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
  2531 LDC.L @Rm+, SR {:
  2532     if( sh4_x86.in_delay_slot ) {
  2533 	SLOTILLEGAL();
  2534     } else {
  2535 	precheck();
  2536 	check_priv_no_precheck();
  2537 	load_reg( R_EAX, Rm );
  2538 	check_ralign32( R_EAX );
  2539 	MOV_r32_r32( R_EAX, R_ECX );
  2540 	ADD_imm8s_r32( 4, R_EAX );
  2541 	store_reg( R_EAX, Rm );
  2542 	MEM_READ_LONG( R_ECX, R_EAX );
  2543 	call_func1( sh4_write_sr, R_EAX );
  2544 	sh4_x86.priv_checked = FALSE;
  2545 	sh4_x86.fpuen_checked = FALSE;
  2546 	sh4_x86.tstate = TSTATE_NONE;
  2548 :}
/* LDC.L @Rm+, <ctrl-reg>: privileged post-increment load of a control
 * register from memory. All variants share the same shape: precheck +
 * privilege test, alignment check, copy address to ECX, bump Rm by 4,
 * read the long, store to the target register. */
LDC.L @Rm+, VBR {:  
    precheck();
    check_priv_no_precheck();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SSR {:
    precheck();
    check_priv_no_precheck();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SGR {:  
    precheck();
    check_priv_no_precheck();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SPC {:  
    precheck();
    check_priv_no_precheck();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, DBR {:  
    precheck();
    check_priv_no_precheck();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, Rn_BANK {:  
    precheck();
    check_priv_no_precheck();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* LDS family: load system registers (FPSCR, FPUL, MACH, MACL, PR) from a
 * general register or from memory with post-increment. Writes to FPSCR
 * additionally call update_fr_bank() to refresh the cached bank pointer. */
LDS Rm, FPSCR {:  
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_FPSCR );
    update_fr_bank( R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS.L @Rm+, FPSCR {:  
    load_reg( R_EAX, Rm );
    precheck();
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_FPSCR );
    update_fr_bank( R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, FPUL {:  
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_FPUL );
:}
LDS.L @Rm+, FPUL {:  
    load_reg( R_EAX, Rm );
    precheck();
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACH {: 
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACH );
:}
LDS.L @Rm+, MACH {:  
    load_reg( R_EAX, Rm );
    precheck();
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACL {:  
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACL );
:}
LDS.L @Rm+, MACL {:  
    load_reg( R_EAX, Rm );
    precheck();
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, PR {:  
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_PR );
:}
LDS.L @Rm+, PR {:  
    load_reg( R_EAX, Rm );
    precheck();
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_PR );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* LDTLB and the cache-operation instructions emit no code -- they are
 * no-ops in this translator. */
LDTLB {:  :}
OCBI @Rn {:  :}
OCBP @Rn {:  :}
OCBWB @Rn {:  :}
/* PREF: only meaningful here for store-queue addresses (0xE0000000 region);
 * any other address falls through without effect. The full address is
 * pushed as the cdecl argument and the stack is cleaned up after 'end' on
 * both paths. */
PREF @Rn {:
    load_reg( R_EAX, Rn );
    PUSH_r32( R_EAX );
    AND_imm32_r32( 0xFC000000, R_EAX );
    CMP_imm32_r32( 0xE0000000, R_EAX );
    JNE_rel8(CALL_FUNC0_SIZE, end);
    call_func0( sh4_flush_store_queue );
    JMP_TARGET(end);
    ADD_imm8s_r32( 4, R_ESP );  // discard pushed argument
    sh4_x86.tstate = TSTATE_NONE;
:}
SLEEP {: 
    /* Privileged. Calls the sh4_sleep helper and returns 2 to the
     * translator loop, ending the current translation block here. */
    check_priv();
    call_func0( sh4_sleep );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.in_delay_slot = FALSE;
    return 2;
:}
/* STC <ctrl-reg>, Rn: copy a control register into Rn. SR is assembled by
 * the sh4_read_sr() helper; GBR is readable from user mode, the rest call
 * check_priv() first. */
STC SR, Rn {:
    check_priv();
    call_func0(sh4_read_sr);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC GBR, Rn {:  
    load_spreg( R_EAX, R_GBR );
    store_reg( R_EAX, Rn );
:}
STC VBR, Rn {:  
    check_priv();
    load_spreg( R_EAX, R_VBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SSR, Rn {:  
    check_priv();
    load_spreg( R_EAX, R_SSR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SPC, Rn {:  
    check_priv();
    load_spreg( R_EAX, R_SPC );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SGR, Rn {:  
    check_priv();
    load_spreg( R_EAX, R_SGR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC DBR, Rn {:  
    check_priv();
    load_spreg( R_EAX, R_DBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Source is the inactive-bank register r_bank[m]. */
STC Rm_BANK, Rn {:
    check_priv();
    load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* STC.L <ctrl-reg>, @-Rn: pre-decrement store of a control register.
 * Shared shape: privilege test (except GBR), alignment check on Rn,
 * Rn -= 4 written back, then the 32-bit store. */
STC.L SR, @-Rn {:
    precheck();
    check_priv_no_precheck();
    call_func0( sh4_read_sr );   // EAX = assembled SR value
    load_reg( R_ECX, Rn );
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L VBR, @-Rn {:  
    precheck();
    check_priv_no_precheck();
    load_reg( R_ECX, Rn );
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_VBR );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SSR, @-Rn {:  
    precheck();
    check_priv_no_precheck();
    load_reg( R_ECX, Rn );
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_SSR );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SPC, @-Rn {:
    precheck();
    check_priv_no_precheck();
    load_reg( R_ECX, Rn );
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_SPC );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SGR, @-Rn {:  
    precheck();
    check_priv_no_precheck();
    load_reg( R_ECX, Rn );
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_SGR );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L DBR, @-Rn {:  
    precheck();
    check_priv_no_precheck();
    load_reg( R_ECX, Rn );
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_DBR );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L Rm_BANK, @-Rn {:  
    precheck();
    check_priv_no_precheck();
    load_reg( R_ECX, Rn );
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* GBR variant: user-mode accessible, so no privilege check. */
STC.L GBR, @-Rn {:  
    load_reg( R_ECX, Rn );
    precheck();
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_GBR );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* STS family: store system registers (FPSCR, FPUL, MACH, MACL, PR) into a
 * general register or to memory with pre-decrement. All user-mode. */
STS FPSCR, Rn {:  
    load_spreg( R_EAX, R_FPSCR );
    store_reg( R_EAX, Rn );
:}
STS.L FPSCR, @-Rn {:  
    load_reg( R_ECX, Rn );
    precheck();
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_FPSCR );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPUL, Rn {:  
    load_spreg( R_EAX, R_FPUL );
    store_reg( R_EAX, Rn );
:}
STS.L FPUL, @-Rn {:  
    load_reg( R_ECX, Rn );
    precheck();
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_FPUL );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACH, Rn {:  
    load_spreg( R_EAX, R_MACH );
    store_reg( R_EAX, Rn );
:}
STS.L MACH, @-Rn {:  
    load_reg( R_ECX, Rn );
    precheck();
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_MACH );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACL, Rn {:  
    load_spreg( R_EAX, R_MACL );
    store_reg( R_EAX, Rn );
:}
STS.L MACL, @-Rn {:  
    load_reg( R_ECX, Rn );
    precheck();
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_MACL );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS PR, Rn {:  
    load_spreg( R_EAX, R_PR );
    store_reg( R_EAX, Rn );
:}
STS.L PR, @-Rn {:  
    load_reg( R_ECX, Rn );
    precheck();
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_PR );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
  2926 %%
  2927     sh4_x86.in_delay_slot = FALSE;
  2928     return 0;
.