lxdream.org :: lxdream/src/sh4/sh4x86.in
filename     src/sh4/sh4x86.in
changeset    532:43653e748030
prev         527:14c9489f647e
next         533:9764673fd4a5
author       nkeynes
date         Mon Nov 19 08:51:03 2007 +0000
permissions  -rw-r--r--
last change  Fix PREF for x86-64, add load_ptr macro
     1 /**
     2  * $Id: sh4x86.in,v 1.20 2007-11-08 11:54:16 nkeynes Exp $
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/xltcache.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4trans.h"
    31 #include "sh4/sh4mmio.h"
    32 #include "sh4/x86op.h"
    33 #include "clock.h"
    35 #define DEFAULT_BACKPATCH_SIZE 4096
    37 /** 
    38  * Struct to manage internal translation state. This state is not saved -
    39  * it is only valid between calls to sh4_translate_begin_block() and
    40  * sh4_translate_end_block()
    41  */
    42 struct sh4_x86_state {
    43     gboolean in_delay_slot;
    44     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    45     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    46     gboolean branch_taken; /* true if we branched unconditionally */
    47     uint32_t block_start_pc;
    48     int tstate;
    50     /* Allocated memory for the (block-wide) back-patch list */
    51     uint32_t **backpatch_list;
    52     uint32_t backpatch_posn;
    53     uint32_t backpatch_size;
    54 };
    56 #define TSTATE_NONE -1
    57 #define TSTATE_O    0
    58 #define TSTATE_C    2
    59 #define TSTATE_E    4
    60 #define TSTATE_NE   5
    61 #define TSTATE_G    0xF
    62 #define TSTATE_GE   0xD
    63 #define TSTATE_A    7
    64 #define TSTATE_AE   3
    66 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    67 #define JT_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    68 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    69     OP(0x70+sh4_x86.tstate); OP(rel8); \
    70     MARK_JMP(rel8,label)
    71 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    72 #define JF_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    73 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    74     OP(0x70+ (sh4_x86.tstate^1)); OP(rel8); \
    75     MARK_JMP(rel8, label)
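        /* Worked example (for illustration): the TSTATE_* values above are x86
         * condition-code numbers, so OP(0x70+tstate) emits the matching Jcc rel8
         * opcode. After a CMP the state is TSTATE_E (4), so JT_rel8 emits 0x74
         * (JE) while JF_rel8 flips the low bit (tstate^1 == 5) to emit 0x75
         * (JNE). With TSTATE_NONE the macros first re-test sh4r.t explicitly. */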
    78 #define EXIT_DATA_ADDR_READ 0
    79 #define EXIT_DATA_ADDR_WRITE 7
    80 #define EXIT_ILLEGAL 14
    81 #define EXIT_SLOT_ILLEGAL 21
    82 #define EXIT_FPU_DISABLED 28
    83 #define EXIT_SLOT_FPU_DISABLED 35
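        /* For illustration: these are byte offsets into the exception jump block
         * emitted by sh4_translate_end_block(). Each stub there is 7 bytes -- a
         * 5-byte MOV/PUSH of the exception code plus a 2-byte JMP rel8 -- hence
         * the spacing 0, 7, 14, 21, 28, 35. */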
    85 static struct sh4_x86_state sh4_x86;
    87 static uint32_t max_int = 0x7FFFFFFF;
    88 static uint32_t min_int = 0x80000000;
    89 static uint32_t save_fcw; /* save value for fpu control word */
    90 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
    92 void sh4_x86_init()
    93 {
    94     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    95     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(uint32_t *);
    96 }
    99 static void sh4_x86_add_backpatch( uint8_t *ptr )
   100 {
   101     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   102 	sh4_x86.backpatch_size <<= 1;
   103 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, sh4_x86.backpatch_size * sizeof(uint32_t *) );
   104 	assert( sh4_x86.backpatch_list != NULL );
   105     }
   106     sh4_x86.backpatch_list[sh4_x86.backpatch_posn++] = (uint32_t *)ptr;
   107 }
   109 static void sh4_x86_do_backpatch( uint8_t *reloc_base )
   110 {
   111     unsigned int i;
   112     for( i=0; i<sh4_x86.backpatch_posn; i++ ) {
   113 	*sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]) - 4);
   114     }
   115 }
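        /* Worked example (for illustration): suppose JE_exit(EXIT_ILLEGAL) left
         * its rel32 operand at code offset 0x42, initialised to 14
         * (EXIT_ILLEGAL). With the exception block later emitted at
         * reloc_base = 0x90, the adjustment adds 0x90 - 0x42 - 4 = 0x4A, giving
         * rel32 = 0x58, and the jump resolves to 0x42 + 4 + 0x58 = 0x9E =
         * reloc_base + 14, i.e. the EXIT_ILLEGAL stub. */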
   117 /**
   118  * Emit an instruction to load an SH4 reg into a real register
   119  */
   120 static inline void load_reg( int x86reg, int sh4reg ) 
   121 {
   122     /* mov [bp+n], reg */
   123     OP(0x8B);
   124     OP(0x45 + (x86reg<<3));
   125     OP(REG_OFFSET(r[sh4reg]));
   126 }
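        /* Worked example (for illustration, assuming R_EAX == 0): with sh4r
         * based at EBP, load_reg(R_EAX, 4) emits 8B 45 <disp8>, i.e.
         * mov eax, [ebp+REG_OFFSET(r[4])] -- ModRM 0x45 is mod=01 (disp8),
         * reg=EAX, rm=EBP. */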
   128 static inline void load_reg16s( int x86reg, int sh4reg )
   129 {
   130     OP(0x0F);
   131     OP(0xBF);
   132     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   133 }
   135 static inline void load_reg16u( int x86reg, int sh4reg )
   136 {
   137     OP(0x0F);
   138     OP(0xB7);
   139     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   141 }
   143 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   144 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   145 /**
   146  * Emit an instruction to load an immediate value into a register
   147  */
   148 static inline void load_imm32( int x86reg, uint32_t value ) {
   149     /* mov #value, reg */
   150     OP(0xB8 + x86reg);
   151     OP32(value);
   152 }
   154 /**
   155  * Load an immediate 64-bit quantity (note: x86-64 only)
   156  */
    157 static inline void load_imm64( int x86reg, uint64_t value ) {
   158     /* mov #value, reg */
   159     REXW();
   160     OP(0xB8 + x86reg);
   161     OP64(value);
   162 }
   165 /**
   166  * Emit an instruction to store an SH4 reg (RN)
   167  */
    168 static inline void store_reg( int x86reg, int sh4reg ) {
   169     /* mov reg, [bp+n] */
   170     OP(0x89);
   171     OP(0x45 + (x86reg<<3));
   172     OP(REG_OFFSET(r[sh4reg]));
   173 }
   175 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
   177 /**
   178  * Load an FR register (single-precision floating point) into an integer x86
   179  * register (eg for register-to-register moves)
   180  */
    181 static inline void load_fr( int bankreg, int x86reg, int frm )
   182 {
   183     OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
   184 }
   186 /**
    187  * Store an FR register (single-precision floating point) from an integer x86
    188  * register (eg for register-to-register moves)
   189  */
    190 static inline void store_fr( int bankreg, int x86reg, int frn )
   191 {
   192     OP(0x89);  OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
   193 }
   196 /**
    197  * Load a pointer to the back fp bank into the specified x86 register. The
   198  * bankreg must have been previously loaded with FPSCR.
   199  * NB: 12 bytes
   200  */
   201 static inline void load_xf_bank( int bankreg )
   202 {
   203     NOT_r32( bankreg );
   204     SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
   205     AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
   206     OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
   207 }
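        /* Worked example (for illustration): with FPSCR in bankreg, the
         * NOT / SHR 15 / AND 0x40 sequence turns bit 21 (FPSCR.FR) into a byte
         * offset of 0 or 64 (one bank = 16 floats = 64 bytes). FR=1 yields
         * offset 0 and FR=0 yields 64 -- the bank *opposite* the one
         * update_fr_bank() below selects, which is exactly the XF bank. */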
   209 /**
   210  * Update the fr_bank pointer based on the current fpscr value.
   211  */
   212 static inline void update_fr_bank( int fpscrreg )
   213 {
   214     SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
   215     AND_imm8s_r32( 0x40, fpscrreg );    // Complete extraction
   216     OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
   217     store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
   218 }
   219 /**
   220  * Push FPUL (as a 32-bit float) onto the FPU stack
   221  */
   222 static inline void push_fpul( )
   223 {
   224     OP(0xD9); OP(0x45); OP(R_FPUL);
   225 }
   227 /**
   228  * Pop FPUL (as a 32-bit float) from the FPU stack
   229  */
   230 static inline void pop_fpul( )
   231 {
   232     OP(0xD9); OP(0x5D); OP(R_FPUL);
   233 }
   235 /**
   236  * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
   237  * with the location of the current fp bank.
   238  */
   239 static inline void push_fr( int bankreg, int frm ) 
   240 {
   241     OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
   242 }
   244 /**
   245  * Pop a 32-bit float from the FPU stack and store it back into the fp bank, 
   246  * with bankreg previously loaded with the location of the current fp bank.
   247  */
   248 static inline void pop_fr( int bankreg, int frm )
   249 {
   250     OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
   251 }
   253 /**
   254  * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
   255  * with the location of the current fp bank.
   256  */
   257 static inline void push_dr( int bankreg, int frm )
   258 {
   259     OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
   260 }
   262 static inline void pop_dr( int bankreg, int frm )
   263 {
   264     OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
   265 }
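        /* For illustration (layout inferred from this file): the frm^1 in the
         * single-precision helpers reflects a word-swapped register file --
         * each FR pair is stored swapped so that a DR register occupies 8
         * naturally-ordered bytes at offset frm<<2, which is why push_dr/pop_dr
         * can address the double directly while single accesses pay the ^1. */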
   267 #if SH4_TRANSLATOR == TARGET_X86_64
   268 /* X86-64 has different calling conventions... */
   270 #define load_ptr( reg, ptr ) load_imm64( reg, (uint64_t)ptr );
   272 /**
   273  * Note: clobbers EAX to make the indirect call - this isn't usually
   274  * a problem since the callee will usually clobber it anyway.
   275  * Size: 12 bytes
   276  */
   277 #define CALL_FUNC0_SIZE 12
   278 static inline void call_func0( void *ptr )
   279 {
   280     load_imm64(R_EAX, (uint64_t)ptr);
   281     CALL_r32(R_EAX);
   282 }
   284 #define CALL_FUNC1_SIZE 14
   285 static inline void call_func1( void *ptr, int arg1 )
   286 {
   287     MOV_r32_r32(arg1, R_EDI);
   288     call_func0(ptr);
   289 }
   291 #define CALL_FUNC2_SIZE 16
   292 static inline void call_func2( void *ptr, int arg1, int arg2 )
   293 {
   294     MOV_r32_r32(arg1, R_EDI);
   295     MOV_r32_r32(arg2, R_ESI);
   296     call_func0(ptr);
   297 }
   299 #define MEM_WRITE_DOUBLE_SIZE 39
   300 /**
   301  * Write a double (64-bit) value into memory, with the first word in arg2a, and
   302  * the second in arg2b
   303  */
   304 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
   305 {
   306 /*
   307     MOV_r32_r32( addr, R_EDI );
   308     MOV_r32_r32( arg2b, R_ESI );
   309     REXW(); SHL_imm8_r32( 32, R_ESI );
   310     REXW(); MOVZX_r16_r32( arg2a, arg2a );
   311     REXW(); OR_r32_r32( arg2a, R_ESI );
   312     call_func0(sh4_write_quad);
   313 */
   314     PUSH_r32(arg2b);
   315     PUSH_r32(addr);
   316     call_func2(sh4_write_long, addr, arg2a);
   317     POP_r32(addr);
   318     POP_r32(arg2b);
   319     ADD_imm8s_r32(4, addr);
   320     call_func2(sh4_write_long, addr, arg2b);
   321 }
   323 #define MEM_READ_DOUBLE_SIZE 35
   324 /**
   325  * Read a double (64-bit) value from memory, writing the first word into arg2a
   326  * and the second into arg2b. The addr must not be in EAX
   327  */
   328 static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
   329 {
   330 /*
   331     MOV_r32_r32( addr, R_EDI );
   332     call_func0(sh4_read_quad);
   333     REXW(); MOV_r32_r32( R_EAX, arg2a );
   334     REXW(); MOV_r32_r32( R_EAX, arg2b );
   335     REXW(); SHR_imm8_r32( 32, arg2b );
   336 */
   337     PUSH_r32(addr);
   338     call_func1(sh4_read_long, addr);
   339     POP_r32(R_EDI);
   340     PUSH_r32(R_EAX);
   341     ADD_imm8s_r32(4, R_EDI);
   342     call_func0(sh4_read_long);
   343     MOV_r32_r32(R_EAX, arg2b);
   344     POP_r32(arg2a);
   345 }
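        /* For illustration: the PUSH/POP pairs in these two helpers exist
         * because EAX, ECX, EDX, ESI and EDI are all caller-saved in the
         * x86-64 SysV ABI -- nothing survives a call_funcN() -- so the address
         * and the first word must be parked on the stack between the two
         * sh4_read_long/sh4_write_long calls. */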
   347 #define EXIT_BLOCK_SIZE 35
   348 /**
   349  * Exit the block to an absolute PC
   350  */
   351 void exit_block( sh4addr_t pc, sh4addr_t endpc )
   352 {
   353     load_imm32( R_ECX, pc );                            // 5
   354     store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
   355     REXW(); MOV_moff32_EAX( xlat_get_lut_entry(pc) );
   356     REXW(); AND_imm8s_r32( 0xFC, R_EAX ); // 3
   357     load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
   358     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   359     POP_r32(R_EBP);
   360     RET();
   361 }
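        /* Worked example (for illustration): the cycle charge assumes one SH4
         * cycle per 16-bit instruction word. For a three-instruction block,
         * endpc - block_start_pc == 6, so (6>>1)*sh4_cpu_period adds three
         * cycle-periods to sh4r.slice_cycle. */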
   364 /**
   365  * Write the block trailer (exception handling block)
   366  */
   367 void sh4_translate_end_block( sh4addr_t pc ) {
   368     if( sh4_x86.branch_taken == FALSE ) {
   369 	// Didn't exit unconditionally already, so write the termination here
   370 	exit_block( pc, pc );
   371     }
   372     if( sh4_x86.backpatch_posn != 0 ) {
   373 	uint8_t *end_ptr = xlat_output;
   374 	// Exception termination. Jump block for various exception codes:
   375 	load_imm32( R_EDI, EXC_DATA_ADDR_READ );
   376 	JMP_rel8( 33, target1 );
   377 	load_imm32( R_EDI, EXC_DATA_ADDR_WRITE );
   378 	JMP_rel8( 26, target2 );
   379 	load_imm32( R_EDI, EXC_ILLEGAL );
   380 	JMP_rel8( 19, target3 );
   381 	load_imm32( R_EDI, EXC_SLOT_ILLEGAL ); 
   382 	JMP_rel8( 12, target4 );
   383 	load_imm32( R_EDI, EXC_FPU_DISABLED ); 
   384 	JMP_rel8( 5, target5 );
   385 	load_imm32( R_EDI, EXC_SLOT_FPU_DISABLED );
   386 	// target
   387 	JMP_TARGET(target1);
   388 	JMP_TARGET(target2);
   389 	JMP_TARGET(target3);
   390 	JMP_TARGET(target4);
   391 	JMP_TARGET(target5);
   392 	// Raise exception
   393 	load_spreg( R_ECX, REG_OFFSET(pc) );
   394 	ADD_r32_r32( R_EDX, R_ECX );
   395 	ADD_r32_r32( R_EDX, R_ECX );
   396 	store_spreg( R_ECX, REG_OFFSET(pc) );
   397 	MOV_moff32_EAX( &sh4_cpu_period );
   398 	MUL_r32( R_EDX );
   399 	ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
   401 	call_func0( sh4_raise_exception );
   402 	load_spreg( R_EAX, REG_OFFSET(pc) );
   403 	call_func1(xlat_get_code,R_EAX);
   404 	POP_r32(R_EBP);
   405 	RET();
   407 	sh4_x86_do_backpatch( end_ptr );
   408     }
   409 }
   411 #else /* SH4_TRANSLATOR == TARGET_X86 */
   413 #define load_ptr( reg, ptr ) load_imm32( reg, (uint32_t)ptr );
   415 /**
   416  * Note: clobbers EAX to make the indirect call - this isn't usually
   417  * a problem since the callee will usually clobber it anyway.
   418  */
   419 #define CALL_FUNC0_SIZE 7
   420 static inline void call_func0( void *ptr )
   421 {
   422     load_imm32(R_EAX, (uint32_t)ptr);
   423     CALL_r32(R_EAX);
   424 }
   426 #define CALL_FUNC1_SIZE 11
   427 static inline void call_func1( void *ptr, int arg1 )
   428 {
   429     PUSH_r32(arg1);
   430     call_func0(ptr);
   431     ADD_imm8s_r32( 4, R_ESP );
   432 }
   434 #define CALL_FUNC2_SIZE 12
   435 static inline void call_func2( void *ptr, int arg1, int arg2 )
   436 {
   437     PUSH_r32(arg2);
   438     PUSH_r32(arg1);
   439     call_func0(ptr);
   440     ADD_imm8s_r32( 8, R_ESP );
   441 }
   443 /**
   444  * Write a double (64-bit) value into memory, with the first word in arg2a, and
   445  * the second in arg2b
   446  * NB: 30 bytes
   447  */
   448 #define MEM_WRITE_DOUBLE_SIZE 30
   449 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
   450 {
   451     ADD_imm8s_r32( 4, addr );
   452     PUSH_r32(arg2b);
   453     PUSH_r32(addr);
   454     ADD_imm8s_r32( -4, addr );
   455     PUSH_r32(arg2a);
   456     PUSH_r32(addr);
   457     call_func0(sh4_write_long);
   458     ADD_imm8s_r32( 8, R_ESP );
   459     call_func0(sh4_write_long);
   460     ADD_imm8s_r32( 8, R_ESP );
   461 }
   463 /**
   464  * Read a double (64-bit) value from memory, writing the first word into arg2a
   465  * and the second into arg2b. The addr must not be in EAX
   466  * NB: 27 bytes
   467  */
   468 #define MEM_READ_DOUBLE_SIZE 27
   469 static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
   470 {
   471     PUSH_r32(addr);
   472     call_func0(sh4_read_long);
   473     POP_r32(addr);
   474     PUSH_r32(R_EAX);
   475     ADD_imm8s_r32( 4, addr );
   476     PUSH_r32(addr);
   477     call_func0(sh4_read_long);
   478     ADD_imm8s_r32( 4, R_ESP );
   479     MOV_r32_r32( R_EAX, arg2b );
   480     POP_r32(arg2a);
   481 }
   483 #define EXIT_BLOCK_SIZE 29
   484 /**
   485  * Exit the block to an absolute PC
   486  */
   487 void exit_block( sh4addr_t pc, sh4addr_t endpc )
   488 {
   489     load_imm32( R_ECX, pc );                            // 5
   490     store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
   491     MOV_moff32_EAX( xlat_get_lut_entry(pc) ); // 5
   492     AND_imm8s_r32( 0xFC, R_EAX ); // 3
   493     load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
   494     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   495     POP_r32(R_EBP);
   496     RET();
   497 }
   499 /**
   500  * Write the block trailer (exception handling block)
   501  */
   502 void sh4_translate_end_block( sh4addr_t pc ) {
   503     if( sh4_x86.branch_taken == FALSE ) {
   504 	// Didn't exit unconditionally already, so write the termination here
   505 	exit_block( pc, pc );
   506     }
   507     if( sh4_x86.backpatch_posn != 0 ) {
   508 	uint8_t *end_ptr = xlat_output;
   509 	// Exception termination. Jump block for various exception codes:
   510 	PUSH_imm32( EXC_DATA_ADDR_READ );
   511 	JMP_rel8( 33, target1 );
   512 	PUSH_imm32( EXC_DATA_ADDR_WRITE );
   513 	JMP_rel8( 26, target2 );
   514 	PUSH_imm32( EXC_ILLEGAL );
   515 	JMP_rel8( 19, target3 );
   516 	PUSH_imm32( EXC_SLOT_ILLEGAL ); 
   517 	JMP_rel8( 12, target4 );
   518 	PUSH_imm32( EXC_FPU_DISABLED ); 
   519 	JMP_rel8( 5, target5 );
   520 	PUSH_imm32( EXC_SLOT_FPU_DISABLED );
   521 	// target
   522 	JMP_TARGET(target1);
   523 	JMP_TARGET(target2);
   524 	JMP_TARGET(target3);
   525 	JMP_TARGET(target4);
   526 	JMP_TARGET(target5);
   527 	// Raise exception
   528 	load_spreg( R_ECX, REG_OFFSET(pc) );
   529 	ADD_r32_r32( R_EDX, R_ECX );
   530 	ADD_r32_r32( R_EDX, R_ECX );
   531 	store_spreg( R_ECX, REG_OFFSET(pc) );
   532 	MOV_moff32_EAX( &sh4_cpu_period );
   533 	MUL_r32( R_EDX );
   534 	ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
   536 	call_func0( sh4_raise_exception );
   537 	ADD_imm8s_r32( 4, R_ESP );
   538 	load_spreg( R_EAX, REG_OFFSET(pc) );
   539 	call_func1(xlat_get_code,R_EAX);
   540 	POP_r32(R_EBP);
   541 	RET();
   543 	sh4_x86_do_backpatch( end_ptr );
   544     }
   545 }
   546 #endif
   548 /* Exception checks - Note that all exception checks will clobber EAX */
   549 #define precheck() load_imm32(R_EDX, (pc-sh4_x86.block_start_pc-(sh4_x86.in_delay_slot?2:0))>>1)
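        /* For illustration: precheck() parks the current instruction's offset
         * from the block start, in 16-bit words, in EDX -- backing up over a
         * delay slot so the branch itself is blamed. The exception trailer in
         * sh4_translate_end_block() then rebuilds the faulting PC as
         * sh4r.pc + 2*EDX and charges EDX*sh4_cpu_period to slice_cycle. */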
   551 #define check_priv( ) \
   552     if( !sh4_x86.priv_checked ) { \
   553 	sh4_x86.priv_checked = TRUE;\
   554 	precheck();\
   555 	load_spreg( R_EAX, R_SR );\
   556 	AND_imm32_r32( SR_MD, R_EAX );\
   557 	if( sh4_x86.in_delay_slot ) {\
   558 	    JE_exit( EXIT_SLOT_ILLEGAL );\
   559 	} else {\
   560 	    JE_exit( EXIT_ILLEGAL );\
   561 	}\
   562     }\
   565 static void check_priv_no_precheck()
   566 {
   567     if( !sh4_x86.priv_checked ) {
   568 	sh4_x86.priv_checked = TRUE;
   569 	load_spreg( R_EAX, R_SR );
   570 	AND_imm32_r32( SR_MD, R_EAX );
   571 	if( sh4_x86.in_delay_slot ) {
   572 	    JE_exit( EXIT_SLOT_ILLEGAL );
   573 	} else {
   574 	    JE_exit( EXIT_ILLEGAL );
   575 	}
   576     }
   577 }
   579 #define check_fpuen( ) \
   580     if( !sh4_x86.fpuen_checked ) {\
   581 	sh4_x86.fpuen_checked = TRUE;\
   582 	precheck();\
   583 	load_spreg( R_EAX, R_SR );\
   584 	AND_imm32_r32( SR_FD, R_EAX );\
   585 	if( sh4_x86.in_delay_slot ) {\
   586 	    JNE_exit(EXIT_SLOT_FPU_DISABLED);\
   587 	} else {\
   588 	    JNE_exit(EXIT_FPU_DISABLED);\
   589 	}\
   590     }
   592 static void check_fpuen_no_precheck()
   593 {
   594     if( !sh4_x86.fpuen_checked ) {
   595 	sh4_x86.fpuen_checked = TRUE;
   596 	load_spreg( R_EAX, R_SR );
   597 	AND_imm32_r32( SR_FD, R_EAX );
   598 	if( sh4_x86.in_delay_slot ) {
   599 	    JNE_exit(EXIT_SLOT_FPU_DISABLED);
   600 	} else {
   601 	    JNE_exit(EXIT_FPU_DISABLED);
   602 	}
   603     }
   605 }
   607 static void check_ralign16( int x86reg )
   608 {
   609     TEST_imm32_r32( 0x00000001, x86reg );
   610     JNE_exit(EXIT_DATA_ADDR_READ);
   611 }
   613 static void check_walign16( int x86reg )
   614 {
   615     TEST_imm32_r32( 0x00000001, x86reg );
   616     JNE_exit(EXIT_DATA_ADDR_WRITE);
   617 }
   619 static void check_ralign32( int x86reg )
   620 {
   621     TEST_imm32_r32( 0x00000003, x86reg );
   622     JNE_exit(EXIT_DATA_ADDR_READ);
   623 }
   624 static void check_walign32( int x86reg )
   625 {
   626     TEST_imm32_r32( 0x00000003, x86reg );
   627     JNE_exit(EXIT_DATA_ADDR_WRITE);
   628 }
   630 #define UNDEF()
   631 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   632 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   633 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   634 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   635 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   636 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   637 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   639 #define SLOTILLEGAL() precheck(); JMP_exit(EXIT_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
   643 /**
    644  * Emit the 'start of block' assembly. Sets up the stack frame and saves
   645  * SI/DI as required
   646  */
   647 void sh4_translate_begin_block( sh4addr_t pc ) 
   648 {
   649     PUSH_r32(R_EBP);
   650     /* mov &sh4r, ebp */
   651     load_ptr( R_EBP, &sh4r );
   653     sh4_x86.in_delay_slot = FALSE;
   654     sh4_x86.priv_checked = FALSE;
   655     sh4_x86.fpuen_checked = FALSE;
   656     sh4_x86.branch_taken = FALSE;
   657     sh4_x86.backpatch_posn = 0;
   658     sh4_x86.block_start_pc = pc;
   659     sh4_x86.tstate = TSTATE_NONE;
   660 }
   662 /**
   663  * Exit the block with sh4r.pc already written
   664  * Bytes: 15
   665  */
    666 void exit_block_pcset( sh4addr_t pc )
   667 {
   668     load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
   669     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );    // 6
   670     load_spreg( R_EAX, REG_OFFSET(pc) );
   671     call_func1(xlat_get_code,R_EAX);
   672     POP_r32(R_EBP);
   673     RET();
   674 }
   676 extern uint16_t *sh4_icache;
   677 extern uint32_t sh4_icache_addr;
   679 /**
   680  * Translate a single instruction. Delayed branches are handled specially
    681  * by translating both branch and delayed instruction as a single unit (as
    682  * if they were one instruction).
    683  *
    684  * @return true if the instruction marks the end of a basic block
    685  * (eg a branch or an instruction that modifies the PC).
   686  */
   687 uint32_t sh4_translate_instruction( sh4addr_t pc )
   688 {
   689     uint32_t ir;
   690     /* Read instruction */
   691     uint32_t pageaddr = pc >> 12;
   692     if( sh4_icache != NULL && pageaddr == sh4_icache_addr ) {
   693 	ir = sh4_icache[(pc&0xFFF)>>1];
   694     } else {
   695 	sh4_icache = (uint16_t *)mem_get_page(pc);
   696 	if( ((uintptr_t)sh4_icache) < MAX_IO_REGIONS ) {
   697 	    /* If someone's actually been so daft as to try to execute out of an IO
    698 	     * region, fall back on the full-blown memory read
   699 	     */
   700 	    sh4_icache = NULL;
   701 	    ir = sh4_read_word(pc);
   702 	} else {
   703 	    sh4_icache_addr = pageaddr;
   704 	    ir = sh4_icache[(pc&0xFFF)>>1];
   705 	}
   706     }
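        /* For illustration: pc>>12 names the 4KB page, so consecutive fetches
         * from one page skip mem_get_page() entirely, and (pc&0xFFF)>>1 indexes
         * the cached page as an array of 16-bit instruction words. Page
         * pointers numerically below MAX_IO_REGIONS are region IDs rather than
         * host addresses, hence the slow-path fallback. */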
   708 %%
   709 /* ALU operations */
   710 ADD Rm, Rn {:
   711     load_reg( R_EAX, Rm );
   712     load_reg( R_ECX, Rn );
   713     ADD_r32_r32( R_EAX, R_ECX );
   714     store_reg( R_ECX, Rn );
   715     sh4_x86.tstate = TSTATE_NONE;
   716 :}
   717 ADD #imm, Rn {:  
   718     load_reg( R_EAX, Rn );
   719     ADD_imm8s_r32( imm, R_EAX );
   720     store_reg( R_EAX, Rn );
   721     sh4_x86.tstate = TSTATE_NONE;
   722 :}
   723 ADDC Rm, Rn {:
   724     if( sh4_x86.tstate != TSTATE_C ) {
   725 	LDC_t();
   726     }
   727     load_reg( R_EAX, Rm );
   728     load_reg( R_ECX, Rn );
   729     ADC_r32_r32( R_EAX, R_ECX );
   730     store_reg( R_ECX, Rn );
   731     SETC_t();
   732     sh4_x86.tstate = TSTATE_C;
   733 :}
   734 ADDV Rm, Rn {:
   735     load_reg( R_EAX, Rm );
   736     load_reg( R_ECX, Rn );
   737     ADD_r32_r32( R_EAX, R_ECX );
   738     store_reg( R_ECX, Rn );
   739     SETO_t();
   740     sh4_x86.tstate = TSTATE_O;
   741 :}
   742 AND Rm, Rn {:
   743     load_reg( R_EAX, Rm );
   744     load_reg( R_ECX, Rn );
   745     AND_r32_r32( R_EAX, R_ECX );
   746     store_reg( R_ECX, Rn );
   747     sh4_x86.tstate = TSTATE_NONE;
   748 :}
   749 AND #imm, R0 {:  
   750     load_reg( R_EAX, 0 );
   751     AND_imm32_r32(imm, R_EAX); 
   752     store_reg( R_EAX, 0 );
   753     sh4_x86.tstate = TSTATE_NONE;
   754 :}
   755 AND.B #imm, @(R0, GBR) {: 
   756     load_reg( R_EAX, 0 );
   757     load_spreg( R_ECX, R_GBR );
   758     ADD_r32_r32( R_EAX, R_ECX );
   759     PUSH_r32(R_ECX);
   760     MEM_READ_BYTE( R_ECX, R_EAX );
   761     POP_r32(R_ECX);
   762     AND_imm32_r32(imm, R_EAX );
   763     MEM_WRITE_BYTE( R_ECX, R_EAX );
   764     sh4_x86.tstate = TSTATE_NONE;
   765 :}
   766 CMP/EQ Rm, Rn {:  
   767     load_reg( R_EAX, Rm );
   768     load_reg( R_ECX, Rn );
   769     CMP_r32_r32( R_EAX, R_ECX );
   770     SETE_t();
   771     sh4_x86.tstate = TSTATE_E;
   772 :}
   773 CMP/EQ #imm, R0 {:  
   774     load_reg( R_EAX, 0 );
   775     CMP_imm8s_r32(imm, R_EAX);
   776     SETE_t();
   777     sh4_x86.tstate = TSTATE_E;
   778 :}
   779 CMP/GE Rm, Rn {:  
   780     load_reg( R_EAX, Rm );
   781     load_reg( R_ECX, Rn );
   782     CMP_r32_r32( R_EAX, R_ECX );
   783     SETGE_t();
   784     sh4_x86.tstate = TSTATE_GE;
   785 :}
   786 CMP/GT Rm, Rn {: 
   787     load_reg( R_EAX, Rm );
   788     load_reg( R_ECX, Rn );
   789     CMP_r32_r32( R_EAX, R_ECX );
   790     SETG_t();
   791     sh4_x86.tstate = TSTATE_G;
   792 :}
   793 CMP/HI Rm, Rn {:  
   794     load_reg( R_EAX, Rm );
   795     load_reg( R_ECX, Rn );
   796     CMP_r32_r32( R_EAX, R_ECX );
   797     SETA_t();
   798     sh4_x86.tstate = TSTATE_A;
   799 :}
   800 CMP/HS Rm, Rn {: 
   801     load_reg( R_EAX, Rm );
   802     load_reg( R_ECX, Rn );
   803     CMP_r32_r32( R_EAX, R_ECX );
   804     SETAE_t();
   805     sh4_x86.tstate = TSTATE_AE;
   806  :}
   807 CMP/PL Rn {: 
   808     load_reg( R_EAX, Rn );
   809     CMP_imm8s_r32( 0, R_EAX );
   810     SETG_t();
   811     sh4_x86.tstate = TSTATE_G;
   812 :}
   813 CMP/PZ Rn {:  
   814     load_reg( R_EAX, Rn );
   815     CMP_imm8s_r32( 0, R_EAX );
   816     SETGE_t();
   817     sh4_x86.tstate = TSTATE_GE;
   818 :}
   819 CMP/STR Rm, Rn {:  
   820     load_reg( R_EAX, Rm );
   821     load_reg( R_ECX, Rn );
   822     XOR_r32_r32( R_ECX, R_EAX );
   823     TEST_r8_r8( R_AL, R_AL );
   824     JE_rel8(13, target1);
   825     TEST_r8_r8( R_AH, R_AH ); // 2
   826     JE_rel8(9, target2);
   827     SHR_imm8_r32( 16, R_EAX ); // 3
   828     TEST_r8_r8( R_AL, R_AL ); // 2
   829     JE_rel8(2, target3);
   830     TEST_r8_r8( R_AH, R_AH ); // 2
   831     JMP_TARGET(target1);
   832     JMP_TARGET(target2);
   833     JMP_TARGET(target3);
   834     SETE_t();
   835     sh4_x86.tstate = TSTATE_E;
   836 :}
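        /* For illustration: CMP/STR sets T when any of the four corresponding
         * bytes of Rm and Rn are equal. After the XOR an equal byte becomes
         * zero, so the code tests AL, AH, then (after SHR 16) the upper two
         * bytes, jumping straight to SETE as soon as one zero byte is seen. */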
   837 DIV0S Rm, Rn {:
   838     load_reg( R_EAX, Rm );
   839     load_reg( R_ECX, Rn );
   840     SHR_imm8_r32( 31, R_EAX );
   841     SHR_imm8_r32( 31, R_ECX );
   842     store_spreg( R_EAX, R_M );
   843     store_spreg( R_ECX, R_Q );
   844     CMP_r32_r32( R_EAX, R_ECX );
   845     SETNE_t();
   846     sh4_x86.tstate = TSTATE_NE;
   847 :}
   848 DIV0U {:  
   849     XOR_r32_r32( R_EAX, R_EAX );
   850     store_spreg( R_EAX, R_Q );
   851     store_spreg( R_EAX, R_M );
   852     store_spreg( R_EAX, R_T );
   853     sh4_x86.tstate = TSTATE_C; // works for DIV1
   854 :}
   855 DIV1 Rm, Rn {:
   856     load_spreg( R_ECX, R_M );
   857     load_reg( R_EAX, Rn );
   858     if( sh4_x86.tstate != TSTATE_C ) {
   859 	LDC_t();
   860     }
   861     RCL1_r32( R_EAX );
   862     SETC_r8( R_DL ); // Q'
   863     CMP_sh4r_r32( R_Q, R_ECX );
   864     JE_rel8(5, mqequal);
   865     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   866     JMP_rel8(3, end);
   867     JMP_TARGET(mqequal);
   868     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   869     JMP_TARGET(end);
   870     store_reg( R_EAX, Rn ); // Done with Rn now
   871     SETC_r8(R_AL); // tmp1
   872     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   873     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   874     store_spreg( R_ECX, R_Q );
   875     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   876     MOVZX_r8_r32( R_AL, R_EAX );
   877     store_spreg( R_EAX, R_T );
   878     sh4_x86.tstate = TSTATE_NONE;
   879 :}
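        /* For illustration: DIV1 is one step of the SH4's non-restoring 1-bit
         * division. The partial dividend in Rn is rotated left through T (RCL),
         * Rm is added or subtracted according to whether the previous Q equals
         * M, and the new Q and T bits are recomputed from the resulting carry,
         * mirroring the hardware's Q/M/T bookkeeping. */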
   880 DMULS.L Rm, Rn {:  
   881     load_reg( R_EAX, Rm );
   882     load_reg( R_ECX, Rn );
   883     IMUL_r32(R_ECX);
   884     store_spreg( R_EDX, R_MACH );
   885     store_spreg( R_EAX, R_MACL );
   886     sh4_x86.tstate = TSTATE_NONE;
   887 :}
   888 DMULU.L Rm, Rn {:  
   889     load_reg( R_EAX, Rm );
   890     load_reg( R_ECX, Rn );
   891     MUL_r32(R_ECX);
   892     store_spreg( R_EDX, R_MACH );
   893     store_spreg( R_EAX, R_MACL );    
   894     sh4_x86.tstate = TSTATE_NONE;
   895 :}
   896 DT Rn {:  
   897     load_reg( R_EAX, Rn );
   898     ADD_imm8s_r32( -1, R_EAX );
   899     store_reg( R_EAX, Rn );
   900     SETE_t();
   901     sh4_x86.tstate = TSTATE_E;
   902 :}
   903 EXTS.B Rm, Rn {:  
   904     load_reg( R_EAX, Rm );
   905     MOVSX_r8_r32( R_EAX, R_EAX );
   906     store_reg( R_EAX, Rn );
   907 :}
   908 EXTS.W Rm, Rn {:  
   909     load_reg( R_EAX, Rm );
   910     MOVSX_r16_r32( R_EAX, R_EAX );
   911     store_reg( R_EAX, Rn );
   912 :}
   913 EXTU.B Rm, Rn {:  
   914     load_reg( R_EAX, Rm );
   915     MOVZX_r8_r32( R_EAX, R_EAX );
   916     store_reg( R_EAX, Rn );
   917 :}
   918 EXTU.W Rm, Rn {:  
   919     load_reg( R_EAX, Rm );
   920     MOVZX_r16_r32( R_EAX, R_EAX );
   921     store_reg( R_EAX, Rn );
   922 :}
   923 MAC.L @Rm+, @Rn+ {:  
   924     load_reg( R_ECX, Rm );
   925     precheck();
   926     check_ralign32( R_ECX );
   927     load_reg( R_ECX, Rn );
   928     check_ralign32( R_ECX );
   929     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   930     MEM_READ_LONG( R_ECX, R_EAX );
   931     PUSH_r32( R_EAX );
   932     load_reg( R_ECX, Rm );
   933     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   934     MEM_READ_LONG( R_ECX, R_EAX );
   935     POP_r32( R_ECX );
   936     IMUL_r32( R_ECX );
   937     ADD_r32_sh4r( R_EAX, R_MACL );
   938     ADC_r32_sh4r( R_EDX, R_MACH );
   940     load_spreg( R_ECX, R_S );
   941     TEST_r32_r32(R_ECX, R_ECX);
   942     JE_rel8( CALL_FUNC0_SIZE, nosat );
   943     call_func0( signsat48 );
   944     JMP_TARGET( nosat );
   945     sh4_x86.tstate = TSTATE_NONE;
   946 :}
   947 MAC.W @Rm+, @Rn+ {:  
   948     load_reg( R_ECX, Rm );
   949     precheck();
   950     check_ralign16( R_ECX );
   951     load_reg( R_ECX, Rn );
   952     check_ralign16( R_ECX );
   953     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   954     MEM_READ_WORD( R_ECX, R_EAX );
   955     PUSH_r32( R_EAX );
   956     load_reg( R_ECX, Rm );
   957     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   958     MEM_READ_WORD( R_ECX, R_EAX );
   959     POP_r32( R_ECX );
   960     IMUL_r32( R_ECX );
   962     load_spreg( R_ECX, R_S );
   963     TEST_r32_r32( R_ECX, R_ECX );
   964     JE_rel8( 47, nosat );
   966     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   967     JNO_rel8( 51, end );            // 2
   968     load_imm32( R_EDX, 1 );         // 5
   969     store_spreg( R_EDX, R_MACH );   // 6
   970     JS_rel8( 13, positive );        // 2
   971     load_imm32( R_EAX, 0x80000000 );// 5
   972     store_spreg( R_EAX, R_MACL );   // 6
   973     JMP_rel8( 25, end2 );           // 2
   975     JMP_TARGET(positive);
   976     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   977     store_spreg( R_EAX, R_MACL );   // 6
   978     JMP_rel8( 12, end3);            // 2
   980     JMP_TARGET(nosat);
   981     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   982     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   983     JMP_TARGET(end);
   984     JMP_TARGET(end2);
   985     JMP_TARGET(end3);
   986     sh4_x86.tstate = TSTATE_NONE;
   987 :}
   988 MOVT Rn {:  
   989     load_spreg( R_EAX, R_T );
   990     store_reg( R_EAX, Rn );
   991 :}
   992 MUL.L Rm, Rn {:  
   993     load_reg( R_EAX, Rm );
   994     load_reg( R_ECX, Rn );
   995     MUL_r32( R_ECX );
   996     store_spreg( R_EAX, R_MACL );
   997     sh4_x86.tstate = TSTATE_NONE;
   998 :}
   999 MULS.W Rm, Rn {:
  1000     load_reg16s( R_EAX, Rm );
  1001     load_reg16s( R_ECX, Rn );
  1002     MUL_r32( R_ECX );
  1003     store_spreg( R_EAX, R_MACL );
  1004     sh4_x86.tstate = TSTATE_NONE;
  1005 :}
  1006 MULU.W Rm, Rn {:  
  1007     load_reg16u( R_EAX, Rm );
  1008     load_reg16u( R_ECX, Rn );
  1009     MUL_r32( R_ECX );
  1010     store_spreg( R_EAX, R_MACL );
  1011     sh4_x86.tstate = TSTATE_NONE;
  1012 :}
  1013 NEG Rm, Rn {:
  1014     load_reg( R_EAX, Rm );
  1015     NEG_r32( R_EAX );
  1016     store_reg( R_EAX, Rn );
  1017     sh4_x86.tstate = TSTATE_NONE;
  1018 :}
  1019 NEGC Rm, Rn {:  
  1020     load_reg( R_EAX, Rm );
  1021     XOR_r32_r32( R_ECX, R_ECX );
  1022     LDC_t();
  1023     SBB_r32_r32( R_EAX, R_ECX );
  1024     store_reg( R_ECX, Rn );
  1025     SETC_t();
  1026     sh4_x86.tstate = TSTATE_C;
  1027 :}
  1028 NOT Rm, Rn {:  
  1029     load_reg( R_EAX, Rm );
  1030     NOT_r32( R_EAX );
  1031     store_reg( R_EAX, Rn );
  1032     sh4_x86.tstate = TSTATE_NONE;
  1033 :}
  1034 OR Rm, Rn {:  
  1035     load_reg( R_EAX, Rm );
  1036     load_reg( R_ECX, Rn );
  1037     OR_r32_r32( R_EAX, R_ECX );
  1038     store_reg( R_ECX, Rn );
  1039     sh4_x86.tstate = TSTATE_NONE;
  1040 :}
  1041 OR #imm, R0 {:
  1042     load_reg( R_EAX, 0 );
  1043     OR_imm32_r32(imm, R_EAX);
  1044     store_reg( R_EAX, 0 );
  1045     sh4_x86.tstate = TSTATE_NONE;
  1046 :}
  1047 OR.B #imm, @(R0, GBR) {:  
  1048     load_reg( R_EAX, 0 );
  1049     load_spreg( R_ECX, R_GBR );
  1050     ADD_r32_r32( R_EAX, R_ECX );
  1051     PUSH_r32(R_ECX);
  1052     MEM_READ_BYTE( R_ECX, R_EAX );
  1053     POP_r32(R_ECX);
  1054     OR_imm32_r32(imm, R_EAX );
  1055     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1056     sh4_x86.tstate = TSTATE_NONE;
  1057 :}
  1058 ROTCL Rn {:
  1059     load_reg( R_EAX, Rn );
  1060     if( sh4_x86.tstate != TSTATE_C ) {
   1061 	LDC_t();
   1062     }
  1063     RCL1_r32( R_EAX );
  1064     store_reg( R_EAX, Rn );
  1065     SETC_t();
  1066     sh4_x86.tstate = TSTATE_C;
  1067 :}
  1068 ROTCR Rn {:  
  1069     load_reg( R_EAX, Rn );
  1070     if( sh4_x86.tstate != TSTATE_C ) {
   1071 	LDC_t();
   1072     }
  1073     RCR1_r32( R_EAX );
  1074     store_reg( R_EAX, Rn );
  1075     SETC_t();
  1076     sh4_x86.tstate = TSTATE_C;
  1077 :}
  1078 ROTL Rn {:  
  1079     load_reg( R_EAX, Rn );
  1080     ROL1_r32( R_EAX );
  1081     store_reg( R_EAX, Rn );
  1082     SETC_t();
  1083     sh4_x86.tstate = TSTATE_C;
  1084 :}
  1085 ROTR Rn {:  
  1086     load_reg( R_EAX, Rn );
  1087     ROR1_r32( R_EAX );
  1088     store_reg( R_EAX, Rn );
  1089     SETC_t();
  1090     sh4_x86.tstate = TSTATE_C;
  1091 :}
  1092 SHAD Rm, Rn {:
  1093     /* Annoyingly enough, not directly convertible */
  1094     load_reg( R_EAX, Rn );
  1095     load_reg( R_ECX, Rm );
  1096     CMP_imm32_r32( 0, R_ECX );
  1097     JGE_rel8(16, doshl);
  1099     NEG_r32( R_ECX );      // 2
  1100     AND_imm8_r8( 0x1F, R_CL ); // 3
  1101     JE_rel8( 4, emptysar);     // 2
  1102     SAR_r32_CL( R_EAX );       // 2
  1103     JMP_rel8(10, end);          // 2
  1105     JMP_TARGET(emptysar);
  1106     SAR_imm8_r32(31, R_EAX );  // 3
  1107     JMP_rel8(5, end2);
  1109     JMP_TARGET(doshl);
  1110     AND_imm8_r8( 0x1F, R_CL ); // 3
  1111     SHL_r32_CL( R_EAX );       // 2
  1112     JMP_TARGET(end);
  1113     JMP_TARGET(end2);
  1114     store_reg( R_EAX, Rn );
  1115     sh4_x86.tstate = TSTATE_NONE;
  1116 :}
  1117 SHLD Rm, Rn {:  
  1118     load_reg( R_EAX, Rn );
  1119     load_reg( R_ECX, Rm );
  1120     CMP_imm32_r32( 0, R_ECX );
  1121     JGE_rel8(15, doshl);
  1123     NEG_r32( R_ECX );      // 2
  1124     AND_imm8_r8( 0x1F, R_CL ); // 3
  1125     JE_rel8( 4, emptyshr );
  1126     SHR_r32_CL( R_EAX );       // 2
  1127     JMP_rel8(9, end);          // 2
  1129     JMP_TARGET(emptyshr);
  1130     XOR_r32_r32( R_EAX, R_EAX );
  1131     JMP_rel8(5, end2);
  1133     JMP_TARGET(doshl);
  1134     AND_imm8_r8( 0x1F, R_CL ); // 3
  1135     SHL_r32_CL( R_EAX );       // 2
  1136     JMP_TARGET(end);
  1137     JMP_TARGET(end2);
  1138     store_reg( R_EAX, Rn );
  1139     sh4_x86.tstate = TSTATE_NONE;
  1140 :}
  1141 SHAL Rn {: 
  1142     load_reg( R_EAX, Rn );
  1143     SHL1_r32( R_EAX );
  1144     SETC_t();
  1145     store_reg( R_EAX, Rn );
  1146     sh4_x86.tstate = TSTATE_C;
  1147 :}
  1148 SHAR Rn {:  
  1149     load_reg( R_EAX, Rn );
  1150     SAR1_r32( R_EAX );
  1151     SETC_t();
  1152     store_reg( R_EAX, Rn );
  1153     sh4_x86.tstate = TSTATE_C;
  1154 :}
  1155 SHLL Rn {:  
  1156     load_reg( R_EAX, Rn );
  1157     SHL1_r32( R_EAX );
  1158     SETC_t();
  1159     store_reg( R_EAX, Rn );
  1160     sh4_x86.tstate = TSTATE_C;
  1161 :}
  1162 SHLL2 Rn {:
  1163     load_reg( R_EAX, Rn );
  1164     SHL_imm8_r32( 2, R_EAX );
  1165     store_reg( R_EAX, Rn );
  1166     sh4_x86.tstate = TSTATE_NONE;
  1167 :}
  1168 SHLL8 Rn {:  
  1169     load_reg( R_EAX, Rn );
  1170     SHL_imm8_r32( 8, R_EAX );
  1171     store_reg( R_EAX, Rn );
  1172     sh4_x86.tstate = TSTATE_NONE;
  1173 :}
  1174 SHLL16 Rn {:  
  1175     load_reg( R_EAX, Rn );
  1176     SHL_imm8_r32( 16, R_EAX );
  1177     store_reg( R_EAX, Rn );
  1178     sh4_x86.tstate = TSTATE_NONE;
  1179 :}
  1180 SHLR Rn {:  
  1181     load_reg( R_EAX, Rn );
  1182     SHR1_r32( R_EAX );
  1183     SETC_t();
  1184     store_reg( R_EAX, Rn );
  1185     sh4_x86.tstate = TSTATE_C;
  1186 :}
  1187 SHLR2 Rn {:  
  1188     load_reg( R_EAX, Rn );
  1189     SHR_imm8_r32( 2, R_EAX );
  1190     store_reg( R_EAX, Rn );
  1191     sh4_x86.tstate = TSTATE_NONE;
  1192 :}
  1193 SHLR8 Rn {:  
  1194     load_reg( R_EAX, Rn );
  1195     SHR_imm8_r32( 8, R_EAX );
  1196     store_reg( R_EAX, Rn );
  1197     sh4_x86.tstate = TSTATE_NONE;
  1198 :}
  1199 SHLR16 Rn {:  
  1200     load_reg( R_EAX, Rn );
  1201     SHR_imm8_r32( 16, R_EAX );
  1202     store_reg( R_EAX, Rn );
  1203     sh4_x86.tstate = TSTATE_NONE;
  1204 :}
  1205 SUB Rm, Rn {:  
  1206     load_reg( R_EAX, Rm );
  1207     load_reg( R_ECX, Rn );
  1208     SUB_r32_r32( R_EAX, R_ECX );
  1209     store_reg( R_ECX, Rn );
  1210     sh4_x86.tstate = TSTATE_NONE;
  1211 :}
  1212 SUBC Rm, Rn {:  
  1213     load_reg( R_EAX, Rm );
  1214     load_reg( R_ECX, Rn );
  1215     if( sh4_x86.tstate != TSTATE_C ) {
   1216 	LDC_t();
   1217     }
  1218     SBB_r32_r32( R_EAX, R_ECX );
  1219     store_reg( R_ECX, Rn );
  1220     SETC_t();
  1221     sh4_x86.tstate = TSTATE_C;
  1222 :}
  1223 SUBV Rm, Rn {:  
  1224     load_reg( R_EAX, Rm );
  1225     load_reg( R_ECX, Rn );
  1226     SUB_r32_r32( R_EAX, R_ECX );
  1227     store_reg( R_ECX, Rn );
  1228     SETO_t();
  1229     sh4_x86.tstate = TSTATE_O;
  1230 :}
  1231 SWAP.B Rm, Rn {:  
  1232     load_reg( R_EAX, Rm );
  1233     XCHG_r8_r8( R_AL, R_AH );
  1234     store_reg( R_EAX, Rn );
  1235 :}
  1236 SWAP.W Rm, Rn {:  
  1237     load_reg( R_EAX, Rm );
  1238     MOV_r32_r32( R_EAX, R_ECX );
  1239     SHL_imm8_r32( 16, R_ECX );
  1240     SHR_imm8_r32( 16, R_EAX );
  1241     OR_r32_r32( R_EAX, R_ECX );
  1242     store_reg( R_ECX, Rn );
  1243     sh4_x86.tstate = TSTATE_NONE;
  1244 :}
  1245 TAS.B @Rn {:  
  1246     load_reg( R_ECX, Rn );
  1247     MEM_READ_BYTE( R_ECX, R_EAX );
  1248     TEST_r8_r8( R_AL, R_AL );
  1249     SETE_t();
  1250     OR_imm8_r8( 0x80, R_AL );
  1251     load_reg( R_ECX, Rn );
  1252     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1253     sh4_x86.tstate = TSTATE_NONE;
  1254 :}
  1255 TST Rm, Rn {:  
  1256     load_reg( R_EAX, Rm );
  1257     load_reg( R_ECX, Rn );
  1258     TEST_r32_r32( R_EAX, R_ECX );
  1259     SETE_t();
  1260     sh4_x86.tstate = TSTATE_E;
  1261 :}
  1262 TST #imm, R0 {:  
  1263     load_reg( R_EAX, 0 );
  1264     TEST_imm32_r32( imm, R_EAX );
  1265     SETE_t();
  1266     sh4_x86.tstate = TSTATE_E;
  1267 :}
  1268 TST.B #imm, @(R0, GBR) {:  
  1269     load_reg( R_EAX, 0);
   1270     load_spreg( R_ECX, R_GBR );
  1271     ADD_r32_r32( R_EAX, R_ECX );
  1272     MEM_READ_BYTE( R_ECX, R_EAX );
  1273     TEST_imm8_r8( imm, R_AL );
  1274     SETE_t();
  1275     sh4_x86.tstate = TSTATE_E;
  1276 :}
  1277 XOR Rm, Rn {:  
  1278     load_reg( R_EAX, Rm );
  1279     load_reg( R_ECX, Rn );
  1280     XOR_r32_r32( R_EAX, R_ECX );
  1281     store_reg( R_ECX, Rn );
  1282     sh4_x86.tstate = TSTATE_NONE;
  1283 :}
  1284 XOR #imm, R0 {:  
  1285     load_reg( R_EAX, 0 );
  1286     XOR_imm32_r32( imm, R_EAX );
  1287     store_reg( R_EAX, 0 );
  1288     sh4_x86.tstate = TSTATE_NONE;
  1289 :}
  1290 XOR.B #imm, @(R0, GBR) {:  
  1291     load_reg( R_EAX, 0 );
  1292     load_spreg( R_ECX, R_GBR );
  1293     ADD_r32_r32( R_EAX, R_ECX );
  1294     PUSH_r32(R_ECX);
  1295     MEM_READ_BYTE(R_ECX, R_EAX);
  1296     POP_r32(R_ECX);
  1297     XOR_imm32_r32( imm, R_EAX );
  1298     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1299     sh4_x86.tstate = TSTATE_NONE;
  1300 :}
  1301 XTRCT Rm, Rn {:
  1302     load_reg( R_EAX, Rm );
  1303     load_reg( R_ECX, Rn );
  1304     SHL_imm8_r32( 16, R_EAX );
  1305     SHR_imm8_r32( 16, R_ECX );
  1306     OR_r32_r32( R_EAX, R_ECX );
  1307     store_reg( R_ECX, Rn );
  1308     sh4_x86.tstate = TSTATE_NONE;
  1309 :}
  1311 /* Data move instructions */
  1312 MOV Rm, Rn {:  
  1313     load_reg( R_EAX, Rm );
  1314     store_reg( R_EAX, Rn );
  1315 :}
  1316 MOV #imm, Rn {:  
  1317     load_imm32( R_EAX, imm );
  1318     store_reg( R_EAX, Rn );
  1319 :}
  1320 MOV.B Rm, @Rn {:  
  1321     load_reg( R_EAX, Rm );
  1322     load_reg( R_ECX, Rn );
  1323     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1324     sh4_x86.tstate = TSTATE_NONE;
  1325 :}
  1326 MOV.B Rm, @-Rn {:  
  1327     load_reg( R_EAX, Rm );
  1328     load_reg( R_ECX, Rn );
  1329     ADD_imm8s_r32( -1, R_ECX );
  1330     store_reg( R_ECX, Rn );
  1331     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1332     sh4_x86.tstate = TSTATE_NONE;
  1333 :}
  1334 MOV.B Rm, @(R0, Rn) {:  
  1335     load_reg( R_EAX, 0 );
  1336     load_reg( R_ECX, Rn );
  1337     ADD_r32_r32( R_EAX, R_ECX );
  1338     load_reg( R_EAX, Rm );
  1339     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1340     sh4_x86.tstate = TSTATE_NONE;
  1341 :}
  1342 MOV.B R0, @(disp, GBR) {:  
  1343     load_reg( R_EAX, 0 );
  1344     load_spreg( R_ECX, R_GBR );
  1345     ADD_imm32_r32( disp, R_ECX );
  1346     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1347     sh4_x86.tstate = TSTATE_NONE;
  1348 :}
  1349 MOV.B R0, @(disp, Rn) {:  
  1350     load_reg( R_EAX, 0 );
  1351     load_reg( R_ECX, Rn );
  1352     ADD_imm32_r32( disp, R_ECX );
  1353     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1354     sh4_x86.tstate = TSTATE_NONE;
  1355 :}
  1356 MOV.B @Rm, Rn {:  
  1357     load_reg( R_ECX, Rm );
  1358     MEM_READ_BYTE( R_ECX, R_EAX );
  1359     store_reg( R_EAX, Rn );
  1360     sh4_x86.tstate = TSTATE_NONE;
  1361 :}
  1362 MOV.B @Rm+, Rn {:  
  1363     load_reg( R_ECX, Rm );
  1364     MOV_r32_r32( R_ECX, R_EAX );
  1365     ADD_imm8s_r32( 1, R_EAX );
  1366     store_reg( R_EAX, Rm );
  1367     MEM_READ_BYTE( R_ECX, R_EAX );
  1368     store_reg( R_EAX, Rn );
  1369     sh4_x86.tstate = TSTATE_NONE;
  1370 :}
  1371 MOV.B @(R0, Rm), Rn {:  
  1372     load_reg( R_EAX, 0 );
  1373     load_reg( R_ECX, Rm );
  1374     ADD_r32_r32( R_EAX, R_ECX );
  1375     MEM_READ_BYTE( R_ECX, R_EAX );
  1376     store_reg( R_EAX, Rn );
  1377     sh4_x86.tstate = TSTATE_NONE;
  1378 :}
  1379 MOV.B @(disp, GBR), R0 {:  
  1380     load_spreg( R_ECX, R_GBR );
  1381     ADD_imm32_r32( disp, R_ECX );
  1382     MEM_READ_BYTE( R_ECX, R_EAX );
  1383     store_reg( R_EAX, 0 );
  1384     sh4_x86.tstate = TSTATE_NONE;
  1385 :}
  1386 MOV.B @(disp, Rm), R0 {:  
  1387     load_reg( R_ECX, Rm );
  1388     ADD_imm32_r32( disp, R_ECX );
  1389     MEM_READ_BYTE( R_ECX, R_EAX );
  1390     store_reg( R_EAX, 0 );
  1391     sh4_x86.tstate = TSTATE_NONE;
  1392 :}
  1393 MOV.L Rm, @Rn {:
  1394     load_reg( R_EAX, Rm );
  1395     load_reg( R_ECX, Rn );
  1396     precheck();
  1397     check_walign32(R_ECX);
  1398     MEM_WRITE_LONG( R_ECX, R_EAX );
  1399     sh4_x86.tstate = TSTATE_NONE;
  1400 :}
  1401 MOV.L Rm, @-Rn {:  
  1402     load_reg( R_EAX, Rm );
  1403     load_reg( R_ECX, Rn );
  1404     precheck();
  1405     check_walign32( R_ECX );
  1406     ADD_imm8s_r32( -4, R_ECX );
  1407     store_reg( R_ECX, Rn );
  1408     MEM_WRITE_LONG( R_ECX, R_EAX );
  1409     sh4_x86.tstate = TSTATE_NONE;
  1410 :}
  1411 MOV.L Rm, @(R0, Rn) {:  
  1412     load_reg( R_EAX, 0 );
  1413     load_reg( R_ECX, Rn );
  1414     ADD_r32_r32( R_EAX, R_ECX );
  1415     precheck();
  1416     check_walign32( R_ECX );
  1417     load_reg( R_EAX, Rm );
  1418     MEM_WRITE_LONG( R_ECX, R_EAX );
  1419     sh4_x86.tstate = TSTATE_NONE;
  1420 :}
  1421 MOV.L R0, @(disp, GBR) {:  
  1422     load_spreg( R_ECX, R_GBR );
  1423     load_reg( R_EAX, 0 );
  1424     ADD_imm32_r32( disp, R_ECX );
  1425     precheck();
  1426     check_walign32( R_ECX );
  1427     MEM_WRITE_LONG( R_ECX, R_EAX );
  1428     sh4_x86.tstate = TSTATE_NONE;
  1429 :}
  1430 MOV.L Rm, @(disp, Rn) {:  
  1431     load_reg( R_ECX, Rn );
  1432     load_reg( R_EAX, Rm );
  1433     ADD_imm32_r32( disp, R_ECX );
  1434     precheck();
  1435     check_walign32( R_ECX );
  1436     MEM_WRITE_LONG( R_ECX, R_EAX );
  1437     sh4_x86.tstate = TSTATE_NONE;
  1438 :}
  1439 MOV.L @Rm, Rn {:  
  1440     load_reg( R_ECX, Rm );
  1441     precheck();
  1442     check_ralign32( R_ECX );
  1443     MEM_READ_LONG( R_ECX, R_EAX );
  1444     store_reg( R_EAX, Rn );
  1445     sh4_x86.tstate = TSTATE_NONE;
  1446 :}
  1447 MOV.L @Rm+, Rn {:  
  1448     load_reg( R_EAX, Rm );
  1449     precheck();
  1450     check_ralign32( R_EAX );
  1451     MOV_r32_r32( R_EAX, R_ECX );
  1452     ADD_imm8s_r32( 4, R_EAX );
  1453     store_reg( R_EAX, Rm );
  1454     MEM_READ_LONG( R_ECX, R_EAX );
  1455     store_reg( R_EAX, Rn );
  1456     sh4_x86.tstate = TSTATE_NONE;
  1457 :}
  1458 MOV.L @(R0, Rm), Rn {:  
  1459     load_reg( R_EAX, 0 );
  1460     load_reg( R_ECX, Rm );
  1461     ADD_r32_r32( R_EAX, R_ECX );
  1462     precheck();
  1463     check_ralign32( R_ECX );
  1464     MEM_READ_LONG( R_ECX, R_EAX );
  1465     store_reg( R_EAX, Rn );
  1466     sh4_x86.tstate = TSTATE_NONE;
  1467 :}
  1468 MOV.L @(disp, GBR), R0 {:
  1469     load_spreg( R_ECX, R_GBR );
  1470     ADD_imm32_r32( disp, R_ECX );
  1471     precheck();
  1472     check_ralign32( R_ECX );
  1473     MEM_READ_LONG( R_ECX, R_EAX );
  1474     store_reg( R_EAX, 0 );
  1475     sh4_x86.tstate = TSTATE_NONE;
  1476 :}
  1477 MOV.L @(disp, PC), Rn {:  
  1478     if( sh4_x86.in_delay_slot ) {
  1479 	SLOTILLEGAL();
  1480     } else {
  1481 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1482 	sh4ptr_t ptr = mem_get_region(target);
  1483 	if( ptr != NULL ) {
  1484 	    MOV_moff32_EAX( ptr );
  1485 	} else {
  1486 	    load_imm32( R_ECX, target );
   1487 	    MEM_READ_LONG( R_ECX, R_EAX );
   1488 	}
  1489 	store_reg( R_EAX, Rn );
   1490 	sh4_x86.tstate = TSTATE_NONE;
   1491     }
  1492 :}
  1493 MOV.L @(disp, Rm), Rn {:  
  1494     load_reg( R_ECX, Rm );
  1495     ADD_imm8s_r32( disp, R_ECX );
  1496     precheck();
  1497     check_ralign32( R_ECX );
  1498     MEM_READ_LONG( R_ECX, R_EAX );
  1499     store_reg( R_EAX, Rn );
  1500     sh4_x86.tstate = TSTATE_NONE;
  1501 :}
  1502 MOV.W Rm, @Rn {:  
  1503     load_reg( R_ECX, Rn );
  1504     precheck();
  1505     check_walign16( R_ECX );
  1506     load_reg( R_EAX, Rm );
  1507     MEM_WRITE_WORD( R_ECX, R_EAX );
  1508     sh4_x86.tstate = TSTATE_NONE;
  1509 :}
  1510 MOV.W Rm, @-Rn {:  
  1511     load_reg( R_ECX, Rn );
  1512     precheck();
  1513     check_walign16( R_ECX );
  1514     load_reg( R_EAX, Rm );
  1515     ADD_imm8s_r32( -2, R_ECX );
  1516     store_reg( R_ECX, Rn );
  1517     MEM_WRITE_WORD( R_ECX, R_EAX );
  1518     sh4_x86.tstate = TSTATE_NONE;
  1519 :}
  1520 MOV.W Rm, @(R0, Rn) {:  
  1521     load_reg( R_EAX, 0 );
  1522     load_reg( R_ECX, Rn );
  1523     ADD_r32_r32( R_EAX, R_ECX );
  1524     precheck();
  1525     check_walign16( R_ECX );
  1526     load_reg( R_EAX, Rm );
  1527     MEM_WRITE_WORD( R_ECX, R_EAX );
  1528     sh4_x86.tstate = TSTATE_NONE;
  1529 :}
  1530 MOV.W R0, @(disp, GBR) {:  
  1531     load_spreg( R_ECX, R_GBR );
  1532     load_reg( R_EAX, 0 );
  1533     ADD_imm32_r32( disp, R_ECX );
  1534     precheck();
  1535     check_walign16( R_ECX );
  1536     MEM_WRITE_WORD( R_ECX, R_EAX );
  1537     sh4_x86.tstate = TSTATE_NONE;
  1538 :}
  1539 MOV.W R0, @(disp, Rn) {:  
  1540     load_reg( R_ECX, Rn );
  1541     load_reg( R_EAX, 0 );
  1542     ADD_imm32_r32( disp, R_ECX );
  1543     precheck();
  1544     check_walign16( R_ECX );
  1545     MEM_WRITE_WORD( R_ECX, R_EAX );
  1546     sh4_x86.tstate = TSTATE_NONE;
  1547 :}
  1548 MOV.W @Rm, Rn {:  
  1549     load_reg( R_ECX, Rm );
  1550     precheck();
  1551     check_ralign16( R_ECX );
  1552     MEM_READ_WORD( R_ECX, R_EAX );
  1553     store_reg( R_EAX, Rn );
  1554     sh4_x86.tstate = TSTATE_NONE;
  1555 :}
  1556 MOV.W @Rm+, Rn {:  
  1557     load_reg( R_EAX, Rm );
  1558     precheck();
  1559     check_ralign16( R_EAX );
  1560     MOV_r32_r32( R_EAX, R_ECX );
  1561     ADD_imm8s_r32( 2, R_EAX );
  1562     store_reg( R_EAX, Rm );
  1563     MEM_READ_WORD( R_ECX, R_EAX );
  1564     store_reg( R_EAX, Rn );
  1565     sh4_x86.tstate = TSTATE_NONE;
  1566 :}
  1567 MOV.W @(R0, Rm), Rn {:  
  1568     load_reg( R_EAX, 0 );
  1569     load_reg( R_ECX, Rm );
  1570     ADD_r32_r32( R_EAX, R_ECX );
  1571     precheck();
  1572     check_ralign16( R_ECX );
  1573     MEM_READ_WORD( R_ECX, R_EAX );
  1574     store_reg( R_EAX, Rn );
  1575     sh4_x86.tstate = TSTATE_NONE;
  1576 :}
  1577 MOV.W @(disp, GBR), R0 {:  
  1578     load_spreg( R_ECX, R_GBR );
  1579     ADD_imm32_r32( disp, R_ECX );
  1580     precheck();
  1581     check_ralign16( R_ECX );
  1582     MEM_READ_WORD( R_ECX, R_EAX );
  1583     store_reg( R_EAX, 0 );
  1584     sh4_x86.tstate = TSTATE_NONE;
  1585 :}
  1586 MOV.W @(disp, PC), Rn {:  
  1587     if( sh4_x86.in_delay_slot ) {
  1588 	SLOTILLEGAL();
  1589     } else {
  1590 	load_imm32( R_ECX, pc + disp + 4 );
  1591 	MEM_READ_WORD( R_ECX, R_EAX );
  1592 	store_reg( R_EAX, Rn );
   1593 	sh4_x86.tstate = TSTATE_NONE;
   1594     }
  1595 :}
  1596 MOV.W @(disp, Rm), R0 {:  
  1597     load_reg( R_ECX, Rm );
  1598     ADD_imm32_r32( disp, R_ECX );
  1599     precheck();
  1600     check_ralign16( R_ECX );
  1601     MEM_READ_WORD( R_ECX, R_EAX );
  1602     store_reg( R_EAX, 0 );
  1603     sh4_x86.tstate = TSTATE_NONE;
  1604 :}
  1605 MOVA @(disp, PC), R0 {:  
  1606     if( sh4_x86.in_delay_slot ) {
  1607 	SLOTILLEGAL();
  1608     } else {
  1609 	load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
   1610 	store_reg( R_ECX, 0 );
   1611     }
  1612 :}
  1613 MOVCA.L R0, @Rn {:  
  1614     load_reg( R_EAX, 0 );
  1615     load_reg( R_ECX, Rn );
  1616     precheck();
  1617     check_walign32( R_ECX );
  1618     MEM_WRITE_LONG( R_ECX, R_EAX );
  1619     sh4_x86.tstate = TSTATE_NONE;
  1620 :}
  1622 /* Control transfer instructions */
  1623 BF disp {:
  1624     if( sh4_x86.in_delay_slot ) {
  1625 	SLOTILLEGAL();
  1626     } else {
  1627 	JT_rel8( EXIT_BLOCK_SIZE, nottaken );
  1628 	exit_block( disp + pc + 4, pc+2 );
  1629 	JMP_TARGET(nottaken);
   1630 	return 2;
   1631     }
  1632 :}
  1633 BF/S disp {:
  1634     if( sh4_x86.in_delay_slot ) {
  1635 	SLOTILLEGAL();
  1636     } else {
  1637 	sh4_x86.in_delay_slot = TRUE;
  1638 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1639 	    CMP_imm8s_sh4r( 1, R_T );
   1640 	    sh4_x86.tstate = TSTATE_E;
   1641 	}
  1642 	OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JNE rel32
  1643 	sh4_translate_instruction(pc+2);
  1644 	exit_block( disp + pc + 4, pc+4 );
  1645 	// not taken
  1646 	*patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1647 	sh4_translate_instruction(pc+2);
   1648 	return 4;
   1649     }
  1650 :}
  1651 BRA disp {:  
  1652     if( sh4_x86.in_delay_slot ) {
  1653 	SLOTILLEGAL();
  1654     } else {
  1655 	sh4_x86.in_delay_slot = TRUE;
  1656 	sh4_translate_instruction( pc + 2 );
  1657 	exit_block( disp + pc + 4, pc+4 );
  1658 	sh4_x86.branch_taken = TRUE;
   1659 	return 4;
   1660     }
  1661 :}
  1662 BRAF Rn {:  
  1663     if( sh4_x86.in_delay_slot ) {
  1664 	SLOTILLEGAL();
  1665     } else {
  1666 	load_reg( R_EAX, Rn );
  1667 	ADD_imm32_r32( pc + 4, R_EAX );
  1668 	store_spreg( R_EAX, REG_OFFSET(pc) );
  1669 	sh4_x86.in_delay_slot = TRUE;
  1670 	sh4_x86.tstate = TSTATE_NONE;
  1671 	sh4_translate_instruction( pc + 2 );
  1672 	exit_block_pcset(pc+2);
  1673 	sh4_x86.branch_taken = TRUE;
   1674 	return 4;
   1675     }
  1676 :}
  1677 BSR disp {:  
  1678     if( sh4_x86.in_delay_slot ) {
  1679 	SLOTILLEGAL();
  1680     } else {
  1681 	load_imm32( R_EAX, pc + 4 );
  1682 	store_spreg( R_EAX, R_PR );
  1683 	sh4_x86.in_delay_slot = TRUE;
  1684 	sh4_translate_instruction( pc + 2 );
  1685 	exit_block( disp + pc + 4, pc+4 );
  1686 	sh4_x86.branch_taken = TRUE;
   1687 	return 4;
   1688     }
  1689 :}
  1690 BSRF Rn {:  
  1691     if( sh4_x86.in_delay_slot ) {
  1692 	SLOTILLEGAL();
  1693     } else {
  1694 	load_imm32( R_ECX, pc + 4 );
  1695 	store_spreg( R_ECX, R_PR );
  1696 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_ECX );
  1697 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1698 	sh4_x86.in_delay_slot = TRUE;
  1699 	sh4_x86.tstate = TSTATE_NONE;
  1700 	sh4_translate_instruction( pc + 2 );
  1701 	exit_block_pcset(pc+2);
  1702 	sh4_x86.branch_taken = TRUE;
   1703 	return 4;
   1704     }
  1705 :}
  1706 BT disp {:
  1707     if( sh4_x86.in_delay_slot ) {
  1708 	SLOTILLEGAL();
  1709     } else {
  1710 	JF_rel8( EXIT_BLOCK_SIZE, nottaken );
  1711 	exit_block( disp + pc + 4, pc+2 );
  1712 	JMP_TARGET(nottaken);
   1713 	return 2;
   1714     }
  1715 :}
  1716 BT/S disp {:
  1717     if( sh4_x86.in_delay_slot ) {
  1718 	SLOTILLEGAL();
  1719     } else {
  1720 	sh4_x86.in_delay_slot = TRUE;
  1721 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1722 	    CMP_imm8s_sh4r( 1, R_T );
   1723 	    sh4_x86.tstate = TSTATE_E;
   1724 	}
  1725 	OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JE rel32
  1726 	sh4_translate_instruction(pc+2);
  1727 	exit_block( disp + pc + 4, pc+4 );
  1728 	// not taken
  1729 	*patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1730 	sh4_translate_instruction(pc+2);
   1731 	return 4;
   1732     }
  1733 :}
  1734 JMP @Rn {:  
  1735     if( sh4_x86.in_delay_slot ) {
  1736 	SLOTILLEGAL();
  1737     } else {
  1738 	load_reg( R_ECX, Rn );
  1739 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1740 	sh4_x86.in_delay_slot = TRUE;
  1741 	sh4_translate_instruction(pc+2);
  1742 	exit_block_pcset(pc+2);
  1743 	sh4_x86.branch_taken = TRUE;
   1744 	return 4;
   1745     }
  1746 :}
  1747 JSR @Rn {:  
  1748     if( sh4_x86.in_delay_slot ) {
  1749 	SLOTILLEGAL();
  1750     } else {
  1751 	load_imm32( R_EAX, pc + 4 );
  1752 	store_spreg( R_EAX, R_PR );
  1753 	load_reg( R_ECX, Rn );
  1754 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1755 	sh4_x86.in_delay_slot = TRUE;
  1756 	sh4_translate_instruction(pc+2);
  1757 	exit_block_pcset(pc+2);
  1758 	sh4_x86.branch_taken = TRUE;
   1759 	return 4;
   1760     }
  1761 :}
  1762 RTE {:  
  1763     if( sh4_x86.in_delay_slot ) {
  1764 	SLOTILLEGAL();
  1765     } else {
  1766 	check_priv();
  1767 	load_spreg( R_ECX, R_SPC );
  1768 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1769 	load_spreg( R_EAX, R_SSR );
  1770 	call_func1( sh4_write_sr, R_EAX );
  1771 	sh4_x86.in_delay_slot = TRUE;
  1772 	sh4_x86.priv_checked = FALSE;
  1773 	sh4_x86.fpuen_checked = FALSE;
  1774 	sh4_x86.tstate = TSTATE_NONE;
  1775 	sh4_translate_instruction(pc+2);
  1776 	exit_block_pcset(pc+2);
  1777 	sh4_x86.branch_taken = TRUE;
   1778 	return 4;
   1779     }
  1780 :}
  1781 RTS {:  
  1782     if( sh4_x86.in_delay_slot ) {
  1783 	SLOTILLEGAL();
  1784     } else {
  1785 	load_spreg( R_ECX, R_PR );
  1786 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1787 	sh4_x86.in_delay_slot = TRUE;
  1788 	sh4_translate_instruction(pc+2);
  1789 	exit_block_pcset(pc+2);
  1790 	sh4_x86.branch_taken = TRUE;
   1791 	return 4;
   1792     }
  1793 :}
  1794 TRAPA #imm {:  
  1795     if( sh4_x86.in_delay_slot ) {
  1796 	SLOTILLEGAL();
  1797     } else {
  1798 	load_imm32( R_EAX, imm );
  1799 	call_func1( sh4_raise_trap, R_EAX );
  1800 	ADD_imm8s_r32( 4, R_ESP );
  1801 	sh4_x86.tstate = TSTATE_NONE;
  1802 	exit_block_pcset(pc);
  1803 	sh4_x86.branch_taken = TRUE;
   1804 	return 2;
   1805     }
  1806 :}
  1807 UNDEF {:  
  1808     if( sh4_x86.in_delay_slot ) {
  1809 	SLOTILLEGAL();
  1810     } else {
  1811 	precheck();
  1812 	JMP_exit(EXIT_ILLEGAL);
   1813 	return 2;
   1814     }
  1815 :}
  1817 CLRMAC {:  
  1818     XOR_r32_r32(R_EAX, R_EAX);
  1819     store_spreg( R_EAX, R_MACL );
  1820     store_spreg( R_EAX, R_MACH );
  1821     sh4_x86.tstate = TSTATE_NONE;
  1822 :}
  1823 CLRS {:
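            // CLC/STC place the new bit in the host carry flag before SETC writes
            // it back; recording tstate = TSTATE_C lets a following conditional
            // branch test the carry directly instead of re-reading sh4r.t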
  1824     CLC();
  1825     SETC_sh4r(R_S);
  1826     sh4_x86.tstate = TSTATE_C;
  1827 :}
  1828 CLRT {:  
  1829     CLC();
  1830     SETC_t();
  1831     sh4_x86.tstate = TSTATE_C;
  1832 :}
  1833 SETS {:  
  1834     STC();
  1835     SETC_sh4r(R_S);
  1836     sh4_x86.tstate = TSTATE_C;
  1837 :}
  1838 SETT {:  
  1839     STC();
  1840     SETC_t();
  1841     sh4_x86.tstate = TSTATE_C;
  1842 :}
  1844 /* Floating point moves */
  1845 FMOV FRm, FRn {:  
  1846     /* As horrible as this looks, it's actually covering 5 separate cases:
  1847      * 1. 32-bit fr-to-fr (PR=0)
  1848      * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )
  1849      * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )
  1850      * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
  1851      * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
  1852      */
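            /* Case selection sketch (illustrative, not emitter code): FPSCR.SZ is
             * only known at runtime, so the generated code tests it and branches,
             * while the FRm/FRn parities are fixed at translation time:
             *
             *     int fmov_case = !fpscr_sz ? 1                            // 32-bit
             *                               : 2 + ((FRm&1)<<1) + (FRn&1);  // 64-bit
             *
             * Hence only one of the four 64-bit variants is emitted per opcode.
             */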
  1853     check_fpuen();
  1854     load_spreg( R_ECX, R_FPSCR );
  1855     load_fr_bank( R_EDX );
  1856     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1857     JNE_rel8(8, doublesize);
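            // (rel8 skip counts such as the 8 above are hand-computed byte sizes
            //  of the single-precision path being jumped over)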
  1858     load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
  1859     store_fr( R_EDX, R_EAX, FRn );
  1860     if( FRm&1 ) {
  1861 	JMP_rel8(24, end);
  1862 	JMP_TARGET(doublesize);
  1863 	load_xf_bank( R_ECX ); 
  1864 	load_fr( R_ECX, R_EAX, FRm-1 );
  1865 	if( FRn&1 ) {
  1866 	    load_fr( R_ECX, R_EDX, FRm );
  1867 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1868 	    store_fr( R_ECX, R_EDX, FRn );
  1869 	} else /* FRn&1 == 0 */ {
  1870 	    load_fr( R_ECX, R_ECX, FRm );
  1871 	    store_fr( R_EDX, R_EAX, FRn );
   1872 	    store_fr( R_EDX, R_ECX, FRn+1 );
   1873 	}
  1874 	JMP_TARGET(end);
  1875     } else /* FRm&1 == 0 */ {
  1876 	if( FRn&1 ) {
  1877 	    JMP_rel8(24, end);
  1878 	    load_xf_bank( R_ECX );
  1879 	    load_fr( R_EDX, R_EAX, FRm );
  1880 	    load_fr( R_EDX, R_EDX, FRm+1 );
  1881 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1882 	    store_fr( R_ECX, R_EDX, FRn );
  1883 	    JMP_TARGET(end);
  1884 	} else /* FRn&1 == 0 */ {
  1885 	    JMP_rel8(12, end);
  1886 	    load_fr( R_EDX, R_EAX, FRm );
  1887 	    load_fr( R_EDX, R_ECX, FRm+1 );
  1888 	    store_fr( R_EDX, R_EAX, FRn );
  1889 	    store_fr( R_EDX, R_ECX, FRn+1 );
   1890 	    JMP_TARGET(end);
   1891 	}
   1892     }
  1893     sh4_x86.tstate = TSTATE_NONE;
  1894 :}
  1895 FMOV FRm, @Rn {: 
  1896     precheck();
  1897     check_fpuen_no_precheck();
  1898     load_reg( R_ECX, Rn );
  1899     check_walign32( R_ECX );
  1900     load_spreg( R_EDX, R_FPSCR );
  1901     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1902     JNE_rel8(8 + CALL_FUNC2_SIZE, doublesize);
  1903     load_fr_bank( R_EDX );
  1904     load_fr( R_EDX, R_EAX, FRm );
  1905     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1906     if( FRm&1 ) {
  1907 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1908 	JMP_TARGET(doublesize);
  1909 	load_xf_bank( R_EDX );
  1910 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1911 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1912 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1913 	JMP_TARGET(end);
  1914     } else {
  1915 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1916 	JMP_TARGET(doublesize);
  1917 	load_fr_bank( R_EDX );
  1918 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1919 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1920 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
   1921 	JMP_TARGET(end);
   1922     }
  1923     sh4_x86.tstate = TSTATE_NONE;
  1924 :}
  1925 FMOV @Rm, FRn {:  
  1926     precheck();
  1927     check_fpuen_no_precheck();
  1928     load_reg( R_ECX, Rm );
  1929     check_ralign32( R_ECX );
  1930     load_spreg( R_EDX, R_FPSCR );
  1931     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1932     JNE_rel8(8 + CALL_FUNC1_SIZE, doublesize);
  1933     MEM_READ_LONG( R_ECX, R_EAX );
  1934     load_fr_bank( R_EDX );
  1935     store_fr( R_EDX, R_EAX, FRn );
  1936     if( FRn&1 ) {
  1937 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1938 	JMP_TARGET(doublesize);
  1939 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1940 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1941 	load_xf_bank( R_EDX );
  1942 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1943 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1944 	JMP_TARGET(end);
  1945     } else {
  1946 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1947 	JMP_TARGET(doublesize);
  1948 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1949 	load_fr_bank( R_EDX );
  1950 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1951 	store_fr( R_EDX, R_ECX, FRn|0x01 );
   1952 	JMP_TARGET(end);
   1953     }
  1954     sh4_x86.tstate = TSTATE_NONE;
  1955 :}
  1956 FMOV FRm, @-Rn {:  
  1957     precheck();
  1958     check_fpuen_no_precheck();
  1959     load_reg( R_ECX, Rn );
  1960     check_walign32( R_ECX );
  1961     load_spreg( R_EDX, R_FPSCR );
  1962     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1963     JNE_rel8(14 + CALL_FUNC2_SIZE, doublesize);
  1964     load_fr_bank( R_EDX );
  1965     load_fr( R_EDX, R_EAX, FRm );
  1966     ADD_imm8s_r32(-4,R_ECX);
  1967     store_reg( R_ECX, Rn );
  1968     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1969     if( FRm&1 ) {
  1970 	JMP_rel8( 24 + MEM_WRITE_DOUBLE_SIZE, end );
  1971 	JMP_TARGET(doublesize);
  1972 	load_xf_bank( R_EDX );
  1973 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1974 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1975 	ADD_imm8s_r32(-8,R_ECX);
  1976 	store_reg( R_ECX, Rn );
  1977 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1978 	JMP_TARGET(end);
  1979     } else {
  1980 	JMP_rel8( 15 + MEM_WRITE_DOUBLE_SIZE, end );
  1981 	JMP_TARGET(doublesize);
  1982 	load_fr_bank( R_EDX );
  1983 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1984 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1985 	ADD_imm8s_r32(-8,R_ECX);
  1986 	store_reg( R_ECX, Rn );
  1987 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
   1988 	JMP_TARGET(end);
   1989     }
  1990     sh4_x86.tstate = TSTATE_NONE;
  1991 :}
  1992 FMOV @Rm+, FRn {:
  1993     precheck();
  1994     check_fpuen_no_precheck();
  1995     load_reg( R_ECX, Rm );
  1996     check_ralign32( R_ECX );
  1997     MOV_r32_r32( R_ECX, R_EAX );
  1998     load_spreg( R_EDX, R_FPSCR );
  1999     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  2000     JNE_rel8(14 + CALL_FUNC1_SIZE, doublesize);
  2001     ADD_imm8s_r32( 4, R_EAX );
  2002     store_reg( R_EAX, Rm );
  2003     MEM_READ_LONG( R_ECX, R_EAX );
  2004     load_fr_bank( R_EDX );
  2005     store_fr( R_EDX, R_EAX, FRn );
  2006     if( FRn&1 ) {
  2007 	JMP_rel8(27 + MEM_READ_DOUBLE_SIZE, end);
  2008 	JMP_TARGET(doublesize);
  2009 	ADD_imm8s_r32( 8, R_EAX );
  2010 	store_reg(R_EAX, Rm);
  2011 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  2012 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  2013 	load_xf_bank( R_EDX );
  2014 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  2015 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  2016 	JMP_TARGET(end);
  2017     } else {
   2018 	JMP_rel8(15 + MEM_READ_DOUBLE_SIZE, end);
        	JMP_TARGET(doublesize);
  2019 	ADD_imm8s_r32( 8, R_EAX );
  2020 	store_reg(R_EAX, Rm);
  2021 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  2022 	load_fr_bank( R_EDX );
  2023 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  2024 	store_fr( R_EDX, R_ECX, FRn|0x01 );
   2025 	JMP_TARGET(end);
   2026     }
  2027     sh4_x86.tstate = TSTATE_NONE;
  2028 :}
  2029 FMOV FRm, @(R0, Rn) {:  
  2030     precheck();
  2031     check_fpuen_no_precheck();
  2032     load_reg( R_ECX, Rn );
  2033     ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX );
  2034     check_walign32( R_ECX );
  2035     load_spreg( R_EDX, R_FPSCR );
  2036     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  2037     JNE_rel8(8 + CALL_FUNC2_SIZE, doublesize);
  2038     load_fr_bank( R_EDX );
  2039     load_fr( R_EDX, R_EAX, FRm );
  2040     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  2041     if( FRm&1 ) {
  2042 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  2043 	JMP_TARGET(doublesize);
  2044 	load_xf_bank( R_EDX );
  2045 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  2046 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  2047 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  2048 	JMP_TARGET(end);
  2049     } else {
  2050 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  2051 	JMP_TARGET(doublesize);
  2052 	load_fr_bank( R_EDX );
  2053 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  2054 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  2055 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
   2056 	JMP_TARGET(end);
   2057     }
  2058     sh4_x86.tstate = TSTATE_NONE;
  2059 :}
  2060 FMOV @(R0, Rm), FRn {:  
  2061     precheck();
  2062     check_fpuen_no_precheck();
  2063     load_reg( R_ECX, Rm );
  2064     ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX );
  2065     check_ralign32( R_ECX );
  2066     load_spreg( R_EDX, R_FPSCR );
  2067     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  2068     JNE_rel8(8 + CALL_FUNC1_SIZE, doublesize);
  2069     MEM_READ_LONG( R_ECX, R_EAX );
  2070     load_fr_bank( R_EDX );
  2071     store_fr( R_EDX, R_EAX, FRn );
  2072     if( FRn&1 ) {
  2073 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  2074 	JMP_TARGET(doublesize);
  2075 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  2076 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  2077 	load_xf_bank( R_EDX );
  2078 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  2079 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  2080 	JMP_TARGET(end);
  2081     } else {
  2082 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  2083 	JMP_TARGET(doublesize);
  2084 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  2085 	load_fr_bank( R_EDX );
  2086 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  2087 	store_fr( R_EDX, R_ECX, FRn|0x01 );
   2088 	JMP_TARGET(end);
   2089     }
  2090     sh4_x86.tstate = TSTATE_NONE;
  2091 :}
  2092 FLDI0 FRn {:  /* IFF PR=0 */
  2093     check_fpuen();
  2094     load_spreg( R_ECX, R_FPSCR );
  2095     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2096     JNE_rel8(8, end);
  2097     XOR_r32_r32( R_EAX, R_EAX );
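            // (bit pattern 0x00000000 == +0.0f)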
  2098     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  2099     store_fr( R_ECX, R_EAX, FRn );
  2100     JMP_TARGET(end);
  2101     sh4_x86.tstate = TSTATE_NONE;
  2102 :}
  2103 FLDI1 FRn {:  /* IFF PR=0 */
  2104     check_fpuen();
  2105     load_spreg( R_ECX, R_FPSCR );
  2106     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2107     JNE_rel8(11, end);
  2108     load_imm32(R_EAX, 0x3F800000);
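            // (0x3F800000 is the IEEE754 single-precision encoding of 1.0)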
  2109     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  2110     store_fr( R_ECX, R_EAX, FRn );
  2111     JMP_TARGET(end);
  2112     sh4_x86.tstate = TSTATE_NONE;
  2113 :}
  2115 FLOAT FPUL, FRn {:  
  2116     check_fpuen();
  2117     load_spreg( R_ECX, R_FPSCR );
  2118     load_spreg(R_EDX, REG_OFFSET(fr_bank));
  2119     FILD_sh4r(R_FPUL);
  2120     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2121     JNE_rel8(5, doubleprec);
  2122     pop_fr( R_EDX, FRn );
  2123     JMP_rel8(3, end);
  2124     JMP_TARGET(doubleprec);
  2125     pop_dr( R_EDX, FRn );
  2126     JMP_TARGET(end);
  2127     sh4_x86.tstate = TSTATE_NONE;
  2128 :}
  2129 FTRC FRm, FPUL {:  
  2130     check_fpuen();
  2131     load_spreg( R_ECX, R_FPSCR );
  2132     load_fr_bank( R_EDX );
  2133     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2134     JNE_rel8(5, doubleprec);
  2135     push_fr( R_EDX, FRm );
  2136     JMP_rel8(3, doop);
  2137     JMP_TARGET(doubleprec);
  2138     push_dr( R_EDX, FRm );
  2139     JMP_TARGET( doop );
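            /* Saturating double-to-int conversion; roughly equivalent C
             * (illustrative only):
             *     if( v >= (double)max_int )      FPUL = max_int;    // sat
             *     else if( v <= (double)min_int ) FPUL = min_int;    // sat2
             *     else { save_fcw = fcw; fcw = trunc_fcw;            // round-to-zero
             *            FPUL = (int32_t)v; fcw = save_fcw; }
             * FCOMIP pops the bound after comparing, leaving v in ST(0). */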
  2140     load_imm32( R_ECX, (uint32_t)&max_int );
  2141     FILD_r32ind( R_ECX );
  2142     FCOMIP_st(1);
  2143     JNA_rel8( 32, sat );
  2144     load_imm32( R_ECX, (uint32_t)&min_int );  // 5
  2145     FILD_r32ind( R_ECX );           // 2
  2146     FCOMIP_st(1);                   // 2
  2147     JAE_rel8( 21, sat2 );            // 2
  2148     load_imm32( R_EAX, (uint32_t)&save_fcw );
  2149     FNSTCW_r32ind( R_EAX );
  2150     load_imm32( R_EDX, (uint32_t)&trunc_fcw );
  2151     FLDCW_r32ind( R_EDX );
  2152     FISTP_sh4r(R_FPUL);             // 3
  2153     FLDCW_r32ind( R_EAX );
  2154     JMP_rel8( 9, end );             // 2
  2156     JMP_TARGET(sat);
  2157     JMP_TARGET(sat2);
  2158     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  2159     store_spreg( R_ECX, R_FPUL );
  2160     FPOP_st();
  2161     JMP_TARGET(end);
  2162     sh4_x86.tstate = TSTATE_NONE;
  2163 :}
  2164 FLDS FRm, FPUL {:  
  2165     check_fpuen();
  2166     load_fr_bank( R_ECX );
  2167     load_fr( R_ECX, R_EAX, FRm );
  2168     store_spreg( R_EAX, R_FPUL );
  2169     sh4_x86.tstate = TSTATE_NONE;
  2170 :}
  2171 FSTS FPUL, FRn {:  
  2172     check_fpuen();
  2173     load_fr_bank( R_ECX );
  2174     load_spreg( R_EAX, R_FPUL );
  2175     store_fr( R_ECX, R_EAX, FRn );
  2176     sh4_x86.tstate = TSTATE_NONE;
  2177 :}
  2178 FCNVDS FRm, FPUL {:  
  2179     check_fpuen();
  2180     load_spreg( R_ECX, R_FPSCR );
  2181     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2182     JE_rel8(9, end); // only when PR=1
  2183     load_fr_bank( R_ECX );
  2184     push_dr( R_ECX, FRm );
  2185     pop_fpul();
  2186     JMP_TARGET(end);
  2187     sh4_x86.tstate = TSTATE_NONE;
  2188 :}
  2189 FCNVSD FPUL, FRn {:  
  2190     check_fpuen();
  2191     load_spreg( R_ECX, R_FPSCR );
  2192     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2193     JE_rel8(9, end); // only when PR=1
  2194     load_fr_bank( R_ECX );
  2195     push_fpul();
  2196     pop_dr( R_ECX, FRn );
  2197     JMP_TARGET(end);
  2198     sh4_x86.tstate = TSTATE_NONE;
  2199 :}
  2201 /* Floating point instructions */
  2202 FABS FRn {:  
  2203     check_fpuen();
  2204     load_spreg( R_ECX, R_FPSCR );
  2205     load_fr_bank( R_EDX );
  2206     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2207     JNE_rel8(10, doubleprec);
  2208     push_fr(R_EDX, FRn); // 3
  2209     FABS_st0(); // 2
  2210     pop_fr( R_EDX, FRn); //3
  2211     JMP_rel8(8,end); // 2
  2212     JMP_TARGET(doubleprec);
  2213     push_dr(R_EDX, FRn);
  2214     FABS_st0();
  2215     pop_dr(R_EDX, FRn);
  2216     JMP_TARGET(end);
  2217     sh4_x86.tstate = TSTATE_NONE;
  2218 :}
  2219 FADD FRm, FRn {:  
  2220     check_fpuen();
  2221     load_spreg( R_ECX, R_FPSCR );
  2222     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2223     load_fr_bank( R_EDX );
  2224     JNE_rel8(13,doubleprec);
  2225     push_fr(R_EDX, FRm);
  2226     push_fr(R_EDX, FRn);
  2227     FADDP_st(1);
  2228     pop_fr(R_EDX, FRn);
  2229     JMP_rel8(11,end);
  2230     JMP_TARGET(doubleprec);
  2231     push_dr(R_EDX, FRm);
  2232     push_dr(R_EDX, FRn);
  2233     FADDP_st(1);
  2234     pop_dr(R_EDX, FRn);
  2235     JMP_TARGET(end);
  2236     sh4_x86.tstate = TSTATE_NONE;
  2237 :}
  2238 FDIV FRm, FRn {:  
  2239     check_fpuen();
  2240     load_spreg( R_ECX, R_FPSCR );
  2241     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2242     load_fr_bank( R_EDX );
  2243     JNE_rel8(13, doubleprec);
  2244     push_fr(R_EDX, FRn);
  2245     push_fr(R_EDX, FRm);
  2246     FDIVP_st(1);
  2247     pop_fr(R_EDX, FRn);
  2248     JMP_rel8(11, end);
  2249     JMP_TARGET(doubleprec);
  2250     push_dr(R_EDX, FRn);
  2251     push_dr(R_EDX, FRm);
  2252     FDIVP_st(1);
  2253     pop_dr(R_EDX, FRn);
  2254     JMP_TARGET(end);
  2255     sh4_x86.tstate = TSTATE_NONE;
  2256 :}
  2257 FMAC FR0, FRm, FRn {:  
  2258     check_fpuen();
  2259     load_spreg( R_ECX, R_FPSCR );
  2260     load_spreg( R_EDX, REG_OFFSET(fr_bank));
  2261     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2262     JNE_rel8(18, doubleprec);
  2263     push_fr( R_EDX, 0 );
  2264     push_fr( R_EDX, FRm );
  2265     FMULP_st(1);
  2266     push_fr( R_EDX, FRn );
  2267     FADDP_st(1);
  2268     pop_fr( R_EDX, FRn );
  2269     JMP_rel8(16, end);
  2270     JMP_TARGET(doubleprec);
  2271     push_dr( R_EDX, 0 );
  2272     push_dr( R_EDX, FRm );
  2273     FMULP_st(1);
  2274     push_dr( R_EDX, FRn );
  2275     FADDP_st(1);
  2276     pop_dr( R_EDX, FRn );
  2277     JMP_TARGET(end);
  2278     sh4_x86.tstate = TSTATE_NONE;
  2279 :}
  2281 FMUL FRm, FRn {:  
  2282     check_fpuen();
  2283     load_spreg( R_ECX, R_FPSCR );
  2284     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2285     load_fr_bank( R_EDX );
  2286     JNE_rel8(13, doubleprec);
  2287     push_fr(R_EDX, FRm);
  2288     push_fr(R_EDX, FRn);
  2289     FMULP_st(1);
  2290     pop_fr(R_EDX, FRn);
  2291     JMP_rel8(11, end);
  2292     JMP_TARGET(doubleprec);
  2293     push_dr(R_EDX, FRm);
  2294     push_dr(R_EDX, FRn);
  2295     FMULP_st(1);
  2296     pop_dr(R_EDX, FRn);
  2297     JMP_TARGET(end);
  2298     sh4_x86.tstate = TSTATE_NONE;
  2299 :}
  2300 FNEG FRn {:  
  2301     check_fpuen();
  2302     load_spreg( R_ECX, R_FPSCR );
  2303     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2304     load_fr_bank( R_EDX );
  2305     JNE_rel8(10, doubleprec);
  2306     push_fr(R_EDX, FRn);
  2307     FCHS_st0();
  2308     pop_fr(R_EDX, FRn);
  2309     JMP_rel8(8, end);
  2310     JMP_TARGET(doubleprec);
  2311     push_dr(R_EDX, FRn);
  2312     FCHS_st0();
  2313     pop_dr(R_EDX, FRn);
  2314     JMP_TARGET(end);
  2315     sh4_x86.tstate = TSTATE_NONE;
  2316 :}
  2317 FSRRA FRn {:  
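            // Reciprocal square root: FRn = 1.0 / sqrt(FRn) (PR=0 only)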
  2318     check_fpuen();
  2319     load_spreg( R_ECX, R_FPSCR );
  2320     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2321     load_fr_bank( R_EDX );
  2322     JNE_rel8(12, end); // PR=0 only
  2323     FLD1_st0();
  2324     push_fr(R_EDX, FRn);
  2325     FSQRT_st0();
  2326     FDIVP_st(1);
  2327     pop_fr(R_EDX, FRn);
  2328     JMP_TARGET(end);
  2329     sh4_x86.tstate = TSTATE_NONE;
  2330 :}
  2331 FSQRT FRn {:  
  2332     check_fpuen();
  2333     load_spreg( R_ECX, R_FPSCR );
  2334     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2335     load_fr_bank( R_EDX );
  2336     JNE_rel8(10, doubleprec);
  2337     push_fr(R_EDX, FRn);
  2338     FSQRT_st0();
  2339     pop_fr(R_EDX, FRn);
  2340     JMP_rel8(8, end);
  2341     JMP_TARGET(doubleprec);
  2342     push_dr(R_EDX, FRn);
  2343     FSQRT_st0();
  2344     pop_dr(R_EDX, FRn);
  2345     JMP_TARGET(end);
  2346     sh4_x86.tstate = TSTATE_NONE;
  2347 :}
  2348 FSUB FRm, FRn {:  
  2349     check_fpuen();
  2350     load_spreg( R_ECX, R_FPSCR );
  2351     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2352     load_fr_bank( R_EDX );
  2353     JNE_rel8(13, doubleprec);
  2354     push_fr(R_EDX, FRn);
  2355     push_fr(R_EDX, FRm);
  2356     FSUBP_st(1);
  2357     pop_fr(R_EDX, FRn);
  2358     JMP_rel8(11, end);
  2359     JMP_TARGET(doubleprec);
  2360     push_dr(R_EDX, FRn);
  2361     push_dr(R_EDX, FRm);
  2362     FSUBP_st(1);
  2363     pop_dr(R_EDX, FRn);
  2364     JMP_TARGET(end);
  2365     sh4_x86.tstate = TSTATE_NONE;
  2366 :}
  2368 FCMP/EQ FRm, FRn {:  
  2369     check_fpuen();
  2370     load_spreg( R_ECX, R_FPSCR );
  2371     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2372     load_fr_bank( R_EDX );
  2373     JNE_rel8(8, doubleprec);
  2374     push_fr(R_EDX, FRm);
  2375     push_fr(R_EDX, FRn);
  2376     JMP_rel8(6, end);
  2377     JMP_TARGET(doubleprec);
  2378     push_dr(R_EDX, FRm);
  2379     push_dr(R_EDX, FRn);
  2380     JMP_TARGET(end);
  2381     FCOMIP_st(1);
  2382     SETE_t();
  2383     FPOP_st();
  2384     sh4_x86.tstate = TSTATE_NONE;
  2385 :}
  2386 FCMP/GT FRm, FRn {:  
  2387     check_fpuen();
  2388     load_spreg( R_ECX, R_FPSCR );
  2389     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2390     load_fr_bank( R_EDX );
  2391     JNE_rel8(8, doubleprec);
  2392     push_fr(R_EDX, FRm);
  2393     push_fr(R_EDX, FRn);
  2394     JMP_rel8(6, end);
  2395     JMP_TARGET(doubleprec);
  2396     push_dr(R_EDX, FRm);
  2397     push_dr(R_EDX, FRn);
  2398     JMP_TARGET(end);
  2399     FCOMIP_st(1);
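            // FCOMIP sets ZF/CF like an unsigned compare, so "above" == greater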
  2400     SETA_t();
  2401     FPOP_st();
  2402     sh4_x86.tstate = TSTATE_NONE;
  2403 :}
  2405 FSCA FPUL, FRn {:  
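            // Computes the sin/cos pair for the fixed-point angle in FPUL
            // (one revolution == 0x10000) into FRn/FRn+1 via sh4_fsca; PR=0 only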
  2406     check_fpuen();
  2407     load_spreg( R_ECX, R_FPSCR );
  2408     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2409     JNE_rel8( CALL_FUNC2_SIZE + 9, doubleprec );
  2410     load_fr_bank( R_ECX );
  2411     ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
  2412     load_spreg( R_EDX, R_FPUL );
  2413     call_func2( sh4_fsca, R_EDX, R_ECX );
  2414     JMP_TARGET(doubleprec);
  2415     sh4_x86.tstate = TSTATE_NONE;
  2416 :}
  2417 FIPR FVm, FVn {:  
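            // Inner product: FR[FVn*4+3] = sum of FR[FVm*4+i]*FR[FVn*4+i], i=0..3,
            // accumulated on the x87 stack (PR=0 only)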
  2418     check_fpuen();
  2419     load_spreg( R_ECX, R_FPSCR );
  2420     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2421     JNE_rel8(44, doubleprec);
  2423     load_fr_bank( R_ECX );
  2424     push_fr( R_ECX, FVm<<2 );
  2425     push_fr( R_ECX, FVn<<2 );
  2426     FMULP_st(1);
  2427     push_fr( R_ECX, (FVm<<2)+1);
  2428     push_fr( R_ECX, (FVn<<2)+1);
  2429     FMULP_st(1);
  2430     FADDP_st(1);
  2431     push_fr( R_ECX, (FVm<<2)+2);
  2432     push_fr( R_ECX, (FVn<<2)+2);
  2433     FMULP_st(1);
  2434     FADDP_st(1);
  2435     push_fr( R_ECX, (FVm<<2)+3);
  2436     push_fr( R_ECX, (FVn<<2)+3);
  2437     FMULP_st(1);
  2438     FADDP_st(1);
  2439     pop_fr( R_ECX, (FVn<<2)+3);
  2440     JMP_TARGET(doubleprec);
  2441     sh4_x86.tstate = TSTATE_NONE;
  2442 :}
  2443 FTRV XMTRX, FVn {:  
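            // FVn = XMTRX * FVn: the 4x4 matrix in the XF bank times the vector,
            // performed by the sh4_ftrv helper (PR=0 only)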
  2444     check_fpuen();
  2445     load_spreg( R_ECX, R_FPSCR );
  2446     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2447     JNE_rel8( 18 + CALL_FUNC2_SIZE, doubleprec );
  2448     load_fr_bank( R_EDX );                 // 3
  2449     ADD_imm8s_r32( FVn<<4, R_EDX );        // 3
  2450     load_xf_bank( R_ECX );                 // 12
  2451     call_func2( sh4_ftrv, R_EDX, R_ECX );  // 12
  2452     JMP_TARGET(doubleprec);
  2453     sh4_x86.tstate = TSTATE_NONE;
  2454 :}
  2456 FRCHG {:  
  2457     check_fpuen();
  2458     load_spreg( R_ECX, R_FPSCR );
  2459     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2460     store_spreg( R_ECX, R_FPSCR );
  2461     update_fr_bank( R_ECX );
  2462     sh4_x86.tstate = TSTATE_NONE;
  2463 :}
  2464 FSCHG {:  
  2465     check_fpuen();
  2466     load_spreg( R_ECX, R_FPSCR );
  2467     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2468     store_spreg( R_ECX, R_FPSCR );
  2469     sh4_x86.tstate = TSTATE_NONE;
  2470 :}
  2472 /* Processor control instructions */
  2473 LDC Rm, SR {:
  2474     if( sh4_x86.in_delay_slot ) {
  2475 	SLOTILLEGAL();
  2476     } else {
  2477 	check_priv();
  2478 	load_reg( R_EAX, Rm );
  2479 	call_func1( sh4_write_sr, R_EAX );
  2480 	sh4_x86.priv_checked = FALSE;
  2481 	sh4_x86.fpuen_checked = FALSE;
   2482 	sh4_x86.tstate = TSTATE_NONE;
   2483     }
  2484 :}
  2485 LDC Rm, GBR {: 
  2486     load_reg( R_EAX, Rm );
  2487     store_spreg( R_EAX, R_GBR );
  2488 :}
  2489 LDC Rm, VBR {:  
  2490     check_priv();
  2491     load_reg( R_EAX, Rm );
  2492     store_spreg( R_EAX, R_VBR );
  2493     sh4_x86.tstate = TSTATE_NONE;
  2494 :}
  2495 LDC Rm, SSR {:  
  2496     check_priv();
  2497     load_reg( R_EAX, Rm );
  2498     store_spreg( R_EAX, R_SSR );
  2499     sh4_x86.tstate = TSTATE_NONE;
  2500 :}
  2501 LDC Rm, SGR {:  
  2502     check_priv();
  2503     load_reg( R_EAX, Rm );
  2504     store_spreg( R_EAX, R_SGR );
  2505     sh4_x86.tstate = TSTATE_NONE;
  2506 :}
  2507 LDC Rm, SPC {:  
  2508     check_priv();
  2509     load_reg( R_EAX, Rm );
  2510     store_spreg( R_EAX, R_SPC );
  2511     sh4_x86.tstate = TSTATE_NONE;
  2512 :}
  2513 LDC Rm, DBR {:  
  2514     check_priv();
  2515     load_reg( R_EAX, Rm );
  2516     store_spreg( R_EAX, R_DBR );
  2517     sh4_x86.tstate = TSTATE_NONE;
  2518 :}
  2519 LDC Rm, Rn_BANK {:  
  2520     check_priv();
  2521     load_reg( R_EAX, Rm );
  2522     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2523     sh4_x86.tstate = TSTATE_NONE;
  2524 :}
  2525 LDC.L @Rm+, GBR {:  
  2526     load_reg( R_EAX, Rm );
  2527     precheck();
  2528     check_ralign32( R_EAX );
  2529     MOV_r32_r32( R_EAX, R_ECX );
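            // ECX keeps the pre-increment address for the load while Rm is bumped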
  2530     ADD_imm8s_r32( 4, R_EAX );
  2531     store_reg( R_EAX, Rm );
  2532     MEM_READ_LONG( R_ECX, R_EAX );
  2533     store_spreg( R_EAX, R_GBR );
  2534     sh4_x86.tstate = TSTATE_NONE;
  2535 :}
  2536 LDC.L @Rm+, SR {:
  2537     if( sh4_x86.in_delay_slot ) {
  2538 	SLOTILLEGAL();
  2539     } else {
  2540 	precheck();
  2541 	check_priv_no_precheck();
  2542 	load_reg( R_EAX, Rm );
  2543 	check_ralign32( R_EAX );
  2544 	MOV_r32_r32( R_EAX, R_ECX );
  2545 	ADD_imm8s_r32( 4, R_EAX );
  2546 	store_reg( R_EAX, Rm );
  2547 	MEM_READ_LONG( R_ECX, R_EAX );
  2548 	call_func1( sh4_write_sr, R_EAX );
  2549 	sh4_x86.priv_checked = FALSE;
  2550 	sh4_x86.fpuen_checked = FALSE;
   2551 	sh4_x86.tstate = TSTATE_NONE;
   2552     }
  2553 :}
  2554 LDC.L @Rm+, VBR {:  
  2555     precheck();
  2556     check_priv_no_precheck();
  2557     load_reg( R_EAX, Rm );
  2558     check_ralign32( R_EAX );
  2559     MOV_r32_r32( R_EAX, R_ECX );
  2560     ADD_imm8s_r32( 4, R_EAX );
  2561     store_reg( R_EAX, Rm );
  2562     MEM_READ_LONG( R_ECX, R_EAX );
  2563     store_spreg( R_EAX, R_VBR );
  2564     sh4_x86.tstate = TSTATE_NONE;
  2565 :}
  2566 LDC.L @Rm+, SSR {:
  2567     precheck();
  2568     check_priv_no_precheck();
  2569     load_reg( R_EAX, Rm );
  2570     check_ralign32( R_EAX );
  2571     MOV_r32_r32( R_EAX, R_ECX );
  2572     ADD_imm8s_r32( 4, R_EAX );
  2573     store_reg( R_EAX, Rm );
  2574     MEM_READ_LONG( R_ECX, R_EAX );
  2575     store_spreg( R_EAX, R_SSR );
  2576     sh4_x86.tstate = TSTATE_NONE;
  2577 :}
  2578 LDC.L @Rm+, SGR {:  
  2579     precheck();
  2580     check_priv_no_precheck();
  2581     load_reg( R_EAX, Rm );
  2582     check_ralign32( R_EAX );
  2583     MOV_r32_r32( R_EAX, R_ECX );
  2584     ADD_imm8s_r32( 4, R_EAX );
  2585     store_reg( R_EAX, Rm );
  2586     MEM_READ_LONG( R_ECX, R_EAX );
  2587     store_spreg( R_EAX, R_SGR );
  2588     sh4_x86.tstate = TSTATE_NONE;
  2589 :}
  2590 LDC.L @Rm+, SPC {:  
  2591     precheck();
  2592     check_priv_no_precheck();
  2593     load_reg( R_EAX, Rm );
  2594     check_ralign32( R_EAX );
  2595     MOV_r32_r32( R_EAX, R_ECX );
  2596     ADD_imm8s_r32( 4, R_EAX );
  2597     store_reg( R_EAX, Rm );
  2598     MEM_READ_LONG( R_ECX, R_EAX );
  2599     store_spreg( R_EAX, R_SPC );
  2600     sh4_x86.tstate = TSTATE_NONE;
  2601 :}
  2602 LDC.L @Rm+, DBR {:  
  2603     precheck();
  2604     check_priv_no_precheck();
  2605     load_reg( R_EAX, Rm );
  2606     check_ralign32( R_EAX );
  2607     MOV_r32_r32( R_EAX, R_ECX );
  2608     ADD_imm8s_r32( 4, R_EAX );
  2609     store_reg( R_EAX, Rm );
  2610     MEM_READ_LONG( R_ECX, R_EAX );
  2611     store_spreg( R_EAX, R_DBR );
  2612     sh4_x86.tstate = TSTATE_NONE;
  2613 :}
  2614 LDC.L @Rm+, Rn_BANK {:  
  2615     precheck();
  2616     check_priv_no_precheck();
  2617     load_reg( R_EAX, Rm );
  2618     check_ralign32( R_EAX );
  2619     MOV_r32_r32( R_EAX, R_ECX );
  2620     ADD_imm8s_r32( 4, R_EAX );
  2621     store_reg( R_EAX, Rm );
  2622     MEM_READ_LONG( R_ECX, R_EAX );
  2623     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2624     sh4_x86.tstate = TSTATE_NONE;
  2625 :}
  2626 LDS Rm, FPSCR {:  
  2627     load_reg( R_EAX, Rm );
  2628     store_spreg( R_EAX, R_FPSCR );
  2629     update_fr_bank( R_EAX );
  2630     sh4_x86.tstate = TSTATE_NONE;
  2631 :}
  2632 LDS.L @Rm+, FPSCR {:  
  2633     load_reg( R_EAX, Rm );
  2634     precheck();
  2635     check_ralign32( R_EAX );
  2636     MOV_r32_r32( R_EAX, R_ECX );
  2637     ADD_imm8s_r32( 4, R_EAX );
  2638     store_reg( R_EAX, Rm );
  2639     MEM_READ_LONG( R_ECX, R_EAX );
  2640     store_spreg( R_EAX, R_FPSCR );
  2641     update_fr_bank( R_EAX );
  2642     sh4_x86.tstate = TSTATE_NONE;
  2643 :}
  2644 LDS Rm, FPUL {:  
  2645     load_reg( R_EAX, Rm );
  2646     store_spreg( R_EAX, R_FPUL );
  2647 :}
  2648 LDS.L @Rm+, FPUL {:  
  2649     load_reg( R_EAX, Rm );
  2650     precheck();
  2651     check_ralign32( R_EAX );
  2652     MOV_r32_r32( R_EAX, R_ECX );
  2653     ADD_imm8s_r32( 4, R_EAX );
  2654     store_reg( R_EAX, Rm );
  2655     MEM_READ_LONG( R_ECX, R_EAX );
  2656     store_spreg( R_EAX, R_FPUL );
  2657     sh4_x86.tstate = TSTATE_NONE;
  2658 :}
  2659 LDS Rm, MACH {: 
  2660     load_reg( R_EAX, Rm );
  2661     store_spreg( R_EAX, R_MACH );
  2662 :}
  2663 LDS.L @Rm+, MACH {:  
  2664     load_reg( R_EAX, Rm );
  2665     precheck();
  2666     check_ralign32( R_EAX );
  2667     MOV_r32_r32( R_EAX, R_ECX );
  2668     ADD_imm8s_r32( 4, R_EAX );
  2669     store_reg( R_EAX, Rm );
  2670     MEM_READ_LONG( R_ECX, R_EAX );
  2671     store_spreg( R_EAX, R_MACH );
  2672     sh4_x86.tstate = TSTATE_NONE;
  2673 :}
  2674 LDS Rm, MACL {:  
  2675     load_reg( R_EAX, Rm );
  2676     store_spreg( R_EAX, R_MACL );
  2677 :}
  2678 LDS.L @Rm+, MACL {:  
  2679     load_reg( R_EAX, Rm );
  2680     precheck();
  2681     check_ralign32( R_EAX );
  2682     MOV_r32_r32( R_EAX, R_ECX );
  2683     ADD_imm8s_r32( 4, R_EAX );
  2684     store_reg( R_EAX, Rm );
  2685     MEM_READ_LONG( R_ECX, R_EAX );
  2686     store_spreg( R_EAX, R_MACL );
  2687     sh4_x86.tstate = TSTATE_NONE;
  2688 :}
  2689 LDS Rm, PR {:  
  2690     load_reg( R_EAX, Rm );
  2691     store_spreg( R_EAX, R_PR );
  2692 :}
  2693 LDS.L @Rm+, PR {:  
  2694     load_reg( R_EAX, Rm );
  2695     precheck();
  2696     check_ralign32( R_EAX );
  2697     MOV_r32_r32( R_EAX, R_ECX );
  2698     ADD_imm8s_r32( 4, R_EAX );
  2699     store_reg( R_EAX, Rm );
  2700     MEM_READ_LONG( R_ECX, R_EAX );
  2701     store_spreg( R_EAX, R_PR );
  2702     sh4_x86.tstate = TSTATE_NONE;
  2703 :}
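        /* LDTLB and the cache-control hints below are currently no-ops in the
         * translator, so they emit no code. */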
  2704 LDTLB {:  :}
  2705 OCBI @Rn {:  :}
  2706 OCBP @Rn {:  :}
  2707 OCBWB @Rn {:  :}
  2708 PREF @Rn {:
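            // PREF only has an architected effect on the store-queue region
            // 0xE0000000..0xE3FFFFFF (hence the 0xFC000000 mask); elsewhere it
            // is just a hint and falls through as a no-op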
  2709     load_reg( R_EAX, Rn );
  2710     MOV_r32_r32( R_EAX, R_ECX );
  2711     AND_imm32_r32( 0xFC000000, R_EAX );
  2712     CMP_imm32_r32( 0xE0000000, R_EAX );
  2713     JNE_rel8(CALL_FUNC1_SIZE, end);
  2714     call_func1( sh4_flush_store_queue, R_ECX );
  2715     JMP_TARGET(end);
  2716     sh4_x86.tstate = TSTATE_NONE;
  2717 :}
  2718 SLEEP {: 
  2719     check_priv();
  2720     call_func0( sh4_sleep );
  2721     sh4_x86.tstate = TSTATE_NONE;
  2722     sh4_x86.in_delay_slot = FALSE;
  2723     return 2;
  2724 :}
  2725 STC SR, Rn {:
  2726     check_priv();
  2727     call_func0(sh4_read_sr);
  2728     store_reg( R_EAX, Rn );
  2729     sh4_x86.tstate = TSTATE_NONE;
  2730 :}
  2731 STC GBR, Rn {:  
  2732     load_spreg( R_EAX, R_GBR );
  2733     store_reg( R_EAX, Rn );
  2734 :}
  2735 STC VBR, Rn {:  
  2736     check_priv();
  2737     load_spreg( R_EAX, R_VBR );
  2738     store_reg( R_EAX, Rn );
  2739     sh4_x86.tstate = TSTATE_NONE;
  2740 :}
  2741 STC SSR, Rn {:  
  2742     check_priv();
  2743     load_spreg( R_EAX, R_SSR );
  2744     store_reg( R_EAX, Rn );
  2745     sh4_x86.tstate = TSTATE_NONE;
  2746 :}
  2747 STC SPC, Rn {:  
  2748     check_priv();
  2749     load_spreg( R_EAX, R_SPC );
  2750     store_reg( R_EAX, Rn );
  2751     sh4_x86.tstate = TSTATE_NONE;
  2752 :}
  2753 STC SGR, Rn {:  
  2754     check_priv();
  2755     load_spreg( R_EAX, R_SGR );
  2756     store_reg( R_EAX, Rn );
  2757     sh4_x86.tstate = TSTATE_NONE;
  2758 :}
  2759 STC DBR, Rn {:  
  2760     check_priv();
  2761     load_spreg( R_EAX, R_DBR );
  2762     store_reg( R_EAX, Rn );
  2763     sh4_x86.tstate = TSTATE_NONE;
  2764 :}
  2765 STC Rm_BANK, Rn {:
  2766     check_priv();
  2767     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2768     store_reg( R_EAX, Rn );
  2769     sh4_x86.tstate = TSTATE_NONE;
  2770 :}
  2771 STC.L SR, @-Rn {:
  2772     precheck();
  2773     check_priv_no_precheck();
  2774     call_func0( sh4_read_sr );
  2775     load_reg( R_ECX, Rn );
  2776     check_walign32( R_ECX );
  2777     ADD_imm8s_r32( -4, R_ECX );
  2778     store_reg( R_ECX, Rn );
  2779     MEM_WRITE_LONG( R_ECX, R_EAX );
  2780     sh4_x86.tstate = TSTATE_NONE;
  2781 :}
  2782 STC.L VBR, @-Rn {:  
  2783     precheck();
  2784     check_priv_no_precheck();
  2785     load_reg( R_ECX, Rn );
  2786     check_walign32( R_ECX );
  2787     ADD_imm8s_r32( -4, R_ECX );
  2788     store_reg( R_ECX, Rn );
  2789     load_spreg( R_EAX, R_VBR );
  2790     MEM_WRITE_LONG( R_ECX, R_EAX );
  2791     sh4_x86.tstate = TSTATE_NONE;
  2792 :}
  2793 STC.L SSR, @-Rn {:  
  2794     precheck();
  2795     check_priv_no_precheck();
  2796     load_reg( R_ECX, Rn );
  2797     check_walign32( R_ECX );
  2798     ADD_imm8s_r32( -4, R_ECX );
  2799     store_reg( R_ECX, Rn );
  2800     load_spreg( R_EAX, R_SSR );
  2801     MEM_WRITE_LONG( R_ECX, R_EAX );
  2802     sh4_x86.tstate = TSTATE_NONE;
  2803 :}
  2804 STC.L SPC, @-Rn {:
  2805     precheck();
  2806     check_priv_no_precheck();
  2807     load_reg( R_ECX, Rn );
  2808     check_walign32( R_ECX );
  2809     ADD_imm8s_r32( -4, R_ECX );
  2810     store_reg( R_ECX, Rn );
  2811     load_spreg( R_EAX, R_SPC );
  2812     MEM_WRITE_LONG( R_ECX, R_EAX );
  2813     sh4_x86.tstate = TSTATE_NONE;
  2814 :}
  2815 STC.L SGR, @-Rn {:  
  2816     precheck();
  2817     check_priv_no_precheck();
  2818     load_reg( R_ECX, Rn );
  2819     check_walign32( R_ECX );
  2820     ADD_imm8s_r32( -4, R_ECX );
  2821     store_reg( R_ECX, Rn );
  2822     load_spreg( R_EAX, R_SGR );
  2823     MEM_WRITE_LONG( R_ECX, R_EAX );
  2824     sh4_x86.tstate = TSTATE_NONE;
  2825 :}
  2826 STC.L DBR, @-Rn {:  
  2827     precheck();
  2828     check_priv_no_precheck();
  2829     load_reg( R_ECX, Rn );
  2830     check_walign32( R_ECX );
  2831     ADD_imm8s_r32( -4, R_ECX );
  2832     store_reg( R_ECX, Rn );
  2833     load_spreg( R_EAX, R_DBR );
  2834     MEM_WRITE_LONG( R_ECX, R_EAX );
  2835     sh4_x86.tstate = TSTATE_NONE;
  2836 :}
  2837 STC.L Rm_BANK, @-Rn {:  
  2838     precheck();
  2839     check_priv_no_precheck();
  2840     load_reg( R_ECX, Rn );
  2841     check_walign32( R_ECX );
  2842     ADD_imm8s_r32( -4, R_ECX );
  2843     store_reg( R_ECX, Rn );
  2844     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2845     MEM_WRITE_LONG( R_ECX, R_EAX );
  2846     sh4_x86.tstate = TSTATE_NONE;
  2847 :}
  2848 STC.L GBR, @-Rn {:  
  2849     load_reg( R_ECX, Rn );
  2850     precheck();
  2851     check_walign32( R_ECX );
  2852     ADD_imm8s_r32( -4, R_ECX );
  2853     store_reg( R_ECX, Rn );
  2854     load_spreg( R_EAX, R_GBR );
  2855     MEM_WRITE_LONG( R_ECX, R_EAX );
  2856     sh4_x86.tstate = TSTATE_NONE;
  2857 :}
  2858 STS FPSCR, Rn {:  
  2859     load_spreg( R_EAX, R_FPSCR );
  2860     store_reg( R_EAX, Rn );
  2861 :}
  2862 STS.L FPSCR, @-Rn {:  
  2863     load_reg( R_ECX, Rn );
  2864     precheck();
  2865     check_walign32( R_ECX );
  2866     ADD_imm8s_r32( -4, R_ECX );
  2867     store_reg( R_ECX, Rn );
  2868     load_spreg( R_EAX, R_FPSCR );
  2869     MEM_WRITE_LONG( R_ECX, R_EAX );
  2870     sh4_x86.tstate = TSTATE_NONE;
  2871 :}
  2872 STS FPUL, Rn {:  
  2873     load_spreg( R_EAX, R_FPUL );
  2874     store_reg( R_EAX, Rn );
  2875 :}
  2876 STS.L FPUL, @-Rn {:  
  2877     load_reg( R_ECX, Rn );
  2878     precheck();
  2879     check_walign32( R_ECX );
  2880     ADD_imm8s_r32( -4, R_ECX );
  2881     store_reg( R_ECX, Rn );
  2882     load_spreg( R_EAX, R_FPUL );
  2883     MEM_WRITE_LONG( R_ECX, R_EAX );
  2884     sh4_x86.tstate = TSTATE_NONE;
  2885 :}
  2886 STS MACH, Rn {:  
  2887     load_spreg( R_EAX, R_MACH );
  2888     store_reg( R_EAX, Rn );
  2889 :}
  2890 STS.L MACH, @-Rn {:  
  2891     load_reg( R_ECX, Rn );
  2892     precheck();
  2893     check_walign32( R_ECX );
  2894     ADD_imm8s_r32( -4, R_ECX );
  2895     store_reg( R_ECX, Rn );
  2896     load_spreg( R_EAX, R_MACH );
  2897     MEM_WRITE_LONG( R_ECX, R_EAX );
  2898     sh4_x86.tstate = TSTATE_NONE;
  2899 :}
  2900 STS MACL, Rn {:  
  2901     load_spreg( R_EAX, R_MACL );
  2902     store_reg( R_EAX, Rn );
  2903 :}
  2904 STS.L MACL, @-Rn {:  
  2905     load_reg( R_ECX, Rn );
  2906     precheck();
  2907     check_walign32( R_ECX );
  2908     ADD_imm8s_r32( -4, R_ECX );
  2909     store_reg( R_ECX, Rn );
  2910     load_spreg( R_EAX, R_MACL );
  2911     MEM_WRITE_LONG( R_ECX, R_EAX );
  2912     sh4_x86.tstate = TSTATE_NONE;
  2913 :}
  2914 STS PR, Rn {:  
  2915     load_spreg( R_EAX, R_PR );
  2916     store_reg( R_EAX, Rn );
  2917 :}
  2918 STS.L PR, @-Rn {:  
  2919     load_reg( R_ECX, Rn );
  2920     precheck();
  2921     check_walign32( R_ECX );
  2922     ADD_imm8s_r32( -4, R_ECX );
  2923     store_reg( R_ECX, Rn );
  2924     load_spreg( R_EAX, R_PR );
  2925     MEM_WRITE_LONG( R_ECX, R_EAX );
  2926     sh4_x86.tstate = TSTATE_NONE;
  2927 :}
  2929 NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
  2930 %%
  2931     sh4_x86.in_delay_slot = FALSE;
  2932     return 0;