lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 416:714df603c869
prev 409:549e00835448
next 417:bd927df302a9
author nkeynes
date Wed Oct 03 12:19:03 2007 +0000
permissions -rw-r--r--
last change Remove INC %esi (and esi in general), replace with load immediates (faster)
     1 /**
     2  * $Id: sh4x86.in,v 1.18 2007-10-03 12:19:03 nkeynes Exp $
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization; it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/sh4core.h"
    29 #include "sh4/sh4trans.h"
    30 #include "sh4/sh4mmio.h"
    31 #include "sh4/x86op.h"
    32 #include "clock.h"
    34 #define DEFAULT_BACKPATCH_SIZE 4096
    36 /** 
    37  * Struct to manage internal translation state. This state is not saved -
    38  * it is only valid between calls to sh4_translate_begin_block() and
    39  * sh4_translate_end_block()
    40  */
    41 struct sh4_x86_state {
    42     gboolean in_delay_slot;
    43     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    44     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    45     gboolean branch_taken; /* true if we branched unconditionally */
    46     uint32_t block_start_pc;
    48     /* Allocated memory for the (block-wide) back-patch list */
    49     uint32_t **backpatch_list;
    50     uint32_t backpatch_posn;
    51     uint32_t backpatch_size;
    52 };
    54 #define EXIT_DATA_ADDR_READ 0
    55 #define EXIT_DATA_ADDR_WRITE 7
    56 #define EXIT_ILLEGAL 14
    57 #define EXIT_SLOT_ILLEGAL 21
    58 #define EXIT_FPU_DISABLED 28
    59 #define EXIT_SLOT_FPU_DISABLED 35
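/* These constants are byte offsets into the exception exit stubs emitted by
 * sh4_translate_end_block(): each stub is a 5-byte PUSH imm32 followed by a
 * 2-byte short jump to the common handler (7 bytes per exception code), hence
 * the steps of 7. The JE_exit/JNE_exit/JMP_exit macros from x86op.h presumably
 * emit a forward jump carrying the offset as its initial displacement and
 * register it with sh4_x86_add_backpatch() for later fixup. */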
    61 static struct sh4_x86_state sh4_x86;
    63 static uint32_t max_int = 0x7FFFFFFF;
    64 static uint32_t min_int = 0x80000000;
    65 static uint32_t save_fcw; /* save value for fpu control word */
    66 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
    68 void sh4_x86_init()
    69 {
    70     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    71     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(uint32_t *);
    72 }
    75 static void sh4_x86_add_backpatch( uint8_t *ptr )
    76 {
    77     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
    78 	sh4_x86.backpatch_size <<= 1;
    79 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, sh4_x86.backpatch_size * sizeof(uint32_t *) );
    80 	assert( sh4_x86.backpatch_list != NULL );
    81     }
    82     sh4_x86.backpatch_list[sh4_x86.backpatch_posn++] = (uint32_t *)ptr;
    83 }
    85 static void sh4_x86_do_backpatch( uint8_t *reloc_base )
    86 {
    87     unsigned int i;
    88     for( i=0; i<sh4_x86.backpatch_posn; i++ ) {
    89 	*sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]) - 4);
    90     }
    91 }
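/* Each backpatch entry points at the 32-bit displacement field of a forward
 * jump into the exception exit stubs. Assuming the field was emitted holding
 * the EXIT_* byte offset of the target stub, adding (reloc_base - entry - 4)
 * converts it into a proper rel32 (relative to the end of the displacement)
 * that lands at reloc_base + EXIT_* once the stub block has been emitted. */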
    93 /**
    94  * Emit an instruction to load an SH4 reg into a real register
    95  */
    96 static inline void load_reg( int x86reg, int sh4reg ) 
    97 {
    98     /* mov [bp+n], reg */
    99     OP(0x8B);
   100     OP(0x45 + (x86reg<<3));
   101     OP(REG_OFFSET(r[sh4reg]));
   102 }
   104 static inline void load_reg16s( int x86reg, int sh4reg )
   105 {
   106     OP(0x0F);
   107     OP(0xBF);
   108     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   109 }
   111 static inline void load_reg16u( int x86reg, int sh4reg )
   112 {
   113     OP(0x0F);
   114     OP(0xB7);
   115     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   117 }
   119 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   120 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   121 /**
   122  * Emit an instruction to load an immediate value into a register
   123  */
   124 static inline void load_imm32( int x86reg, uint32_t value ) {
   125     /* mov #value, reg */
   126     OP(0xB8 + x86reg);
   127     OP32(value);
   128 }
   130 /**
   131  * Emit an instruction to store an SH4 reg (RN)
   132  */
   133 void static inline store_reg( int x86reg, int sh4reg ) {
   134     /* mov reg, [bp+n] */
   135     OP(0x89);
   136     OP(0x45 + (x86reg<<3));
   137     OP(REG_OFFSET(r[sh4reg]));
   138 }
   140 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
   142 /**
   143  * Load an FR register (single-precision floating point) into an integer x86
   144  * register (eg for register-to-register moves)
   145  */
   146 void static inline load_fr( int bankreg, int x86reg, int frm )
   147 {
   148     OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
   149 }
   151 /**
    152  * Store a single-precision floating point value from an integer x86 register
    153  * into an FR register (eg for register-to-register moves)
   154  */
   155 void static inline store_fr( int bankreg, int x86reg, int frn )
   156 {
   157     OP(0x89);  OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
   158 }
   161 /**
    162  * Load a pointer to the back fp bank into the specified x86 register. The
   163  * bankreg must have been previously loaded with FPSCR.
   164  * NB: 12 bytes
   165  */
   166 static inline void load_xf_bank( int bankreg )
   167 {
   168     NOT_r32( bankreg );
   169     SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
   170     AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
   171     OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
   172 }
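/* FPSCR.FR (bit 21) selects the front floating-point bank, and each bank is
 * 16 registers * 4 bytes = 0x40 bytes. Inverting FPSCR and shifting bit 21
 * down to bit 6 gives 0x40 when FR=0 and 0 when FR=1, i.e. the byte offset of
 * the bank that is currently *not* the front bank. update_fr_bank() below
 * performs the same computation without the NOT to locate the front bank. */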
   174 /**
   175  * Update the fr_bank pointer based on the current fpscr value.
   176  */
   177 static inline void update_fr_bank( int fpscrreg )
   178 {
   179     SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
   180     AND_imm8s_r32( 0x40, fpscrreg );    // Complete extraction
   181     OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
   182     store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
   183 }
   184 /**
   185  * Push FPUL (as a 32-bit float) onto the FPU stack
   186  */
   187 static inline void push_fpul( )
   188 {
   189     OP(0xD9); OP(0x45); OP(R_FPUL);
   190 }
   192 /**
   193  * Pop FPUL (as a 32-bit float) from the FPU stack
   194  */
   195 static inline void pop_fpul( )
   196 {
   197     OP(0xD9); OP(0x5D); OP(R_FPUL);
   198 }
   200 /**
   201  * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
   202  * with the location of the current fp bank.
   203  */
   204 static inline void push_fr( int bankreg, int frm ) 
   205 {
   206     OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
   207 }
   209 /**
   210  * Pop a 32-bit float from the FPU stack and store it back into the fp bank, 
   211  * with bankreg previously loaded with the location of the current fp bank.
   212  */
   213 static inline void pop_fr( int bankreg, int frm )
   214 {
   215     OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
   216 }
   218 /**
   219  * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
   220  * with the location of the current fp bank.
   221  */
   222 static inline void push_dr( int bankreg, int frm )
   223 {
   224     OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
   225 }
   227 static inline void pop_dr( int bankreg, int frm )
   228 {
   229     OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
   230 }
   232 /**
   233  * Note: clobbers EAX to make the indirect call - this isn't usually
   234  * a problem since the callee will usually clobber it anyway.
   235  */
   236 static inline void call_func0( void *ptr )
   237 {
   238     load_imm32(R_EAX, (uint32_t)ptr);
   239     CALL_r32(R_EAX);
   240 }
   242 static inline void call_func1( void *ptr, int arg1 )
   243 {
   244     PUSH_r32(arg1);
   245     call_func0(ptr);
   246     ADD_imm8s_r32( 4, R_ESP );
   247 }
   249 static inline void call_func2( void *ptr, int arg1, int arg2 )
   250 {
   251     PUSH_r32(arg2);
   252     PUSH_r32(arg1);
   253     call_func0(ptr);
   254     ADD_imm8s_r32( 8, R_ESP );
   255 }
   257 /**
   258  * Write a double (64-bit) value into memory, with the first word in arg2a, and
   259  * the second in arg2b
   260  * NB: 30 bytes
   261  */
   262 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
   263 {
   264     ADD_imm8s_r32( 4, addr );
   265     PUSH_r32(arg2b);
   266     PUSH_r32(addr);
   267     ADD_imm8s_r32( -4, addr );
   268     PUSH_r32(arg2a);
   269     PUSH_r32(addr);
   270     call_func0(sh4_write_long);
   271     ADD_imm8s_r32( 8, R_ESP );
   272     call_func0(sh4_write_long);
   273     ADD_imm8s_r32( 8, R_ESP );
   274 }
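/* The pushes above pre-build both argument frames: from the top down the
 * stack holds (addr, arg2a, addr+4, arg2b), so the first call stores arg2a
 * at addr and, after popping 8 bytes, the second call stores arg2b at addr+4.
 * The addr register itself is left holding its original value. */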
   276 /**
   277  * Read a double (64-bit) value from memory, writing the first word into arg2a
   278  * and the second into arg2b. The addr must not be in EAX
   279  * NB: 27 bytes
   280  */
   281 static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
   282 {
   283     PUSH_r32(addr);
   284     call_func0(sh4_read_long);
   285     POP_r32(addr);
   286     PUSH_r32(R_EAX);
   287     ADD_imm8s_r32( 4, addr );
   288     PUSH_r32(addr);
   289     call_func0(sh4_read_long);
   290     ADD_imm8s_r32( 4, R_ESP );
   291     MOV_r32_r32( R_EAX, arg2b );
   292     POP_r32(arg2a);
   293 }
   295 /* Exception checks - Note that all exception checks will clobber EAX */
   296 #define precheck() load_imm32(R_EDX, (pc-sh4_x86.block_start_pc-(sh4_x86.in_delay_slot?2:0))>>1)
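/* precheck() loads EDX with the number of 16-bit instruction words between
 * the start of the block and the current instruction, backing up one word in
 * a delay slot so the exception is attributed to the branch. The exception
 * trailer in sh4_translate_end_block() uses this count to advance sh4r.pc by
 * 2*EDX and to charge EDX * sh4_cpu_period to slice_cycle. */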
   298 #define check_priv( ) \
   299     if( !sh4_x86.priv_checked ) { \
   300 	sh4_x86.priv_checked = TRUE;\
   301 	precheck();\
   302 	load_spreg( R_EAX, R_SR );\
   303 	AND_imm32_r32( SR_MD, R_EAX );\
   304 	if( sh4_x86.in_delay_slot ) {\
   305 	    JE_exit( EXIT_SLOT_ILLEGAL );\
   306 	} else {\
   307 	    JE_exit( EXIT_ILLEGAL );\
   308 	}\
   309     }\
   312 static void check_priv_no_precheck()
   313 {
   314     if( !sh4_x86.priv_checked ) {
   315 	sh4_x86.priv_checked = TRUE;
   316 	load_spreg( R_EAX, R_SR );
   317 	AND_imm32_r32( SR_MD, R_EAX );
   318 	if( sh4_x86.in_delay_slot ) {
   319 	    JE_exit( EXIT_SLOT_ILLEGAL );
   320 	} else {
   321 	    JE_exit( EXIT_ILLEGAL );
   322 	}
   323     }
   324 }
   326 #define check_fpuen( ) \
   327     if( !sh4_x86.fpuen_checked ) {\
   328 	sh4_x86.fpuen_checked = TRUE;\
   329 	precheck();\
   330 	load_spreg( R_EAX, R_SR );\
   331 	AND_imm32_r32( SR_FD, R_EAX );\
   332 	if( sh4_x86.in_delay_slot ) {\
   333 	    JNE_exit(EXIT_SLOT_FPU_DISABLED);\
   334 	} else {\
   335 	    JNE_exit(EXIT_FPU_DISABLED);\
   336 	}\
   337     }
   339 static void check_fpuen_no_precheck()
   340 {
   341     if( !sh4_x86.fpuen_checked ) {
   342 	sh4_x86.fpuen_checked = TRUE;
   343 	load_spreg( R_EAX, R_SR );
   344 	AND_imm32_r32( SR_FD, R_EAX );
   345 	if( sh4_x86.in_delay_slot ) {
   346 	    JNE_exit(EXIT_SLOT_FPU_DISABLED);
   347 	} else {
   348 	    JNE_exit(EXIT_FPU_DISABLED);
   349 	}
   350     }
   352 }
   354 static void check_ralign16( int x86reg )
   355 {
   356     TEST_imm32_r32( 0x00000001, x86reg );
   357     JNE_exit(EXIT_DATA_ADDR_READ);
   358 }
   360 static void check_walign16( int x86reg )
   361 {
   362     TEST_imm32_r32( 0x00000001, x86reg );
   363     JNE_exit(EXIT_DATA_ADDR_WRITE);
   364 }
   366 static void check_ralign32( int x86reg )
   367 {
   368     TEST_imm32_r32( 0x00000003, x86reg );
   369     JNE_exit(EXIT_DATA_ADDR_READ);
   370 }
   371 static void check_walign32( int x86reg )
   372 {
   373     TEST_imm32_r32( 0x00000003, x86reg );
   374     JNE_exit(EXIT_DATA_ADDR_WRITE);
   375 }
   377 #define UNDEF()
   378 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   379 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   380 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   381 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   382 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   383 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   384 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   386 #define SLOTILLEGAL() precheck(); JMP_exit(EXIT_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
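/* SLOTILLEGAL() handles an instruction that may not appear in a delay slot
 * (branches, PC-relative loads, etc.): it emits the usual precheck, an
 * unconditional jump to the slot-illegal exit stub, and ends the block. */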
   390 /**
    391  * Emit the 'start of block' assembly. Sets up the stack frame and saves
    392  * SI/DI as required
   393  */
   394 void sh4_translate_begin_block( sh4addr_t pc ) 
   395 {
   396     PUSH_r32(R_EBP);
   397     /* mov &sh4r, ebp */
   398     load_imm32( R_EBP, (uint32_t)&sh4r );
   400     sh4_x86.in_delay_slot = FALSE;
   401     sh4_x86.priv_checked = FALSE;
   402     sh4_x86.fpuen_checked = FALSE;
   403     sh4_x86.branch_taken = FALSE;
   404     sh4_x86.backpatch_posn = 0;
   405     sh4_x86.block_start_pc = pc;
   406 }
   408 /**
   409  * Exit the block to an absolute PC
   410  * Bytes: 29
   411  */
   412 void exit_block( sh4addr_t pc, sh4addr_t endpc )
   413 {
   414     load_imm32( R_ECX, pc );                            // 5
   415     store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
   416     MOV_moff32_EAX( (uint32_t)xlat_get_lut_entry(pc) ); // 5
   417     AND_imm8s_r32( 0xFC, R_EAX ); // 3
   418     load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
   419     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   420     POP_r32(R_EBP);
   421     RET();
   422 }
   424 /**
   425  * Exit the block with sh4r.pc already written
   426  * Bytes: 15
   427  */
    428 void exit_block_pcset( sh4addr_t pc )
   429 {
   430     XOR_r32_r32( R_EAX, R_EAX );                       // 2
   431     load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
   432     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );    // 6
   433     POP_r32(R_EBP);
   434     RET();
   435 }
   437 /**
   438  * Write the block trailer (exception handling block)
   439  */
   440 void sh4_translate_end_block( sh4addr_t pc ) {
   441     if( sh4_x86.branch_taken == FALSE ) {
   442 	// Didn't exit unconditionally already, so write the termination here
   443 	exit_block( pc, pc );
   444     }
   445     if( sh4_x86.backpatch_posn != 0 ) {
   446 	uint8_t *end_ptr = xlat_output;
   447 	// Exception termination. Jump block for various exception codes:
   448 	PUSH_imm32( EXC_DATA_ADDR_READ );
   449 	JMP_rel8( 33, target1 );
   450 	PUSH_imm32( EXC_DATA_ADDR_WRITE );
   451 	JMP_rel8( 26, target2 );
   452 	PUSH_imm32( EXC_ILLEGAL );
   453 	JMP_rel8( 19, target3 );
   454 	PUSH_imm32( EXC_SLOT_ILLEGAL ); 
   455 	JMP_rel8( 12, target4 );
   456 	PUSH_imm32( EXC_FPU_DISABLED ); 
   457 	JMP_rel8( 5, target5 );
   458 	PUSH_imm32( EXC_SLOT_FPU_DISABLED );
   459 	// target
   460 	JMP_TARGET(target1);
   461 	JMP_TARGET(target2);
   462 	JMP_TARGET(target3);
   463 	JMP_TARGET(target4);
   464 	JMP_TARGET(target5);
   465 	load_spreg( R_ECX, REG_OFFSET(pc) );
   466 	ADD_r32_r32( R_EDX, R_ECX );
   467 	ADD_r32_r32( R_EDX, R_ECX );
   468 	store_spreg( R_ECX, REG_OFFSET(pc) );
   469 	MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
   470 	load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
   471 	MUL_r32( R_EDX );
   472 	ADD_r32_r32( R_EAX, R_ECX );
   473 	store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
   475 	load_imm32( R_EAX, (uint32_t)sh4_raise_exception ); // 6
   476 	CALL_r32( R_EAX ); // 2
   477 	ADD_imm8s_r32( 4, R_ESP );
   478 	XOR_r32_r32( R_EAX, R_EAX );
   479 	POP_r32(R_EBP);
   480 	RET();
   482 	sh4_x86_do_backpatch( end_ptr );
   483     }
   485 }
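/* The trailer consists of six PUSH imm32 / JMP rel8 stubs (one per EXC_* code,
 * matching the EXIT_* offsets, with the last stub simply falling through),
 * followed by a common tail that rebuilds sh4r.pc and slice_cycle from the
 * instruction count left in EDX by precheck() and then calls
 * sh4_raise_exception() with the pushed exception code. The branches recorded
 * during translation are finally backpatched relative to end_ptr so that each
 * lands on its stub. */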
   488 extern uint16_t *sh4_icache;
   489 extern uint32_t sh4_icache_addr;
    491 /**
    492  * Translate a single instruction. Delayed branches are handled specially
    493  * by translating both branch and delayed instruction as a single unit (as
    494  * the delay-slot instruction executes before the branch takes effect).
    495  *
    496  * @return true if the instruction marks the end of a basic block
    497  * (eg a branch or an illegal instruction).
    498  */
   499 uint32_t sh4_x86_translate_instruction( sh4addr_t pc )
   500 {
   501     uint32_t ir;
   502     /* Read instruction */
   503     uint32_t pageaddr = pc >> 12;
   504     if( sh4_icache != NULL && pageaddr == sh4_icache_addr ) {
   505 	ir = sh4_icache[(pc&0xFFF)>>1];
   506     } else {
   507 	sh4_icache = (uint16_t *)mem_get_page(pc);
   508 	if( ((uint32_t)sh4_icache) < MAX_IO_REGIONS ) {
   509 	    /* If someone's actually been so daft as to try to execute out of an IO
    510 	     * region, fall back on the full-blown memory read
   511 	     */
   512 	    sh4_icache = NULL;
   513 	    ir = sh4_read_word(pc);
   514 	} else {
   515 	    sh4_icache_addr = pageaddr;
   516 	    ir = sh4_icache[(pc&0xFFF)>>1];
   517 	}
   518     }
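/* sh4_icache/sh4_icache_addr above act as a one-entry cache of the host page
 * for the most recently fetched SH4 code page, so consecutive fetches from
 * the same page avoid a mem_get_page() lookup. A host pointer below
 * MAX_IO_REGIONS indicates an IO region rather than real memory, in which
 * case the slow sh4_read_word() path is used instead. */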
   520 %%
   521 /* ALU operations */
   522 ADD Rm, Rn {:
   523     load_reg( R_EAX, Rm );
   524     load_reg( R_ECX, Rn );
   525     ADD_r32_r32( R_EAX, R_ECX );
   526     store_reg( R_ECX, Rn );
   527 :}
   528 ADD #imm, Rn {:  
   529     load_reg( R_EAX, Rn );
   530     ADD_imm8s_r32( imm, R_EAX );
   531     store_reg( R_EAX, Rn );
   532 :}
   533 ADDC Rm, Rn {:
   534     load_reg( R_EAX, Rm );
   535     load_reg( R_ECX, Rn );
   536     LDC_t();
   537     ADC_r32_r32( R_EAX, R_ECX );
   538     store_reg( R_ECX, Rn );
   539     SETC_t();
   540 :}
   541 ADDV Rm, Rn {:
   542     load_reg( R_EAX, Rm );
   543     load_reg( R_ECX, Rn );
   544     ADD_r32_r32( R_EAX, R_ECX );
   545     store_reg( R_ECX, Rn );
   546     SETO_t();
   547 :}
   548 AND Rm, Rn {:
   549     load_reg( R_EAX, Rm );
   550     load_reg( R_ECX, Rn );
   551     AND_r32_r32( R_EAX, R_ECX );
   552     store_reg( R_ECX, Rn );
   553 :}
   554 AND #imm, R0 {:  
   555     load_reg( R_EAX, 0 );
   556     AND_imm32_r32(imm, R_EAX); 
   557     store_reg( R_EAX, 0 );
   558 :}
   559 AND.B #imm, @(R0, GBR) {: 
   560     load_reg( R_EAX, 0 );
   561     load_spreg( R_ECX, R_GBR );
   562     ADD_r32_r32( R_EAX, R_ECX );
   563     PUSH_r32(R_ECX);
   564     call_func0(sh4_read_byte);
   565     POP_r32(R_ECX);
   566     AND_imm32_r32(imm, R_EAX );
   567     MEM_WRITE_BYTE( R_ECX, R_EAX );
   568 :}
   569 CMP/EQ Rm, Rn {:  
   570     load_reg( R_EAX, Rm );
   571     load_reg( R_ECX, Rn );
   572     CMP_r32_r32( R_EAX, R_ECX );
   573     SETE_t();
   574 :}
   575 CMP/EQ #imm, R0 {:  
   576     load_reg( R_EAX, 0 );
   577     CMP_imm8s_r32(imm, R_EAX);
   578     SETE_t();
   579 :}
   580 CMP/GE Rm, Rn {:  
   581     load_reg( R_EAX, Rm );
   582     load_reg( R_ECX, Rn );
   583     CMP_r32_r32( R_EAX, R_ECX );
   584     SETGE_t();
   585 :}
   586 CMP/GT Rm, Rn {: 
   587     load_reg( R_EAX, Rm );
   588     load_reg( R_ECX, Rn );
   589     CMP_r32_r32( R_EAX, R_ECX );
   590     SETG_t();
   591 :}
   592 CMP/HI Rm, Rn {:  
   593     load_reg( R_EAX, Rm );
   594     load_reg( R_ECX, Rn );
   595     CMP_r32_r32( R_EAX, R_ECX );
   596     SETA_t();
   597 :}
   598 CMP/HS Rm, Rn {: 
   599     load_reg( R_EAX, Rm );
   600     load_reg( R_ECX, Rn );
   601     CMP_r32_r32( R_EAX, R_ECX );
   602     SETAE_t();
   603  :}
   604 CMP/PL Rn {: 
   605     load_reg( R_EAX, Rn );
   606     CMP_imm8s_r32( 0, R_EAX );
   607     SETG_t();
   608 :}
   609 CMP/PZ Rn {:  
   610     load_reg( R_EAX, Rn );
   611     CMP_imm8s_r32( 0, R_EAX );
   612     SETGE_t();
   613 :}
   614 CMP/STR Rm, Rn {:  
   615     load_reg( R_EAX, Rm );
   616     load_reg( R_ECX, Rn );
   617     XOR_r32_r32( R_ECX, R_EAX );
   618     TEST_r8_r8( R_AL, R_AL );
   619     JE_rel8(13, target1);
   620     TEST_r8_r8( R_AH, R_AH ); // 2
   621     JE_rel8(9, target2);
   622     SHR_imm8_r32( 16, R_EAX ); // 3
   623     TEST_r8_r8( R_AL, R_AL ); // 2
   624     JE_rel8(2, target3);
   625     TEST_r8_r8( R_AH, R_AH ); // 2
   626     JMP_TARGET(target1);
   627     JMP_TARGET(target2);
   628     JMP_TARGET(target3);
   629     SETE_t();
   630 :}
   631 DIV0S Rm, Rn {:
   632     load_reg( R_EAX, Rm );
   633     load_reg( R_ECX, Rn );
   634     SHR_imm8_r32( 31, R_EAX );
   635     SHR_imm8_r32( 31, R_ECX );
   636     store_spreg( R_EAX, R_M );
   637     store_spreg( R_ECX, R_Q );
   638     CMP_r32_r32( R_EAX, R_ECX );
   639     SETNE_t();
   640 :}
   641 DIV0U {:  
   642     XOR_r32_r32( R_EAX, R_EAX );
   643     store_spreg( R_EAX, R_Q );
   644     store_spreg( R_EAX, R_M );
   645     store_spreg( R_EAX, R_T );
   646 :}
   647 DIV1 Rm, Rn {:
   648     load_spreg( R_ECX, R_M );
   649     load_reg( R_EAX, Rn );
   650     LDC_t();
   651     RCL1_r32( R_EAX );
   652     SETC_r8( R_DL ); // Q'
   653     CMP_sh4r_r32( R_Q, R_ECX );
   654     JE_rel8(5, mqequal);
   655     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   656     JMP_rel8(3, end);
   657     JMP_TARGET(mqequal);
   658     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   659     JMP_TARGET(end);
   660     store_reg( R_EAX, Rn ); // Done with Rn now
   661     SETC_r8(R_AL); // tmp1
   662     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   663     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   664     store_spreg( R_ECX, R_Q );
   665     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   666     MOVZX_r8_r32( R_AL, R_EAX );
   667     store_spreg( R_EAX, R_T );
   668 :}
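/* The DIV1 sequence above performs one step of the SH4's 1-bit non-restoring
 * division: the T bit is rotated into the bottom of Rn, Rm is added to or
 * subtracted from Rn depending on whether Q and M match, and new Q and T
 * values are derived from the carries, so that repeated DIV1 steps (after
 * DIV0S/DIV0U) produce the quotient one bit at a time. */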
   669 DMULS.L Rm, Rn {:  
   670     load_reg( R_EAX, Rm );
   671     load_reg( R_ECX, Rn );
   672     IMUL_r32(R_ECX);
   673     store_spreg( R_EDX, R_MACH );
   674     store_spreg( R_EAX, R_MACL );
   675 :}
   676 DMULU.L Rm, Rn {:  
   677     load_reg( R_EAX, Rm );
   678     load_reg( R_ECX, Rn );
   679     MUL_r32(R_ECX);
   680     store_spreg( R_EDX, R_MACH );
   681     store_spreg( R_EAX, R_MACL );    
   682 :}
   683 DT Rn {:  
   684     load_reg( R_EAX, Rn );
   685     ADD_imm8s_r32( -1, R_EAX );
   686     store_reg( R_EAX, Rn );
   687     SETE_t();
   688 :}
   689 EXTS.B Rm, Rn {:  
   690     load_reg( R_EAX, Rm );
   691     MOVSX_r8_r32( R_EAX, R_EAX );
   692     store_reg( R_EAX, Rn );
   693 :}
   694 EXTS.W Rm, Rn {:  
   695     load_reg( R_EAX, Rm );
   696     MOVSX_r16_r32( R_EAX, R_EAX );
   697     store_reg( R_EAX, Rn );
   698 :}
   699 EXTU.B Rm, Rn {:  
   700     load_reg( R_EAX, Rm );
   701     MOVZX_r8_r32( R_EAX, R_EAX );
   702     store_reg( R_EAX, Rn );
   703 :}
   704 EXTU.W Rm, Rn {:  
   705     load_reg( R_EAX, Rm );
   706     MOVZX_r16_r32( R_EAX, R_EAX );
   707     store_reg( R_EAX, Rn );
   708 :}
   709 MAC.L @Rm+, @Rn+ {:  
   710     load_reg( R_ECX, Rm );
   711     precheck();
   712     check_ralign32( R_ECX );
   713     load_reg( R_ECX, Rn );
   714     check_ralign32( R_ECX );
   715     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   716     MEM_READ_LONG( R_ECX, R_EAX );
   717     PUSH_r32( R_EAX );
   718     load_reg( R_ECX, Rm );
   719     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   720     MEM_READ_LONG( R_ECX, R_EAX );
   721     POP_r32( R_ECX );
   722     IMUL_r32( R_ECX );
   723     ADD_r32_sh4r( R_EAX, R_MACL );
   724     ADC_r32_sh4r( R_EDX, R_MACH );
   726     load_spreg( R_ECX, R_S );
   727     TEST_r32_r32(R_ECX, R_ECX);
   728     JE_rel8( 7, nosat );
   729     call_func0( signsat48 );
   730     JMP_TARGET( nosat );
   731 :}
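/* MAC.L accumulates the full 64-bit product into MACH:MACL; when the S flag
 * is set the SH4 saturates the accumulator to 48 bits instead, which is
 * handled out of line here by the call to signsat48() after the add. */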
   732 MAC.W @Rm+, @Rn+ {:  
   733     load_reg( R_ECX, Rm );
   734     precheck();
   735     check_ralign16( R_ECX );
   736     load_reg( R_ECX, Rn );
   737     check_ralign16( R_ECX );
   738     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   739     MEM_READ_WORD( R_ECX, R_EAX );
   740     PUSH_r32( R_EAX );
   741     load_reg( R_ECX, Rm );
   742     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   743     MEM_READ_WORD( R_ECX, R_EAX );
   744     POP_r32( R_ECX );
   745     IMUL_r32( R_ECX );
   747     load_spreg( R_ECX, R_S );
   748     TEST_r32_r32( R_ECX, R_ECX );
   749     JE_rel8( 47, nosat );
   751     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   752     JNO_rel8( 51, end );            // 2
   753     load_imm32( R_EDX, 1 );         // 5
   754     store_spreg( R_EDX, R_MACH );   // 6
   755     JS_rel8( 13, positive );        // 2
   756     load_imm32( R_EAX, 0x80000000 );// 5
   757     store_spreg( R_EAX, R_MACL );   // 6
   758     JMP_rel8( 25, end2 );           // 2
   760     JMP_TARGET(positive);
   761     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   762     store_spreg( R_EAX, R_MACL );   // 6
   763     JMP_rel8( 12, end3);            // 2
   765     JMP_TARGET(nosat);
   766     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   767     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   768     JMP_TARGET(end);
   769     JMP_TARGET(end2);
   770     JMP_TARGET(end3);
   771 :}
   772 MOVT Rn {:  
   773     load_spreg( R_EAX, R_T );
   774     store_reg( R_EAX, Rn );
   775 :}
   776 MUL.L Rm, Rn {:  
   777     load_reg( R_EAX, Rm );
   778     load_reg( R_ECX, Rn );
   779     MUL_r32( R_ECX );
   780     store_spreg( R_EAX, R_MACL );
   781 :}
   782 MULS.W Rm, Rn {:
   783     load_reg16s( R_EAX, Rm );
   784     load_reg16s( R_ECX, Rn );
   785     MUL_r32( R_ECX );
   786     store_spreg( R_EAX, R_MACL );
   787 :}
   788 MULU.W Rm, Rn {:  
   789     load_reg16u( R_EAX, Rm );
   790     load_reg16u( R_ECX, Rn );
   791     MUL_r32( R_ECX );
   792     store_spreg( R_EAX, R_MACL );
   793 :}
   794 NEG Rm, Rn {:
   795     load_reg( R_EAX, Rm );
   796     NEG_r32( R_EAX );
   797     store_reg( R_EAX, Rn );
   798 :}
   799 NEGC Rm, Rn {:  
   800     load_reg( R_EAX, Rm );
   801     XOR_r32_r32( R_ECX, R_ECX );
   802     LDC_t();
   803     SBB_r32_r32( R_EAX, R_ECX );
   804     store_reg( R_ECX, Rn );
   805     SETC_t();
   806 :}
   807 NOT Rm, Rn {:  
   808     load_reg( R_EAX, Rm );
   809     NOT_r32( R_EAX );
   810     store_reg( R_EAX, Rn );
   811 :}
   812 OR Rm, Rn {:  
   813     load_reg( R_EAX, Rm );
   814     load_reg( R_ECX, Rn );
   815     OR_r32_r32( R_EAX, R_ECX );
   816     store_reg( R_ECX, Rn );
   817 :}
   818 OR #imm, R0 {:
   819     load_reg( R_EAX, 0 );
   820     OR_imm32_r32(imm, R_EAX);
   821     store_reg( R_EAX, 0 );
   822 :}
   823 OR.B #imm, @(R0, GBR) {:  
   824     load_reg( R_EAX, 0 );
   825     load_spreg( R_ECX, R_GBR );
   826     ADD_r32_r32( R_EAX, R_ECX );
   827     PUSH_r32(R_ECX);
   828     call_func0(sh4_read_byte);
   829     POP_r32(R_ECX);
   830     OR_imm32_r32(imm, R_EAX );
   831     MEM_WRITE_BYTE( R_ECX, R_EAX );
   832 :}
   833 ROTCL Rn {:
   834     load_reg( R_EAX, Rn );
   835     LDC_t();
   836     RCL1_r32( R_EAX );
   837     store_reg( R_EAX, Rn );
   838     SETC_t();
   839 :}
   840 ROTCR Rn {:  
   841     load_reg( R_EAX, Rn );
   842     LDC_t();
   843     RCR1_r32( R_EAX );
   844     store_reg( R_EAX, Rn );
   845     SETC_t();
   846 :}
   847 ROTL Rn {:  
   848     load_reg( R_EAX, Rn );
   849     ROL1_r32( R_EAX );
   850     store_reg( R_EAX, Rn );
   851     SETC_t();
   852 :}
   853 ROTR Rn {:  
   854     load_reg( R_EAX, Rn );
   855     ROR1_r32( R_EAX );
   856     store_reg( R_EAX, Rn );
   857     SETC_t();
   858 :}
   859 SHAD Rm, Rn {:
   860     /* Annoyingly enough, not directly convertible */
   861     load_reg( R_EAX, Rn );
   862     load_reg( R_ECX, Rm );
   863     CMP_imm32_r32( 0, R_ECX );
   864     JGE_rel8(16, doshl);
   866     NEG_r32( R_ECX );      // 2
   867     AND_imm8_r8( 0x1F, R_CL ); // 3
   868     JE_rel8( 4, emptysar);     // 2
   869     SAR_r32_CL( R_EAX );       // 2
   870     JMP_rel8(10, end);          // 2
   872     JMP_TARGET(emptysar);
   873     SAR_imm8_r32(31, R_EAX );  // 3
   874     JMP_rel8(5, end2);
   876     JMP_TARGET(doshl);
   877     AND_imm8_r8( 0x1F, R_CL ); // 3
   878     SHL_r32_CL( R_EAX );       // 2
   879     JMP_TARGET(end);
   880     JMP_TARGET(end2);
   881     store_reg( R_EAX, Rn );
   882 :}
   883 SHLD Rm, Rn {:  
   884     load_reg( R_EAX, Rn );
   885     load_reg( R_ECX, Rm );
   886     CMP_imm32_r32( 0, R_ECX );
   887     JGE_rel8(15, doshl);
   889     NEG_r32( R_ECX );      // 2
   890     AND_imm8_r8( 0x1F, R_CL ); // 3
   891     JE_rel8( 4, emptyshr );
   892     SHR_r32_CL( R_EAX );       // 2
   893     JMP_rel8(9, end);          // 2
   895     JMP_TARGET(emptyshr);
   896     XOR_r32_r32( R_EAX, R_EAX );
   897     JMP_rel8(5, end2);
   899     JMP_TARGET(doshl);
   900     AND_imm8_r8( 0x1F, R_CL ); // 3
   901     SHL_r32_CL( R_EAX );       // 2
   902     JMP_TARGET(end);
   903     JMP_TARGET(end2);
   904     store_reg( R_EAX, Rn );
   905 :}
   906 SHAL Rn {: 
   907     load_reg( R_EAX, Rn );
   908     SHL1_r32( R_EAX );
   909     SETC_t();
   910     store_reg( R_EAX, Rn );
   911 :}
   912 SHAR Rn {:  
   913     load_reg( R_EAX, Rn );
   914     SAR1_r32( R_EAX );
   915     SETC_t();
   916     store_reg( R_EAX, Rn );
   917 :}
   918 SHLL Rn {:  
   919     load_reg( R_EAX, Rn );
   920     SHL1_r32( R_EAX );
   921     SETC_t();
   922     store_reg( R_EAX, Rn );
   923 :}
   924 SHLL2 Rn {:
   925     load_reg( R_EAX, Rn );
   926     SHL_imm8_r32( 2, R_EAX );
   927     store_reg( R_EAX, Rn );
   928 :}
   929 SHLL8 Rn {:  
   930     load_reg( R_EAX, Rn );
   931     SHL_imm8_r32( 8, R_EAX );
   932     store_reg( R_EAX, Rn );
   933 :}
   934 SHLL16 Rn {:  
   935     load_reg( R_EAX, Rn );
   936     SHL_imm8_r32( 16, R_EAX );
   937     store_reg( R_EAX, Rn );
   938 :}
   939 SHLR Rn {:  
   940     load_reg( R_EAX, Rn );
   941     SHR1_r32( R_EAX );
   942     SETC_t();
   943     store_reg( R_EAX, Rn );
   944 :}
   945 SHLR2 Rn {:  
   946     load_reg( R_EAX, Rn );
   947     SHR_imm8_r32( 2, R_EAX );
   948     store_reg( R_EAX, Rn );
   949 :}
   950 SHLR8 Rn {:  
   951     load_reg( R_EAX, Rn );
   952     SHR_imm8_r32( 8, R_EAX );
   953     store_reg( R_EAX, Rn );
   954 :}
   955 SHLR16 Rn {:  
   956     load_reg( R_EAX, Rn );
   957     SHR_imm8_r32( 16, R_EAX );
   958     store_reg( R_EAX, Rn );
   959 :}
   960 SUB Rm, Rn {:  
   961     load_reg( R_EAX, Rm );
   962     load_reg( R_ECX, Rn );
   963     SUB_r32_r32( R_EAX, R_ECX );
   964     store_reg( R_ECX, Rn );
   965 :}
   966 SUBC Rm, Rn {:  
   967     load_reg( R_EAX, Rm );
   968     load_reg( R_ECX, Rn );
   969     LDC_t();
   970     SBB_r32_r32( R_EAX, R_ECX );
   971     store_reg( R_ECX, Rn );
   972     SETC_t();
   973 :}
   974 SUBV Rm, Rn {:  
   975     load_reg( R_EAX, Rm );
   976     load_reg( R_ECX, Rn );
   977     SUB_r32_r32( R_EAX, R_ECX );
   978     store_reg( R_ECX, Rn );
   979     SETO_t();
   980 :}
   981 SWAP.B Rm, Rn {:  
   982     load_reg( R_EAX, Rm );
   983     XCHG_r8_r8( R_AL, R_AH );
   984     store_reg( R_EAX, Rn );
   985 :}
   986 SWAP.W Rm, Rn {:  
   987     load_reg( R_EAX, Rm );
   988     MOV_r32_r32( R_EAX, R_ECX );
   989     SHL_imm8_r32( 16, R_ECX );
   990     SHR_imm8_r32( 16, R_EAX );
   991     OR_r32_r32( R_EAX, R_ECX );
   992     store_reg( R_ECX, Rn );
   993 :}
   994 TAS.B @Rn {:  
   995     load_reg( R_ECX, Rn );
   996     MEM_READ_BYTE( R_ECX, R_EAX );
   997     TEST_r8_r8( R_AL, R_AL );
   998     SETE_t();
   999     OR_imm8_r8( 0x80, R_AL );
  1000     load_reg( R_ECX, Rn );
  1001     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1002 :}
  1003 TST Rm, Rn {:  
  1004     load_reg( R_EAX, Rm );
  1005     load_reg( R_ECX, Rn );
  1006     TEST_r32_r32( R_EAX, R_ECX );
  1007     SETE_t();
  1008 :}
  1009 TST #imm, R0 {:  
  1010     load_reg( R_EAX, 0 );
  1011     TEST_imm32_r32( imm, R_EAX );
  1012     SETE_t();
  1013 :}
  1014 TST.B #imm, @(R0, GBR) {:  
  1015     load_reg( R_EAX, 0);
  1016     load_reg( R_ECX, R_GBR);
  1017     ADD_r32_r32( R_EAX, R_ECX );
  1018     MEM_READ_BYTE( R_ECX, R_EAX );
  1019     TEST_imm8_r8( imm, R_AL );
  1020     SETE_t();
  1021 :}
  1022 XOR Rm, Rn {:  
  1023     load_reg( R_EAX, Rm );
  1024     load_reg( R_ECX, Rn );
  1025     XOR_r32_r32( R_EAX, R_ECX );
  1026     store_reg( R_ECX, Rn );
  1027 :}
  1028 XOR #imm, R0 {:  
  1029     load_reg( R_EAX, 0 );
  1030     XOR_imm32_r32( imm, R_EAX );
  1031     store_reg( R_EAX, 0 );
  1032 :}
  1033 XOR.B #imm, @(R0, GBR) {:  
  1034     load_reg( R_EAX, 0 );
  1035     load_spreg( R_ECX, R_GBR );
  1036     ADD_r32_r32( R_EAX, R_ECX );
  1037     PUSH_r32(R_ECX);
  1038     call_func0(sh4_read_byte);
  1039     POP_r32(R_ECX);
  1040     XOR_imm32_r32( imm, R_EAX );
  1041     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1042 :}
  1043 XTRCT Rm, Rn {:
  1044     load_reg( R_EAX, Rm );
  1045     load_reg( R_ECX, Rn );
  1046     SHL_imm8_r32( 16, R_EAX );
  1047     SHR_imm8_r32( 16, R_ECX );
  1048     OR_r32_r32( R_EAX, R_ECX );
  1049     store_reg( R_ECX, Rn );
  1050 :}
  1052 /* Data move instructions */
  1053 MOV Rm, Rn {:  
  1054     load_reg( R_EAX, Rm );
  1055     store_reg( R_EAX, Rn );
  1056 :}
  1057 MOV #imm, Rn {:  
  1058     load_imm32( R_EAX, imm );
  1059     store_reg( R_EAX, Rn );
  1060 :}
  1061 MOV.B Rm, @Rn {:  
  1062     load_reg( R_EAX, Rm );
  1063     load_reg( R_ECX, Rn );
  1064     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1065 :}
  1066 MOV.B Rm, @-Rn {:  
  1067     load_reg( R_EAX, Rm );
  1068     load_reg( R_ECX, Rn );
  1069     ADD_imm8s_r32( -1, R_ECX );
  1070     store_reg( R_ECX, Rn );
  1071     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1072 :}
  1073 MOV.B Rm, @(R0, Rn) {:  
  1074     load_reg( R_EAX, 0 );
  1075     load_reg( R_ECX, Rn );
  1076     ADD_r32_r32( R_EAX, R_ECX );
  1077     load_reg( R_EAX, Rm );
  1078     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1079 :}
  1080 MOV.B R0, @(disp, GBR) {:  
  1081     load_reg( R_EAX, 0 );
  1082     load_spreg( R_ECX, R_GBR );
  1083     ADD_imm32_r32( disp, R_ECX );
  1084     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1085 :}
  1086 MOV.B R0, @(disp, Rn) {:  
  1087     load_reg( R_EAX, 0 );
  1088     load_reg( R_ECX, Rn );
  1089     ADD_imm32_r32( disp, R_ECX );
  1090     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1091 :}
  1092 MOV.B @Rm, Rn {:  
  1093     load_reg( R_ECX, Rm );
  1094     MEM_READ_BYTE( R_ECX, R_EAX );
  1095     store_reg( R_EAX, Rn );
  1096 :}
  1097 MOV.B @Rm+, Rn {:  
  1098     load_reg( R_ECX, Rm );
  1099     MOV_r32_r32( R_ECX, R_EAX );
  1100     ADD_imm8s_r32( 1, R_EAX );
  1101     store_reg( R_EAX, Rm );
  1102     MEM_READ_BYTE( R_ECX, R_EAX );
  1103     store_reg( R_EAX, Rn );
  1104 :}
  1105 MOV.B @(R0, Rm), Rn {:  
  1106     load_reg( R_EAX, 0 );
  1107     load_reg( R_ECX, Rm );
  1108     ADD_r32_r32( R_EAX, R_ECX );
  1109     MEM_READ_BYTE( R_ECX, R_EAX );
  1110     store_reg( R_EAX, Rn );
  1111 :}
  1112 MOV.B @(disp, GBR), R0 {:  
  1113     load_spreg( R_ECX, R_GBR );
  1114     ADD_imm32_r32( disp, R_ECX );
  1115     MEM_READ_BYTE( R_ECX, R_EAX );
  1116     store_reg( R_EAX, 0 );
  1117 :}
  1118 MOV.B @(disp, Rm), R0 {:  
  1119     load_reg( R_ECX, Rm );
  1120     ADD_imm32_r32( disp, R_ECX );
  1121     MEM_READ_BYTE( R_ECX, R_EAX );
  1122     store_reg( R_EAX, 0 );
  1123 :}
  1124 MOV.L Rm, @Rn {:
  1125     load_reg( R_EAX, Rm );
  1126     load_reg( R_ECX, Rn );
  1127     precheck();
  1128     check_walign32(R_ECX);
  1129     MEM_WRITE_LONG( R_ECX, R_EAX );
  1130 :}
  1131 MOV.L Rm, @-Rn {:  
  1132     load_reg( R_EAX, Rm );
  1133     load_reg( R_ECX, Rn );
  1134     precheck();
  1135     check_walign32( R_ECX );
  1136     ADD_imm8s_r32( -4, R_ECX );
  1137     store_reg( R_ECX, Rn );
  1138     MEM_WRITE_LONG( R_ECX, R_EAX );
  1139 :}
  1140 MOV.L Rm, @(R0, Rn) {:  
  1141     load_reg( R_EAX, 0 );
  1142     load_reg( R_ECX, Rn );
  1143     ADD_r32_r32( R_EAX, R_ECX );
  1144     precheck();
  1145     check_walign32( R_ECX );
  1146     load_reg( R_EAX, Rm );
  1147     MEM_WRITE_LONG( R_ECX, R_EAX );
  1148 :}
  1149 MOV.L R0, @(disp, GBR) {:  
  1150     load_spreg( R_ECX, R_GBR );
  1151     load_reg( R_EAX, 0 );
  1152     ADD_imm32_r32( disp, R_ECX );
  1153     precheck();
  1154     check_walign32( R_ECX );
  1155     MEM_WRITE_LONG( R_ECX, R_EAX );
  1156 :}
  1157 MOV.L Rm, @(disp, Rn) {:  
  1158     load_reg( R_ECX, Rn );
  1159     load_reg( R_EAX, Rm );
  1160     ADD_imm32_r32( disp, R_ECX );
  1161     precheck();
  1162     check_walign32( R_ECX );
  1163     MEM_WRITE_LONG( R_ECX, R_EAX );
  1164 :}
  1165 MOV.L @Rm, Rn {:  
  1166     load_reg( R_ECX, Rm );
  1167     precheck();
  1168     check_ralign32( R_ECX );
  1169     MEM_READ_LONG( R_ECX, R_EAX );
  1170     store_reg( R_EAX, Rn );
  1171 :}
  1172 MOV.L @Rm+, Rn {:  
  1173     load_reg( R_EAX, Rm );
  1174     precheck();
  1175     check_ralign32( R_EAX );
  1176     MOV_r32_r32( R_EAX, R_ECX );
  1177     ADD_imm8s_r32( 4, R_EAX );
  1178     store_reg( R_EAX, Rm );
  1179     MEM_READ_LONG( R_ECX, R_EAX );
  1180     store_reg( R_EAX, Rn );
  1181 :}
  1182 MOV.L @(R0, Rm), Rn {:  
  1183     load_reg( R_EAX, 0 );
  1184     load_reg( R_ECX, Rm );
  1185     ADD_r32_r32( R_EAX, R_ECX );
  1186     precheck();
  1187     check_ralign32( R_ECX );
  1188     MEM_READ_LONG( R_ECX, R_EAX );
  1189     store_reg( R_EAX, Rn );
  1190 :}
  1191 MOV.L @(disp, GBR), R0 {:
  1192     load_spreg( R_ECX, R_GBR );
  1193     ADD_imm32_r32( disp, R_ECX );
  1194     precheck();
  1195     check_ralign32( R_ECX );
  1196     MEM_READ_LONG( R_ECX, R_EAX );
  1197     store_reg( R_EAX, 0 );
  1198 :}
  1199 MOV.L @(disp, PC), Rn {:  
  1200     if( sh4_x86.in_delay_slot ) {
  1201 	SLOTILLEGAL();
  1202     } else {
  1203 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1204 	char *ptr = mem_get_region(target);
  1205 	if( ptr != NULL ) {
  1206 	    MOV_moff32_EAX( (uint32_t)ptr );
  1207 	} else {
  1208 	    load_imm32( R_ECX, target );
  1209 	    MEM_READ_LONG( R_ECX, R_EAX );
   1210 	}
   1211 	store_reg( R_EAX, Rn );
   1212     }
   1213 :}
  1214 MOV.L @(disp, Rm), Rn {:  
  1215     load_reg( R_ECX, Rm );
  1216     ADD_imm8s_r32( disp, R_ECX );
  1217     precheck();
  1218     check_ralign32( R_ECX );
  1219     MEM_READ_LONG( R_ECX, R_EAX );
  1220     store_reg( R_EAX, Rn );
  1221 :}
  1222 MOV.W Rm, @Rn {:  
  1223     load_reg( R_ECX, Rn );
  1224     precheck();
  1225     check_walign16( R_ECX );
  1226     load_reg( R_EAX, Rm );
  1227     MEM_WRITE_WORD( R_ECX, R_EAX );
  1228 :}
  1229 MOV.W Rm, @-Rn {:  
  1230     load_reg( R_ECX, Rn );
  1231     precheck();
  1232     check_walign16( R_ECX );
  1233     load_reg( R_EAX, Rm );
  1234     ADD_imm8s_r32( -2, R_ECX );
  1235     store_reg( R_ECX, Rn );
  1236     MEM_WRITE_WORD( R_ECX, R_EAX );
  1237 :}
  1238 MOV.W Rm, @(R0, Rn) {:  
  1239     load_reg( R_EAX, 0 );
  1240     load_reg( R_ECX, Rn );
  1241     ADD_r32_r32( R_EAX, R_ECX );
  1242     precheck();
  1243     check_walign16( R_ECX );
  1244     load_reg( R_EAX, Rm );
  1245     MEM_WRITE_WORD( R_ECX, R_EAX );
  1246 :}
  1247 MOV.W R0, @(disp, GBR) {:  
  1248     load_spreg( R_ECX, R_GBR );
  1249     load_reg( R_EAX, 0 );
  1250     ADD_imm32_r32( disp, R_ECX );
  1251     precheck();
  1252     check_walign16( R_ECX );
  1253     MEM_WRITE_WORD( R_ECX, R_EAX );
  1254 :}
  1255 MOV.W R0, @(disp, Rn) {:  
  1256     load_reg( R_ECX, Rn );
  1257     load_reg( R_EAX, 0 );
  1258     ADD_imm32_r32( disp, R_ECX );
  1259     precheck();
  1260     check_walign16( R_ECX );
  1261     MEM_WRITE_WORD( R_ECX, R_EAX );
  1262 :}
  1263 MOV.W @Rm, Rn {:  
  1264     load_reg( R_ECX, Rm );
  1265     precheck();
  1266     check_ralign16( R_ECX );
  1267     MEM_READ_WORD( R_ECX, R_EAX );
  1268     store_reg( R_EAX, Rn );
  1269 :}
  1270 MOV.W @Rm+, Rn {:  
  1271     load_reg( R_EAX, Rm );
  1272     precheck();
  1273     check_ralign16( R_EAX );
  1274     MOV_r32_r32( R_EAX, R_ECX );
  1275     ADD_imm8s_r32( 2, R_EAX );
  1276     store_reg( R_EAX, Rm );
  1277     MEM_READ_WORD( R_ECX, R_EAX );
  1278     store_reg( R_EAX, Rn );
  1279 :}
  1280 MOV.W @(R0, Rm), Rn {:  
  1281     load_reg( R_EAX, 0 );
  1282     load_reg( R_ECX, Rm );
  1283     ADD_r32_r32( R_EAX, R_ECX );
  1284     precheck();
  1285     check_ralign16( R_ECX );
  1286     MEM_READ_WORD( R_ECX, R_EAX );
  1287     store_reg( R_EAX, Rn );
  1288 :}
  1289 MOV.W @(disp, GBR), R0 {:  
  1290     load_spreg( R_ECX, R_GBR );
  1291     ADD_imm32_r32( disp, R_ECX );
  1292     precheck();
  1293     check_ralign16( R_ECX );
  1294     MEM_READ_WORD( R_ECX, R_EAX );
  1295     store_reg( R_EAX, 0 );
  1296 :}
  1297 MOV.W @(disp, PC), Rn {:  
  1298     if( sh4_x86.in_delay_slot ) {
  1299 	SLOTILLEGAL();
  1300     } else {
  1301 	load_imm32( R_ECX, pc + disp + 4 );
  1302 	MEM_READ_WORD( R_ECX, R_EAX );
  1303 	store_reg( R_EAX, Rn );
   1304     }
   1305 :}
  1306 MOV.W @(disp, Rm), R0 {:  
  1307     load_reg( R_ECX, Rm );
  1308     ADD_imm32_r32( disp, R_ECX );
  1309     precheck();
  1310     check_ralign16( R_ECX );
  1311     MEM_READ_WORD( R_ECX, R_EAX );
  1312     store_reg( R_EAX, 0 );
  1313 :}
  1314 MOVA @(disp, PC), R0 {:  
  1315     if( sh4_x86.in_delay_slot ) {
  1316 	SLOTILLEGAL();
  1317     } else {
  1318 	load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
  1319 	store_reg( R_ECX, 0 );
   1320     }
   1321 :}
  1322 MOVCA.L R0, @Rn {:  
  1323     load_reg( R_EAX, 0 );
  1324     load_reg( R_ECX, Rn );
  1325     precheck();
  1326     check_walign32( R_ECX );
  1327     MEM_WRITE_LONG( R_ECX, R_EAX );
  1328 :}
  1330 /* Control transfer instructions */
  1331 BF disp {:
  1332     if( sh4_x86.in_delay_slot ) {
  1333 	SLOTILLEGAL();
  1334     } else {
  1335 	CMP_imm8s_sh4r( 0, R_T );
  1336 	JNE_rel8( 29, nottaken );
  1337 	exit_block( disp + pc + 4, pc+2 );
  1338 	JMP_TARGET(nottaken);
  1339 	return 2;
   1340     }
   1341 :}
  1342 BF/S disp {:
  1343     if( sh4_x86.in_delay_slot ) {
  1344 	SLOTILLEGAL();
  1345     } else {
  1346 	sh4_x86.in_delay_slot = TRUE;
  1347 	CMP_imm8s_sh4r( 0, R_T );
  1348 	OP(0x0F); OP(0x85); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JNE rel32
  1349 	sh4_x86_translate_instruction(pc+2);
  1350 	exit_block( disp + pc + 4, pc+4 );
  1351 	// not taken
  1352 	*patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1353 	sh4_x86_translate_instruction(pc+2);
  1354 	return 4;
   1355     }
   1356 :}
  1357 BRA disp {:  
  1358     if( sh4_x86.in_delay_slot ) {
  1359 	SLOTILLEGAL();
  1360     } else {
  1361 	sh4_x86.in_delay_slot = TRUE;
  1362 	sh4_x86_translate_instruction( pc + 2 );
  1363 	exit_block( disp + pc + 4, pc+4 );
  1364 	sh4_x86.branch_taken = TRUE;
  1365 	return 4;
   1366     }
   1367 :}
  1368 BRAF Rn {:  
  1369     if( sh4_x86.in_delay_slot ) {
  1370 	SLOTILLEGAL();
  1371     } else {
  1372 	load_reg( R_EAX, Rn );
  1373 	ADD_imm32_r32( pc + 4, R_EAX );
  1374 	store_spreg( R_EAX, REG_OFFSET(pc) );
  1375 	sh4_x86.in_delay_slot = TRUE;
  1376 	sh4_x86_translate_instruction( pc + 2 );
  1377 	exit_block_pcset(pc+2);
  1378 	sh4_x86.branch_taken = TRUE;
  1379 	return 4;
   1380     }
   1381 :}
  1382 BSR disp {:  
  1383     if( sh4_x86.in_delay_slot ) {
  1384 	SLOTILLEGAL();
  1385     } else {
  1386 	load_imm32( R_EAX, pc + 4 );
  1387 	store_spreg( R_EAX, R_PR );
  1388 	sh4_x86.in_delay_slot = TRUE;
  1389 	sh4_x86_translate_instruction( pc + 2 );
  1390 	exit_block( disp + pc + 4, pc+4 );
  1391 	sh4_x86.branch_taken = TRUE;
  1392 	return 4;
   1393     }
   1394 :}
  1395 BSRF Rn {:  
  1396     if( sh4_x86.in_delay_slot ) {
  1397 	SLOTILLEGAL();
  1398     } else {
  1399 	load_imm32( R_ECX, pc + 4 );
  1400 	store_spreg( R_ECX, R_PR );
  1401 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_ECX );
  1402 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1403 	sh4_x86.in_delay_slot = TRUE;
  1404 	sh4_x86_translate_instruction( pc + 2 );
  1405 	exit_block_pcset(pc+2);
  1406 	sh4_x86.branch_taken = TRUE;
  1407 	return 4;
   1408     }
   1409 :}
  1410 BT disp {:
  1411     if( sh4_x86.in_delay_slot ) {
  1412 	SLOTILLEGAL();
  1413     } else {
  1414 	CMP_imm8s_sh4r( 0, R_T );
  1415 	JE_rel8( 29, nottaken );
  1416 	exit_block( disp + pc + 4, pc+2 );
  1417 	JMP_TARGET(nottaken);
  1418 	return 2;
   1419     }
   1420 :}
  1421 BT/S disp {:
  1422     if( sh4_x86.in_delay_slot ) {
  1423 	SLOTILLEGAL();
  1424     } else {
  1425 	sh4_x86.in_delay_slot = TRUE;
  1426 	CMP_imm8s_sh4r( 0, R_T );
  1427 	OP(0x0F); OP(0x84); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JE rel32
  1428 	sh4_x86_translate_instruction(pc+2);
  1429 	exit_block( disp + pc + 4, pc+4 );
  1430 	// not taken
  1431 	*patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1432 	sh4_x86_translate_instruction(pc+2);
  1433 	return 4;
   1434     }
   1435 :}
  1436 JMP @Rn {:  
  1437     if( sh4_x86.in_delay_slot ) {
  1438 	SLOTILLEGAL();
  1439     } else {
  1440 	load_reg( R_ECX, Rn );
  1441 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1442 	sh4_x86.in_delay_slot = TRUE;
  1443 	sh4_x86_translate_instruction(pc+2);
  1444 	exit_block_pcset(pc+2);
  1445 	sh4_x86.branch_taken = TRUE;
  1446 	return 4;
   1447     }
   1448 :}
  1449 JSR @Rn {:  
  1450     if( sh4_x86.in_delay_slot ) {
  1451 	SLOTILLEGAL();
  1452     } else {
  1453 	load_imm32( R_EAX, pc + 4 );
  1454 	store_spreg( R_EAX, R_PR );
  1455 	load_reg( R_ECX, Rn );
  1456 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1457 	sh4_x86.in_delay_slot = TRUE;
  1458 	sh4_x86_translate_instruction(pc+2);
  1459 	exit_block_pcset(pc+2);
  1460 	sh4_x86.branch_taken = TRUE;
  1461 	return 4;
   1462     }
   1463 :}
  1464 RTE {:  
  1465     if( sh4_x86.in_delay_slot ) {
  1466 	SLOTILLEGAL();
  1467     } else {
  1468 	check_priv();
  1469 	load_spreg( R_ECX, R_SPC );
  1470 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1471 	load_spreg( R_EAX, R_SSR );
  1472 	call_func1( sh4_write_sr, R_EAX );
  1473 	sh4_x86.in_delay_slot = TRUE;
  1474 	sh4_x86.priv_checked = FALSE;
  1475 	sh4_x86.fpuen_checked = FALSE;
  1476 	sh4_x86_translate_instruction(pc+2);
  1477 	exit_block_pcset(pc+2);
  1478 	sh4_x86.branch_taken = TRUE;
  1479 	return 4;
   1480     }
   1481 :}
  1482 RTS {:  
  1483     if( sh4_x86.in_delay_slot ) {
  1484 	SLOTILLEGAL();
  1485     } else {
  1486 	load_spreg( R_ECX, R_PR );
  1487 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1488 	sh4_x86.in_delay_slot = TRUE;
  1489 	sh4_x86_translate_instruction(pc+2);
  1490 	exit_block_pcset(pc+2);
  1491 	sh4_x86.branch_taken = TRUE;
  1492 	return 4;
   1493     }
   1494 :}
  1495 TRAPA #imm {:  
  1496     if( sh4_x86.in_delay_slot ) {
  1497 	SLOTILLEGAL();
  1498     } else {
  1499 	PUSH_imm32( imm );
  1500 	call_func0( sh4_raise_trap );
  1501 	ADD_imm8s_r32( 4, R_ESP );
  1502 	exit_block_pcset(pc);
  1503 	sh4_x86.branch_taken = TRUE;
  1504 	return 2;
   1505     }
   1506 :}
  1507 UNDEF {:  
  1508     if( sh4_x86.in_delay_slot ) {
  1509 	SLOTILLEGAL();
  1510     } else {
  1511 	precheck();
  1512 	JMP_exit(EXIT_ILLEGAL);
  1513 	return 2;
   1514     }
   1515 :}
  1517 CLRMAC {:  
  1518     XOR_r32_r32(R_EAX, R_EAX);
  1519     store_spreg( R_EAX, R_MACL );
  1520     store_spreg( R_EAX, R_MACH );
  1521 :}
  1522 CLRS {:
  1523     CLC();
  1524     SETC_sh4r(R_S);
  1525 :}
  1526 CLRT {:  
  1527     CLC();
  1528     SETC_t();
  1529 :}
  1530 SETS {:  
  1531     STC();
  1532     SETC_sh4r(R_S);
  1533 :}
  1534 SETT {:  
  1535     STC();
  1536     SETC_t();
  1537 :}
  1539 /* Floating point moves */
  1540 FMOV FRm, FRn {:  
  1541     /* As horrible as this looks, it's actually covering 5 separate cases:
  1542      * 1. 32-bit fr-to-fr (PR=0)
  1543      * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )
  1544      * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )
  1545      * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
  1546      * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
  1547      */
  1548     check_fpuen();
  1549     load_spreg( R_ECX, R_FPSCR );
  1550     load_fr_bank( R_EDX );
  1551     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1552     JNE_rel8(8, doublesize);
  1553     load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
  1554     store_fr( R_EDX, R_EAX, FRn );
  1555     if( FRm&1 ) {
  1556 	JMP_rel8(24, end);
  1557 	JMP_TARGET(doublesize);
  1558 	load_xf_bank( R_ECX ); 
  1559 	load_fr( R_ECX, R_EAX, FRm-1 );
  1560 	if( FRn&1 ) {
  1561 	    load_fr( R_ECX, R_EDX, FRm );
  1562 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1563 	    store_fr( R_ECX, R_EDX, FRn );
  1564 	} else /* FRn&1 == 0 */ {
  1565 	    load_fr( R_ECX, R_ECX, FRm );
  1566 	    store_fr( R_EDX, R_EAX, FRn );
  1567 	    store_fr( R_EDX, R_ECX, FRn+1 );
   1568 	}
   1569 	JMP_TARGET(end);
  1570     } else /* FRm&1 == 0 */ {
  1571 	if( FRn&1 ) {
  1572 	    JMP_rel8(24, end);
  1573 	    load_xf_bank( R_ECX );
  1574 	    load_fr( R_EDX, R_EAX, FRm );
  1575 	    load_fr( R_EDX, R_EDX, FRm+1 );
  1576 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1577 	    store_fr( R_ECX, R_EDX, FRn );
  1578 	    JMP_TARGET(end);
  1579 	} else /* FRn&1 == 0 */ {
  1580 	    JMP_rel8(12, end);
  1581 	    load_fr( R_EDX, R_EAX, FRm );
  1582 	    load_fr( R_EDX, R_ECX, FRm+1 );
  1583 	    store_fr( R_EDX, R_EAX, FRn );
  1584 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1585 	    JMP_TARGET(end);
   1586 	}
   1587     }
   1588 :}
  1589 FMOV FRm, @Rn {: 
  1590     precheck();
  1591     check_fpuen_no_precheck();
  1592     load_reg( R_ECX, Rn );
  1593     check_walign32( R_ECX );
  1594     load_spreg( R_EDX, R_FPSCR );
  1595     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1596     JNE_rel8(20, doublesize);
  1597     load_fr_bank( R_EDX );
  1598     load_fr( R_EDX, R_EAX, FRm );
  1599     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1600     if( FRm&1 ) {
  1601 	JMP_rel8( 48, end );
  1602 	JMP_TARGET(doublesize);
  1603 	load_xf_bank( R_EDX );
  1604 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1605 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1606 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1607 	JMP_TARGET(end);
  1608     } else {
  1609 	JMP_rel8( 39, end );
  1610 	JMP_TARGET(doublesize);
  1611 	load_fr_bank( R_EDX );
  1612 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1613 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1614 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1615 	JMP_TARGET(end);
   1616     }
   1617 :}
  1618 FMOV @Rm, FRn {:  
  1619     precheck();
  1620     check_fpuen_no_precheck();
  1621     load_reg( R_ECX, Rm );
  1622     check_ralign32( R_ECX );
  1623     load_spreg( R_EDX, R_FPSCR );
  1624     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1625     JNE_rel8(19, doublesize);
  1626     MEM_READ_LONG( R_ECX, R_EAX );
  1627     load_fr_bank( R_EDX );
  1628     store_fr( R_EDX, R_EAX, FRn );
  1629     if( FRn&1 ) {
  1630 	JMP_rel8(48, end);
  1631 	JMP_TARGET(doublesize);
  1632 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1633 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1634 	load_xf_bank( R_EDX );
  1635 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1636 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1637 	JMP_TARGET(end);
  1638     } else {
  1639 	JMP_rel8(36, end);
  1640 	JMP_TARGET(doublesize);
  1641 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1642 	load_fr_bank( R_EDX );
  1643 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1644 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1645 	JMP_TARGET(end);
   1646     }
   1647 :}
  1648 FMOV FRm, @-Rn {:  
  1649     precheck();
  1650     check_fpuen_no_precheck();
  1651     load_reg( R_ECX, Rn );
  1652     check_walign32( R_ECX );
  1653     load_spreg( R_EDX, R_FPSCR );
  1654     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1655     JNE_rel8(26, doublesize);
  1656     load_fr_bank( R_EDX );
  1657     load_fr( R_EDX, R_EAX, FRm );
  1658     ADD_imm8s_r32(-4,R_ECX);
  1659     store_reg( R_ECX, Rn );
  1660     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1661     if( FRm&1 ) {
  1662 	JMP_rel8( 54, end );
  1663 	JMP_TARGET(doublesize);
  1664 	load_xf_bank( R_EDX );
  1665 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1666 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1667 	ADD_imm8s_r32(-8,R_ECX);
  1668 	store_reg( R_ECX, Rn );
  1669 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1670 	JMP_TARGET(end);
  1671     } else {
  1672 	JMP_rel8( 45, end );
  1673 	JMP_TARGET(doublesize);
  1674 	load_fr_bank( R_EDX );
  1675 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1676 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1677 	ADD_imm8s_r32(-8,R_ECX);
  1678 	store_reg( R_ECX, Rn );
  1679 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1680 	JMP_TARGET(end);
   1681     }
   1682 :}
  1683 FMOV @Rm+, FRn {:
  1684     precheck();
  1685     check_fpuen_no_precheck();
  1686     load_reg( R_ECX, Rm );
  1687     check_ralign32( R_ECX );
  1688     MOV_r32_r32( R_ECX, R_EAX );
  1689     load_spreg( R_EDX, R_FPSCR );
  1690     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1691     JNE_rel8(25, doublesize);
  1692     ADD_imm8s_r32( 4, R_EAX );
  1693     store_reg( R_EAX, Rm );
  1694     MEM_READ_LONG( R_ECX, R_EAX );
  1695     load_fr_bank( R_EDX );
  1696     store_fr( R_EDX, R_EAX, FRn );
  1697     if( FRn&1 ) {
  1698 	JMP_rel8(54, end);
  1699 	JMP_TARGET(doublesize);
  1700 	ADD_imm8s_r32( 8, R_EAX );
  1701 	store_reg(R_EAX, Rm);
  1702 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1703 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1704 	load_xf_bank( R_EDX );
  1705 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1706 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1707 	JMP_TARGET(end);
  1708     } else {
  1709 	JMP_rel8(42, end);
  1710 	ADD_imm8s_r32( 8, R_EAX );
  1711 	store_reg(R_EAX, Rm);
  1712 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1713 	load_fr_bank( R_EDX );
  1714 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1715 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1716 	JMP_TARGET(end);
   1717     }
   1718 :}
  1719 FMOV FRm, @(R0, Rn) {:  
  1720     precheck();
  1721     check_fpuen_no_precheck();
  1722     load_reg( R_ECX, Rn );
  1723     ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX );
  1724     check_walign32( R_ECX );
  1725     load_spreg( R_EDX, R_FPSCR );
  1726     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1727     JNE_rel8(20, doublesize);
  1728     load_fr_bank( R_EDX );
  1729     load_fr( R_EDX, R_EAX, FRm );
  1730     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1731     if( FRm&1 ) {
  1732 	JMP_rel8( 48, end );
  1733 	JMP_TARGET(doublesize);
  1734 	load_xf_bank( R_EDX );
  1735 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1736 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1737 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1738 	JMP_TARGET(end);
  1739     } else {
  1740 	JMP_rel8( 39, end );
  1741 	JMP_TARGET(doublesize);
  1742 	load_fr_bank( R_EDX );
  1743 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1744 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1745 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1746 	JMP_TARGET(end);
   1747     }
   1748 :}
  1749 FMOV @(R0, Rm), FRn {:  
  1750     precheck();
  1751     check_fpuen_no_precheck();
  1752     load_reg( R_ECX, Rm );
  1753     ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX );
  1754     check_ralign32( R_ECX );
  1755     load_spreg( R_EDX, R_FPSCR );
  1756     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1757     JNE_rel8(19, doublesize);
  1758     MEM_READ_LONG( R_ECX, R_EAX );
  1759     load_fr_bank( R_EDX );
  1760     store_fr( R_EDX, R_EAX, FRn );
  1761     if( FRn&1 ) {
  1762 	JMP_rel8(48, end);
  1763 	JMP_TARGET(doublesize);
  1764 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1765 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1766 	load_xf_bank( R_EDX );
  1767 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1768 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1769 	JMP_TARGET(end);
  1770     } else {
  1771 	JMP_rel8(36, end);
  1772 	JMP_TARGET(doublesize);
  1773 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1774 	load_fr_bank( R_EDX );
  1775 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1776 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1777 	JMP_TARGET(end);
   1778     }
   1779 :}
  1780 FLDI0 FRn {:  /* IFF PR=0 */
  1781     check_fpuen();
  1782     load_spreg( R_ECX, R_FPSCR );
  1783     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1784     JNE_rel8(8, end);
  1785     XOR_r32_r32( R_EAX, R_EAX );
  1786     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1787     store_fr( R_ECX, R_EAX, FRn );
  1788     JMP_TARGET(end);
  1789 :}
  1790 FLDI1 FRn {:  /* IFF PR=0 */
  1791     check_fpuen();
  1792     load_spreg( R_ECX, R_FPSCR );
  1793     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1794     JNE_rel8(11, end);
  1795     load_imm32(R_EAX, 0x3F800000);
  1796     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1797     store_fr( R_ECX, R_EAX, FRn );
  1798     JMP_TARGET(end);
  1799 :}
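
/* A note on the immediates above: FLDI0 stores the all-zero bit pattern for
 * 0.0f, and FLDI1 stores 0x3F800000, the IEEE-754 single-precision encoding
 * of 1.0f.  A plain-C illustration of that encoding (reference only, not
 * part of the translator):
 *
 *     uint32_t bits = 0x3F800000;
 *     float one;
 *     memcpy( &one, &bits, sizeof(one) );   // one == 1.0f
 */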
  1801 FLOAT FPUL, FRn {:  
  1802     check_fpuen();
  1803     load_spreg( R_ECX, R_FPSCR );
  1804     load_spreg(R_EDX, REG_OFFSET(fr_bank));
  1805     FILD_sh4r(R_FPUL);
  1806     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1807     JNE_rel8(5, doubleprec);
  1808     pop_fr( R_EDX, FRn );
  1809     JMP_rel8(3, end);
  1810     JMP_TARGET(doubleprec);
  1811     pop_dr( R_EDX, FRn );
  1812     JMP_TARGET(end);
  1813 :}
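
/* FLOAT converts the signed 32-bit integer in FPUL to floating point, with
 * the destination width selected by FPSCR.PR; FILD_sh4r above pushes the
 * integer onto the x87 stack and pop_fr/pop_dr store it back in the chosen
 * width.  Informal C equivalent (illustrative only):
 *
 *     if( fpscr & FPSCR_PR )  DRn = (double)(int32_t)fpul;   // PR=1
 *     else                    FRn = (float)(int32_t)fpul;    // PR=0
 */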
  1814 FTRC FRm, FPUL {:  
  1815     check_fpuen();
  1816     load_spreg( R_ECX, R_FPSCR );
  1817     load_fr_bank( R_EDX );
  1818     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1819     JNE_rel8(5, doubleprec);
  1820     push_fr( R_EDX, FRm );
  1821     JMP_rel8(3, doop);
  1822     JMP_TARGET(doubleprec);
  1823     push_dr( R_EDX, FRm );
  1824     JMP_TARGET( doop );
  1825     load_imm32( R_ECX, (uint32_t)&max_int );
  1826     FILD_r32ind( R_ECX );
  1827     FCOMIP_st(1);
  1828     JNA_rel8( 32, sat );
  1829     load_imm32( R_ECX, (uint32_t)&min_int );  // 5
  1830     FILD_r32ind( R_ECX );           // 2
  1831     FCOMIP_st(1);                   // 2
  1832     JAE_rel8( 21, sat2 );            // 2
  1833     load_imm32( R_EAX, (uint32_t)&save_fcw );
  1834     FNSTCW_r32ind( R_EAX );
  1835     load_imm32( R_EDX, (uint32_t)&trunc_fcw );
  1836     FLDCW_r32ind( R_EDX );
  1837     FISTP_sh4r(R_FPUL);             // 3
  1838     FLDCW_r32ind( R_EAX );
  1839     JMP_rel8( 9, end );             // 2
  1841     JMP_TARGET(sat);
  1842     JMP_TARGET(sat2);
  1843     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  1844     store_spreg( R_ECX, R_FPUL );
  1845     FPOP_st();
  1846     JMP_TARGET(end);
  1847 :}
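
/* FTRC truncates toward zero and saturates out-of-range values, which is why
 * the block above compares against max_int/min_int and temporarily swaps in
 * trunc_fcw as the x87 control word around the FISTP.  Roughly, in C (a
 * sketch of the intended semantics, ignoring NaNs):
 *
 *     int32_t ftrc( double d ) {
 *         if( d >= 2147483647.0 )  return 0x7FFFFFFF;           // positive saturation
 *         if( d <= -2147483648.0 ) return (int32_t)0x80000000;  // negative saturation
 *         return (int32_t)d;   // C conversion truncates toward zero
 *     }
 */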
  1848 FLDS FRm, FPUL {:  
  1849     check_fpuen();
  1850     load_fr_bank( R_ECX );
  1851     load_fr( R_ECX, R_EAX, FRm );
  1852     store_spreg( R_EAX, R_FPUL );
  1853 :}
  1854 FSTS FPUL, FRn {:  
  1855     check_fpuen();
  1856     load_fr_bank( R_ECX );
  1857     load_spreg( R_EAX, R_FPUL );
  1858     store_fr( R_ECX, R_EAX, FRn );
  1859 :}
  1860 FCNVDS FRm, FPUL {:  
  1861     check_fpuen();
  1862     load_spreg( R_ECX, R_FPSCR );
  1863     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1864     JE_rel8(9, end); // only when PR=1
  1865     load_fr_bank( R_ECX );
  1866     push_dr( R_ECX, FRm );
  1867     pop_fpul();
  1868     JMP_TARGET(end);
  1869 :}
  1870 FCNVSD FPUL, FRn {:  
  1871     check_fpuen();
  1872     load_spreg( R_ECX, R_FPSCR );
  1873     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1874     JE_rel8(9, end); // only when PR=1
  1875     load_fr_bank( R_ECX );
  1876     push_fpul();
  1877     pop_dr( R_ECX, FRn );
  1878     JMP_TARGET(end);
  1879 :}
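
/* FCNVDS/FCNVSD convert between double and single precision through FPUL and
 * are only defined when PR=1, hence the JE guards above.  Informally:
 *
 *     fpul = (float)DRm;      // FCNVDS FRm, FPUL
 *     DRn  = (double)fpul;    // FCNVSD FPUL, FRn  (FPUL read as a float)
 */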
  1881 /* Floating point instructions */
  1882 FABS FRn {:  
  1883     check_fpuen();
  1884     load_spreg( R_ECX, R_FPSCR );
  1885     load_fr_bank( R_EDX );
  1886     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1887     JNE_rel8(10, doubleprec);
  1888     push_fr(R_EDX, FRn); // 3
  1889     FABS_st0(); // 2
  1890     pop_fr( R_EDX, FRn); //3
  1891     JMP_rel8(8,end); // 2
  1892     JMP_TARGET(doubleprec);
  1893     push_dr(R_EDX, FRn);
  1894     FABS_st0();
  1895     pop_dr(R_EDX, FRn);
  1896     JMP_TARGET(end);
  1897 :}
  1898 FADD FRm, FRn {:  
  1899     check_fpuen();
  1900     load_spreg( R_ECX, R_FPSCR );
  1901     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1902     load_fr_bank( R_EDX );
  1903     JNE_rel8(13,doubleprec);
  1904     push_fr(R_EDX, FRm);
  1905     push_fr(R_EDX, FRn);
  1906     FADDP_st(1);
  1907     pop_fr(R_EDX, FRn);
  1908     JMP_rel8(11,end);
  1909     JMP_TARGET(doubleprec);
  1910     push_dr(R_EDX, FRm);
  1911     push_dr(R_EDX, FRn);
  1912     FADDP_st(1);
  1913     pop_dr(R_EDX, FRn);
  1914     JMP_TARGET(end);
  1915 :}
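
/* FADD above and FDIV/FMUL/FSUB below all share the same shape: test
 * FPSCR.PR once, then emit either a single-precision (push_fr/pop_fr) or
 * double-precision (push_dr/pop_dr) x87 sequence.  For FADD the intended
 * effect is simply:
 *
 *     if( fpscr & FPSCR_PR )  DRn = DRn + DRm;   // PR=1
 *     else                    FRn = FRn + FRm;   // PR=0
 */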
  1916 FDIV FRm, FRn {:  
  1917     check_fpuen();
  1918     load_spreg( R_ECX, R_FPSCR );
  1919     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1920     load_fr_bank( R_EDX );
  1921     JNE_rel8(13, doubleprec);
  1922     push_fr(R_EDX, FRn);
  1923     push_fr(R_EDX, FRm);
  1924     FDIVP_st(1);
  1925     pop_fr(R_EDX, FRn);
  1926     JMP_rel8(11, end);
  1927     JMP_TARGET(doubleprec);
  1928     push_dr(R_EDX, FRn);
  1929     push_dr(R_EDX, FRm);
  1930     FDIVP_st(1);
  1931     pop_dr(R_EDX, FRn);
  1932     JMP_TARGET(end);
  1933 :}
  1934 FMAC FR0, FRm, FRn {:  
  1935     check_fpuen();
  1936     load_spreg( R_ECX, R_FPSCR );
  1937     load_spreg( R_EDX, REG_OFFSET(fr_bank));
  1938     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1939     JNE_rel8(18, doubleprec);
  1940     push_fr( R_EDX, 0 );
  1941     push_fr( R_EDX, FRm );
  1942     FMULP_st(1);
  1943     push_fr( R_EDX, FRn );
  1944     FADDP_st(1);
  1945     pop_fr( R_EDX, FRn );
  1946     JMP_rel8(16, end);
  1947     JMP_TARGET(doubleprec);
  1948     push_dr( R_EDX, 0 );
  1949     push_dr( R_EDX, FRm );
  1950     FMULP_st(1);
  1951     push_dr( R_EDX, FRn );
  1952     FADDP_st(1);
  1953     pop_dr( R_EDX, FRn );
  1954     JMP_TARGET(end);
  1955 :}
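
/* FMAC is a multiply-accumulate through FR0: the x87 sequence above computes
 * FR0*FRm, adds FRn, and writes the result back to FRn, i.e. in C terms:
 *
 *     FRn = FR0 * FRm + FRn;
 */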
  1957 FMUL FRm, FRn {:  
  1958     check_fpuen();
  1959     load_spreg( R_ECX, R_FPSCR );
  1960     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1961     load_fr_bank( R_EDX );
  1962     JNE_rel8(13, doubleprec);
  1963     push_fr(R_EDX, FRm);
  1964     push_fr(R_EDX, FRn);
  1965     FMULP_st(1);
  1966     pop_fr(R_EDX, FRn);
  1967     JMP_rel8(11, end);
  1968     JMP_TARGET(doubleprec);
  1969     push_dr(R_EDX, FRm);
  1970     push_dr(R_EDX, FRn);
  1971     FMULP_st(1);
  1972     pop_dr(R_EDX, FRn);
  1973     JMP_TARGET(end);
  1974 :}
  1975 FNEG FRn {:  
  1976     check_fpuen();
  1977     load_spreg( R_ECX, R_FPSCR );
  1978     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1979     load_fr_bank( R_EDX );
  1980     JNE_rel8(10, doubleprec);
  1981     push_fr(R_EDX, FRn);
  1982     FCHS_st0();
  1983     pop_fr(R_EDX, FRn);
  1984     JMP_rel8(8, end);
  1985     JMP_TARGET(doubleprec);
  1986     push_dr(R_EDX, FRn);
  1987     FCHS_st0();
  1988     pop_dr(R_EDX, FRn);
  1989     JMP_TARGET(end);
  1990 :}
  1991 FSRRA FRn {:  
  1992     check_fpuen();
  1993     load_spreg( R_ECX, R_FPSCR );
  1994     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1995     load_fr_bank( R_EDX );
  1996     JNE_rel8(12, end); // PR=0 only
  1997     FLD1_st0();
  1998     push_fr(R_EDX, FRn);
  1999     FSQRT_st0();
  2000     FDIVP_st(1);
  2001     pop_fr(R_EDX, FRn);
  2002     JMP_TARGET(end);
  2003 :}
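
/* FSRRA is a reciprocal-square-root approximation (PR=0 only); the sequence
 * above computes it exactly via x87 as 1/sqrt(FRn).  Equivalent C sketch:
 *
 *     FRn = 1.0f / sqrtf( FRn );
 */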
  2004 FSQRT FRn {:  
  2005     check_fpuen();
  2006     load_spreg( R_ECX, R_FPSCR );
  2007     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2008     load_fr_bank( R_EDX );
  2009     JNE_rel8(10, doubleprec);
  2010     push_fr(R_EDX, FRn);
  2011     FSQRT_st0();
  2012     pop_fr(R_EDX, FRn);
  2013     JMP_rel8(8, end);
  2014     JMP_TARGET(doubleprec);
  2015     push_dr(R_EDX, FRn);
  2016     FSQRT_st0();
  2017     pop_dr(R_EDX, FRn);
  2018     JMP_TARGET(end);
  2019 :}
  2020 FSUB FRm, FRn {:  
  2021     check_fpuen();
  2022     load_spreg( R_ECX, R_FPSCR );
  2023     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2024     load_fr_bank( R_EDX );
  2025     JNE_rel8(13, doubleprec);
  2026     push_fr(R_EDX, FRn);
  2027     push_fr(R_EDX, FRm);
  2028     FSUBP_st(1);
  2029     pop_fr(R_EDX, FRn);
  2030     JMP_rel8(11, end);
  2031     JMP_TARGET(doubleprec);
  2032     push_dr(R_EDX, FRn);
  2033     push_dr(R_EDX, FRm);
  2034     FSUBP_st(1);
  2035     pop_dr(R_EDX, FRn);
  2036     JMP_TARGET(end);
  2037 :}
  2039 FCMP/EQ FRm, FRn {:  
  2040     check_fpuen();
  2041     load_spreg( R_ECX, R_FPSCR );
  2042     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2043     load_fr_bank( R_EDX );
  2044     JNE_rel8(8, doubleprec);
  2045     push_fr(R_EDX, FRm);
  2046     push_fr(R_EDX, FRn);
  2047     JMP_rel8(6, end);
  2048     JMP_TARGET(doubleprec);
  2049     push_dr(R_EDX, FRm);
  2050     push_dr(R_EDX, FRn);
  2051     JMP_TARGET(end);
  2052     FCOMIP_st(1);
  2053     SETE_t();
  2054     FPOP_st();
  2055 :}
  2056 FCMP/GT FRm, FRn {:  
  2057     check_fpuen();
  2058     load_spreg( R_ECX, R_FPSCR );
  2059     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2060     load_fr_bank( R_EDX );
  2061     JNE_rel8(8, doubleprec);
  2062     push_fr(R_EDX, FRm);
  2063     push_fr(R_EDX, FRn);
  2064     JMP_rel8(6, end);
  2065     JMP_TARGET(doubleprec);
  2066     push_dr(R_EDX, FRm);
  2067     push_dr(R_EDX, FRn);
  2068     JMP_TARGET(end);
  2069     FCOMIP_st(1);
  2070     SETA_t();
  2071     FPOP_st();
  2072 :}
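
/* Both comparisons set the T flag from an x87 FCOMIP: SETE_t() for FCMP/EQ
 * and SETA_t() for FCMP/GT.  The intended results are:
 *
 *     T = (FRn == FRm);   // FCMP/EQ
 *     T = (FRn >  FRm);   // FCMP/GT
 */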
  2074 FSCA FPUL, FRn {:  
  2075     check_fpuen();
  2076     load_spreg( R_ECX, R_FPSCR );
  2077     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2078     JNE_rel8( 21, doubleprec );
  2079     load_fr_bank( R_ECX );
  2080     ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
  2081     load_spreg( R_EDX, R_FPUL );
  2082     call_func2( sh4_fsca, R_EDX, R_ECX );
  2083     JMP_TARGET(doubleprec);
  2084 :}
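
/* FSCA (PR=0 only) produces a sine/cosine pair from the fixed-point angle in
 * FPUL, where 0x10000 corresponds to one full turn: sin goes to FRn and cos
 * to FRn+1.  Approximate C model of what the sh4_fsca() helper is expected
 * to compute (the helper's real signature and the register-file layout are
 * defined elsewhere; this is illustrative only):
 *
 *     void fsca( uint32_t angle, float *pair ) {
 *         double rads = (angle & 0xFFFF) * (2.0 * M_PI / 65536.0);
 *         pair[0] = (float)sin( rads );
 *         pair[1] = (float)cos( rads );
 *     }
 */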
  2085 FIPR FVm, FVn {:  
  2086     check_fpuen();
  2087     load_spreg( R_ECX, R_FPSCR );
  2088     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2089     JNE_rel8(44, doubleprec);
  2091     load_fr_bank( R_ECX );
  2092     push_fr( R_ECX, FVm<<2 );
  2093     push_fr( R_ECX, FVn<<2 );
  2094     FMULP_st(1);
  2095     push_fr( R_ECX, (FVm<<2)+1);
  2096     push_fr( R_ECX, (FVn<<2)+1);
  2097     FMULP_st(1);
  2098     FADDP_st(1);
  2099     push_fr( R_ECX, (FVm<<2)+2);
  2100     push_fr( R_ECX, (FVn<<2)+2);
  2101     FMULP_st(1);
  2102     FADDP_st(1);
  2103     push_fr( R_ECX, (FVm<<2)+3);
  2104     push_fr( R_ECX, (FVn<<2)+3);
  2105     FMULP_st(1);
  2106     FADDP_st(1);
  2107     pop_fr( R_ECX, (FVn<<2)+3);
  2108     JMP_TARGET(doubleprec);
  2109 :}
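
/* FIPR is a single-precision 4-element inner product, accumulated above one
 * x87 multiply/add at a time and stored into the last element of FVn:
 *
 *     FVn[3] = FVm[0]*FVn[0] + FVm[1]*FVn[1] + FVm[2]*FVn[2] + FVm[3]*FVn[3];
 */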
  2110 FTRV XMTRX, FVn {:  
  2111     check_fpuen();
  2112     load_spreg( R_ECX, R_FPSCR );
  2113     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2114     JNE_rel8( 30, doubleprec );
  2115     load_fr_bank( R_EDX );                 // 3
  2116     ADD_imm8s_r32( FVn<<4, R_EDX );        // 3
  2117     load_xf_bank( R_ECX );                 // 12
  2118     call_func2( sh4_ftrv, R_EDX, R_ECX );  // 12
  2119     JMP_TARGET(doubleprec);
  2120 :}
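
/* FTRV multiplies the 4-element vector FVn by the XMTRX matrix held in the
 * extended register bank (PR=0 only); the heavy lifting is done by the
 * sh4_ftrv() helper.  A C sketch of the intended math, assuming XMTRX is
 * laid out column-major as XF0..XF15 (illustrative only):
 *
 *     void ftrv( float *fv, const float *xf ) {
 *         float r[4];
 *         int i;
 *         for( i = 0; i < 4; i++ )
 *             r[i] = xf[i]*fv[0] + xf[i+4]*fv[1] + xf[i+8]*fv[2] + xf[i+12]*fv[3];
 *         fv[0] = r[0]; fv[1] = r[1]; fv[2] = r[2]; fv[3] = r[3];
 *     }
 */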
  2122 FRCHG {:  
  2123     check_fpuen();
  2124     load_spreg( R_ECX, R_FPSCR );
  2125     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2126     store_spreg( R_ECX, R_FPSCR );
  2127     update_fr_bank( R_ECX );
  2128 :}
  2129 FSCHG {:  
  2130     check_fpuen();
  2131     load_spreg( R_ECX, R_FPSCR );
  2132     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2133     store_spreg( R_ECX, R_FPSCR );
  2134 :}
  2136 /* Processor control instructions */
  2137 LDC Rm, SR {:
  2138     if( sh4_x86.in_delay_slot ) {
  2139 	SLOTILLEGAL();
  2140     } else {
  2141 	check_priv();
  2142 	load_reg( R_EAX, Rm );
  2143 	call_func1( sh4_write_sr, R_EAX );
  2144 	sh4_x86.priv_checked = FALSE;
  2145 	sh4_x86.fpuen_checked = FALSE;
  2146     }
  2147 :}
  2148 LDC Rm, GBR {: 
  2149     load_reg( R_EAX, Rm );
  2150     store_spreg( R_EAX, R_GBR );
  2151 :}
  2152 LDC Rm, VBR {:  
  2153     check_priv();
  2154     load_reg( R_EAX, Rm );
  2155     store_spreg( R_EAX, R_VBR );
  2156 :}
  2157 LDC Rm, SSR {:  
  2158     check_priv();
  2159     load_reg( R_EAX, Rm );
  2160     store_spreg( R_EAX, R_SSR );
  2161 :}
  2162 LDC Rm, SGR {:  
  2163     check_priv();
  2164     load_reg( R_EAX, Rm );
  2165     store_spreg( R_EAX, R_SGR );
  2166 :}
  2167 LDC Rm, SPC {:  
  2168     check_priv();
  2169     load_reg( R_EAX, Rm );
  2170     store_spreg( R_EAX, R_SPC );
  2171 :}
  2172 LDC Rm, DBR {:  
  2173     check_priv();
  2174     load_reg( R_EAX, Rm );
  2175     store_spreg( R_EAX, R_DBR );
  2176 :}
  2177 LDC Rm, Rn_BANK {:  
  2178     check_priv();
  2179     load_reg( R_EAX, Rm );
  2180     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2181 :}
  2182 LDC.L @Rm+, GBR {:  
  2183     load_reg( R_EAX, Rm );
  2184     precheck();
  2185     check_ralign32( R_EAX );
  2186     MOV_r32_r32( R_EAX, R_ECX );
  2187     ADD_imm8s_r32( 4, R_EAX );
  2188     store_reg( R_EAX, Rm );
  2189     MEM_READ_LONG( R_ECX, R_EAX );
  2190     store_spreg( R_EAX, R_GBR );
  2191 :}
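
/* All of the @Rm+ loads in this group follow the same post-increment
 * pattern: read a longword from the address currently in Rm, then advance
 * Rm by 4 (the copy into ECX above preserves the pre-increment address for
 * the read).  In C terms, with mem_read_long standing in for MEM_READ_LONG:
 *
 *     uint32_t addr = R[m];
 *     R[m] = addr + 4;
 *     GBR = mem_read_long( addr );   // or SR, VBR, ... in the variants below
 */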
  2192 LDC.L @Rm+, SR {:
  2193     if( sh4_x86.in_delay_slot ) {
  2194 	SLOTILLEGAL();
  2195     } else {
  2196 	precheck();
  2197 	check_priv_no_precheck();
  2198 	load_reg( R_EAX, Rm );
  2199 	check_ralign32( R_EAX );
  2200 	MOV_r32_r32( R_EAX, R_ECX );
  2201 	ADD_imm8s_r32( 4, R_EAX );
  2202 	store_reg( R_EAX, Rm );
  2203 	MEM_READ_LONG( R_ECX, R_EAX );
  2204 	call_func1( sh4_write_sr, R_EAX );
  2205 	sh4_x86.priv_checked = FALSE;
  2206 	sh4_x86.fpuen_checked = FALSE;
  2207     }
  2208 :}
  2209 LDC.L @Rm+, VBR {:  
  2210     precheck();
  2211     check_priv_no_precheck();
  2212     load_reg( R_EAX, Rm );
  2213     check_ralign32( R_EAX );
  2214     MOV_r32_r32( R_EAX, R_ECX );
  2215     ADD_imm8s_r32( 4, R_EAX );
  2216     store_reg( R_EAX, Rm );
  2217     MEM_READ_LONG( R_ECX, R_EAX );
  2218     store_spreg( R_EAX, R_VBR );
  2219 :}
  2220 LDC.L @Rm+, SSR {:
  2221     precheck();
  2222     check_priv_no_precheck();
  2223     load_reg( R_EAX, Rm );
  2224     check_ralign32( R_EAX );
  2225     MOV_r32_r32( R_EAX, R_ECX );
  2226     ADD_imm8s_r32( 4, R_EAX );
  2227     store_reg( R_EAX, Rm );
  2228     MEM_READ_LONG( R_ECX, R_EAX );
  2229     store_spreg( R_EAX, R_SSR );
  2230 :}
  2231 LDC.L @Rm+, SGR {:  
  2232     precheck();
  2233     check_priv_no_precheck();
  2234     load_reg( R_EAX, Rm );
  2235     check_ralign32( R_EAX );
  2236     MOV_r32_r32( R_EAX, R_ECX );
  2237     ADD_imm8s_r32( 4, R_EAX );
  2238     store_reg( R_EAX, Rm );
  2239     MEM_READ_LONG( R_ECX, R_EAX );
  2240     store_spreg( R_EAX, R_SGR );
  2241 :}
  2242 LDC.L @Rm+, SPC {:  
  2243     precheck();
  2244     check_priv_no_precheck();
  2245     load_reg( R_EAX, Rm );
  2246     check_ralign32( R_EAX );
  2247     MOV_r32_r32( R_EAX, R_ECX );
  2248     ADD_imm8s_r32( 4, R_EAX );
  2249     store_reg( R_EAX, Rm );
  2250     MEM_READ_LONG( R_ECX, R_EAX );
  2251     store_spreg( R_EAX, R_SPC );
  2252 :}
  2253 LDC.L @Rm+, DBR {:  
  2254     precheck();
  2255     check_priv_no_precheck();
  2256     load_reg( R_EAX, Rm );
  2257     check_ralign32( R_EAX );
  2258     MOV_r32_r32( R_EAX, R_ECX );
  2259     ADD_imm8s_r32( 4, R_EAX );
  2260     store_reg( R_EAX, Rm );
  2261     MEM_READ_LONG( R_ECX, R_EAX );
  2262     store_spreg( R_EAX, R_DBR );
  2263 :}
  2264 LDC.L @Rm+, Rn_BANK {:  
  2265     precheck();
  2266     check_priv_no_precheck();
  2267     load_reg( R_EAX, Rm );
  2268     check_ralign32( R_EAX );
  2269     MOV_r32_r32( R_EAX, R_ECX );
  2270     ADD_imm8s_r32( 4, R_EAX );
  2271     store_reg( R_EAX, Rm );
  2272     MEM_READ_LONG( R_ECX, R_EAX );
  2273     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2274 :}
  2275 LDS Rm, FPSCR {:  
  2276     load_reg( R_EAX, Rm );
  2277     store_spreg( R_EAX, R_FPSCR );
  2278     update_fr_bank( R_EAX );
  2279 :}
  2280 LDS.L @Rm+, FPSCR {:  
  2281     load_reg( R_EAX, Rm );
  2282     precheck();
  2283     check_ralign32( R_EAX );
  2284     MOV_r32_r32( R_EAX, R_ECX );
  2285     ADD_imm8s_r32( 4, R_EAX );
  2286     store_reg( R_EAX, Rm );
  2287     MEM_READ_LONG( R_ECX, R_EAX );
  2288     store_spreg( R_EAX, R_FPSCR );
  2289     update_fr_bank( R_EAX );
  2290 :}
  2291 LDS Rm, FPUL {:  
  2292     load_reg( R_EAX, Rm );
  2293     store_spreg( R_EAX, R_FPUL );
  2294 :}
  2295 LDS.L @Rm+, FPUL {:  
  2296     load_reg( R_EAX, Rm );
  2297     precheck();
  2298     check_ralign32( R_EAX );
  2299     MOV_r32_r32( R_EAX, R_ECX );
  2300     ADD_imm8s_r32( 4, R_EAX );
  2301     store_reg( R_EAX, Rm );
  2302     MEM_READ_LONG( R_ECX, R_EAX );
  2303     store_spreg( R_EAX, R_FPUL );
  2304 :}
  2305 LDS Rm, MACH {: 
  2306     load_reg( R_EAX, Rm );
  2307     store_spreg( R_EAX, R_MACH );
  2308 :}
  2309 LDS.L @Rm+, MACH {:  
  2310     load_reg( R_EAX, Rm );
  2311     precheck();
  2312     check_ralign32( R_EAX );
  2313     MOV_r32_r32( R_EAX, R_ECX );
  2314     ADD_imm8s_r32( 4, R_EAX );
  2315     store_reg( R_EAX, Rm );
  2316     MEM_READ_LONG( R_ECX, R_EAX );
  2317     store_spreg( R_EAX, R_MACH );
  2318 :}
  2319 LDS Rm, MACL {:  
  2320     load_reg( R_EAX, Rm );
  2321     store_spreg( R_EAX, R_MACL );
  2322 :}
  2323 LDS.L @Rm+, MACL {:  
  2324     load_reg( R_EAX, Rm );
  2325     precheck();
  2326     check_ralign32( R_EAX );
  2327     MOV_r32_r32( R_EAX, R_ECX );
  2328     ADD_imm8s_r32( 4, R_EAX );
  2329     store_reg( R_EAX, Rm );
  2330     MEM_READ_LONG( R_ECX, R_EAX );
  2331     store_spreg( R_EAX, R_MACL );
  2332 :}
  2333 LDS Rm, PR {:  
  2334     load_reg( R_EAX, Rm );
  2335     store_spreg( R_EAX, R_PR );
  2336 :}
  2337 LDS.L @Rm+, PR {:  
  2338     load_reg( R_EAX, Rm );
  2339     precheck();
  2340     check_ralign32( R_EAX );
  2341     MOV_r32_r32( R_EAX, R_ECX );
  2342     ADD_imm8s_r32( 4, R_EAX );
  2343     store_reg( R_EAX, Rm );
  2344     MEM_READ_LONG( R_ECX, R_EAX );
  2345     store_spreg( R_EAX, R_PR );
  2346 :}
  2347 LDTLB {:  :}
  2348 OCBI @Rn {:  :}
  2349 OCBP @Rn {:  :}
  2350 OCBWB @Rn {:  :}
  2351 PREF @Rn {:
  2352     load_reg( R_EAX, Rn );
  2353     PUSH_r32( R_EAX );
  2354     AND_imm32_r32( 0xFC000000, R_EAX );
  2355     CMP_imm32_r32( 0xE0000000, R_EAX );
  2356     JNE_rel8(7, end);
  2357     call_func0( sh4_flush_store_queue );
  2358     JMP_TARGET(end);
  2359     ADD_imm8s_r32( 4, R_ESP );
  2360 :}
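
/* PREF is only emulated for store-queue addresses: the masked compare above
 * tests for the 0xE0000000-0xE3FFFFFF region before calling
 * sh4_flush_store_queue(), with the pushed Rn value as its stack argument
 * (hence the PUSH/ADD %esp pair).  Equivalent test in C:
 *
 *     if( (addr & 0xFC000000) == 0xE0000000 ) {
 *         sh4_flush_store_queue( addr );
 *     }
 */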
  2361 SLEEP {: 
  2362     check_priv();
  2363     call_func0( sh4_sleep );
  2364     sh4_x86.in_delay_slot = FALSE;
  2365     return 2;
  2366 :}
  2367 STC SR, Rn {:
  2368     check_priv();
  2369     call_func0(sh4_read_sr);
  2370     store_reg( R_EAX, Rn );
  2371 :}
  2372 STC GBR, Rn {:  
  2373     load_spreg( R_EAX, R_GBR );
  2374     store_reg( R_EAX, Rn );
  2375 :}
  2376 STC VBR, Rn {:  
  2377     check_priv();
  2378     load_spreg( R_EAX, R_VBR );
  2379     store_reg( R_EAX, Rn );
  2380 :}
  2381 STC SSR, Rn {:  
  2382     check_priv();
  2383     load_spreg( R_EAX, R_SSR );
  2384     store_reg( R_EAX, Rn );
  2385 :}
  2386 STC SPC, Rn {:  
  2387     check_priv();
  2388     load_spreg( R_EAX, R_SPC );
  2389     store_reg( R_EAX, Rn );
  2390 :}
  2391 STC SGR, Rn {:  
  2392     check_priv();
  2393     load_spreg( R_EAX, R_SGR );
  2394     store_reg( R_EAX, Rn );
  2395 :}
  2396 STC DBR, Rn {:  
  2397     check_priv();
  2398     load_spreg( R_EAX, R_DBR );
  2399     store_reg( R_EAX, Rn );
  2400 :}
  2401 STC Rm_BANK, Rn {:
  2402     check_priv();
  2403     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2404     store_reg( R_EAX, Rn );
  2405 :}
  2406 STC.L SR, @-Rn {:
  2407     precheck();
  2408     check_priv_no_precheck();
  2409     call_func0( sh4_read_sr );
  2410     load_reg( R_ECX, Rn );
  2411     check_walign32( R_ECX );
  2412     ADD_imm8s_r32( -4, R_ECX );
  2413     store_reg( R_ECX, Rn );
  2414     MEM_WRITE_LONG( R_ECX, R_EAX );
  2415 :}
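
/* The @-Rn stores in this group mirror the post-increment loads: decrement
 * Rn by 4 first, then write the longword to the new address.  In C terms,
 * with mem_write_long standing in for MEM_WRITE_LONG:
 *
 *     R[n] -= 4;
 *     mem_write_long( R[n], value );
 */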
  2416 STC.L VBR, @-Rn {:  
  2417     precheck();
  2418     check_priv_no_precheck();
  2419     load_reg( R_ECX, Rn );
  2420     check_walign32( R_ECX );
  2421     ADD_imm8s_r32( -4, R_ECX );
  2422     store_reg( R_ECX, Rn );
  2423     load_spreg( R_EAX, R_VBR );
  2424     MEM_WRITE_LONG( R_ECX, R_EAX );
  2425 :}
  2426 STC.L SSR, @-Rn {:  
  2427     precheck();
  2428     check_priv_no_precheck();
  2429     load_reg( R_ECX, Rn );
  2430     check_walign32( R_ECX );
  2431     ADD_imm8s_r32( -4, R_ECX );
  2432     store_reg( R_ECX, Rn );
  2433     load_spreg( R_EAX, R_SSR );
  2434     MEM_WRITE_LONG( R_ECX, R_EAX );
  2435 :}
  2436 STC.L SPC, @-Rn {:
  2437     precheck();
  2438     check_priv_no_precheck();
  2439     load_reg( R_ECX, Rn );
  2440     check_walign32( R_ECX );
  2441     ADD_imm8s_r32( -4, R_ECX );
  2442     store_reg( R_ECX, Rn );
  2443     load_spreg( R_EAX, R_SPC );
  2444     MEM_WRITE_LONG( R_ECX, R_EAX );
  2445 :}
  2446 STC.L SGR, @-Rn {:  
  2447     precheck();
  2448     check_priv_no_precheck();
  2449     load_reg( R_ECX, Rn );
  2450     check_walign32( R_ECX );
  2451     ADD_imm8s_r32( -4, R_ECX );
  2452     store_reg( R_ECX, Rn );
  2453     load_spreg( R_EAX, R_SGR );
  2454     MEM_WRITE_LONG( R_ECX, R_EAX );
  2455 :}
  2456 STC.L DBR, @-Rn {:  
  2457     precheck();
  2458     check_priv_no_precheck();
  2459     load_reg( R_ECX, Rn );
  2460     check_walign32( R_ECX );
  2461     ADD_imm8s_r32( -4, R_ECX );
  2462     store_reg( R_ECX, Rn );
  2463     load_spreg( R_EAX, R_DBR );
  2464     MEM_WRITE_LONG( R_ECX, R_EAX );
  2465 :}
  2466 STC.L Rm_BANK, @-Rn {:  
  2467     precheck();
  2468     check_priv_no_precheck();
  2469     load_reg( R_ECX, Rn );
  2470     check_walign32( R_ECX );
  2471     ADD_imm8s_r32( -4, R_ECX );
  2472     store_reg( R_ECX, Rn );
  2473     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2474     MEM_WRITE_LONG( R_ECX, R_EAX );
  2475 :}
  2476 STC.L GBR, @-Rn {:  
  2477     load_reg( R_ECX, Rn );
  2478     precheck();
  2479     check_walign32( R_ECX );
  2480     ADD_imm8s_r32( -4, R_ECX );
  2481     store_reg( R_ECX, Rn );
  2482     load_spreg( R_EAX, R_GBR );
  2483     MEM_WRITE_LONG( R_ECX, R_EAX );
  2484 :}
  2485 STS FPSCR, Rn {:  
  2486     load_spreg( R_EAX, R_FPSCR );
  2487     store_reg( R_EAX, Rn );
  2488 :}
  2489 STS.L FPSCR, @-Rn {:  
  2490     load_reg( R_ECX, Rn );
  2491     precheck();
  2492     check_walign32( R_ECX );
  2493     ADD_imm8s_r32( -4, R_ECX );
  2494     store_reg( R_ECX, Rn );
  2495     load_spreg( R_EAX, R_FPSCR );
  2496     MEM_WRITE_LONG( R_ECX, R_EAX );
  2497 :}
  2498 STS FPUL, Rn {:  
  2499     load_spreg( R_EAX, R_FPUL );
  2500     store_reg( R_EAX, Rn );
  2501 :}
  2502 STS.L FPUL, @-Rn {:  
  2503     load_reg( R_ECX, Rn );
  2504     precheck();
  2505     check_walign32( R_ECX );
  2506     ADD_imm8s_r32( -4, R_ECX );
  2507     store_reg( R_ECX, Rn );
  2508     load_spreg( R_EAX, R_FPUL );
  2509     MEM_WRITE_LONG( R_ECX, R_EAX );
  2510 :}
  2511 STS MACH, Rn {:  
  2512     load_spreg( R_EAX, R_MACH );
  2513     store_reg( R_EAX, Rn );
  2514 :}
  2515 STS.L MACH, @-Rn {:  
  2516     load_reg( R_ECX, Rn );
  2517     precheck();
  2518     check_walign32( R_ECX );
  2519     ADD_imm8s_r32( -4, R_ECX );
  2520     store_reg( R_ECX, Rn );
  2521     load_spreg( R_EAX, R_MACH );
  2522     MEM_WRITE_LONG( R_ECX, R_EAX );
  2523 :}
  2524 STS MACL, Rn {:  
  2525     load_spreg( R_EAX, R_MACL );
  2526     store_reg( R_EAX, Rn );
  2527 :}
  2528 STS.L MACL, @-Rn {:  
  2529     load_reg( R_ECX, Rn );
  2530     precheck();
  2531     check_walign32( R_ECX );
  2532     ADD_imm8s_r32( -4, R_ECX );
  2533     store_reg( R_ECX, Rn );
  2534     load_spreg( R_EAX, R_MACL );
  2535     MEM_WRITE_LONG( R_ECX, R_EAX );
  2536 :}
  2537 STS PR, Rn {:  
  2538     load_spreg( R_EAX, R_PR );
  2539     store_reg( R_EAX, Rn );
  2540 :}
  2541 STS.L PR, @-Rn {:  
  2542     load_reg( R_ECX, Rn );
  2543     precheck();
  2544     check_walign32( R_ECX );
  2545     ADD_imm8s_r32( -4, R_ECX );
  2546     store_reg( R_ECX, Rn );
  2547     load_spreg( R_EAX, R_PR );
  2548     MEM_WRITE_LONG( R_ECX, R_EAX );
  2549 :}
  2551 NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
  2552 %%
  2553     sh4_x86.in_delay_slot = FALSE;
  2554     return 0;
.