lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 409:549e00835448
prev 408:af496b734734
next 416:714df603c869
author nkeynes
date Sat Sep 29 05:33:02 2007 +0000 (13 years ago)
permissions -rw-r--r--
last change Modify termination again to allow early exit (eg on end-of-page), as well
as allowing follow through on conditional branches if desired.
     1 /**
     2  * $Id: sh4x86.in,v 1.17 2007-09-29 05:33:02 nkeynes Exp $
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/sh4core.h"
    29 #include "sh4/sh4trans.h"
    30 #include "sh4/sh4mmio.h"
    31 #include "sh4/x86op.h"
    32 #include "clock.h"
    34 #define DEFAULT_BACKPATCH_SIZE 4096
    36 /** 
    37  * Struct to manage internal translation state. This state is not saved -
    38  * it is only valid between calls to sh4_translate_begin_block() and
    39  * sh4_translate_end_block()
    40  */
    41 struct sh4_x86_state {
    42     gboolean in_delay_slot;
    43     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    44     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    45     gboolean branch_taken; /* true if we branched unconditionally */
    46     uint32_t block_start_pc;
    48     /* Allocated memory for the (block-wide) back-patch list */
    49     uint32_t **backpatch_list;
    50     uint32_t backpatch_posn;
    51     uint32_t backpatch_size;
    52 };
    54 #define EXIT_DATA_ADDR_READ 0
    55 #define EXIT_DATA_ADDR_WRITE 7
    56 #define EXIT_ILLEGAL 14
    57 #define EXIT_SLOT_ILLEGAL 21
    58 #define EXIT_FPU_DISABLED 28
    59 #define EXIT_SLOT_FPU_DISABLED 35
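/*
 * These constants are byte offsets into the exception stub block emitted at
 * the end of each translated block (see sh4_translate_end_block below): each
 * stub is a PUSH imm32 (5 bytes) followed by a JMP rel8 (2 bytes), hence the
 * spacing of 7. A minimal sketch of how a JE_exit()-style macro from x86op.h
 * is assumed to use them:
 *
 *     OP(0x0F); OP(0x84);              // je rel32, displacement patched later
 *     sh4_x86_add_backpatch( xlat_output );
 *     OP32( EXIT_DATA_ADDR_READ );     // placeholder: offset of the stub
 */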
    61 static struct sh4_x86_state sh4_x86;
    63 static uint32_t max_int = 0x7FFFFFFF;
    64 static uint32_t min_int = 0x80000000;
    65 static uint32_t save_fcw; /* save value for fpu control word */
    66 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
    68 void sh4_x86_init()
    69 {
    70     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    71     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(uint32_t *);
    72 }
    75 static void sh4_x86_add_backpatch( uint8_t *ptr )
    76 {
    77     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
    78 	sh4_x86.backpatch_size <<= 1;
    79 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, sh4_x86.backpatch_size * sizeof(uint32_t *) );
    80 	assert( sh4_x86.backpatch_list != NULL );
    81     }
    82     sh4_x86.backpatch_list[sh4_x86.backpatch_posn++] = (uint32_t *)ptr;
    83 }
    85 static void sh4_x86_do_backpatch( uint8_t *reloc_base )
    86 {
    87     unsigned int i;
    88     for( i=0; i<sh4_x86.backpatch_posn; i++ ) {
    89 	*sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]) - 4);
    90     }
    91 }
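/*
 * Worked through, the fix-up above assumes each recorded 32-bit field starts
 * out holding the byte offset of its exception stub within the block trailer
 * (the EXIT_* values above), so that afterwards it is a valid x86 rel32:
 *
 *     target = site + 4 + rel32
 *            = site + 4 + (offset + reloc_base - site - 4)
 *            = reloc_base + offset
 */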
    93 /**
    94  * Emit an instruction to load an SH4 reg into a real register
    95  */
    96 static inline void load_reg( int x86reg, int sh4reg ) 
    97 {
    98     /* mov [bp+n], reg */
    99     OP(0x8B);
   100     OP(0x45 + (x86reg<<3));
   101     OP(REG_OFFSET(r[sh4reg]));
   102 }
   104 static inline void load_reg16s( int x86reg, int sh4reg )
   105 {
   106     OP(0x0F);
   107     OP(0xBF);
   108     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   109 }
   111 static inline void load_reg16u( int x86reg, int sh4reg )
   112 {
   113     OP(0x0F);
   114     OP(0xB7);
   115     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   117 }
   119 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   120 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   121 /**
   122  * Emit an instruction to load an immediate value into a register
   123  */
   124 static inline void load_imm32( int x86reg, uint32_t value ) {
   125     /* mov #value, reg */
   126     OP(0xB8 + x86reg);
   127     OP32(value);
   128 }
   130 /**
   131  * Emit an instruction to store an SH4 reg (RN)
   132  */
    133 static inline void store_reg( int x86reg, int sh4reg ) {
   134     /* mov reg, [bp+n] */
   135     OP(0x89);
   136     OP(0x45 + (x86reg<<3));
   137     OP(REG_OFFSET(r[sh4reg]));
   138 }
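/*
 * Both helpers assume EBP holds &sh4r for the duration of the block (set up in
 * sh4_translate_begin_block) and that REG_OFFSET(r[n]) fits in a signed 8-bit
 * displacement. For example, assuming R_EAX == 0 as in x86op.h:
 *
 *     load_reg( R_EAX, 2 );    // 8B 45 xx : read sh4r.r[2] into EAX
 *     store_reg( R_EAX, 2 );   // 89 45 xx : write EAX back to sh4r.r[2]
 */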
   140 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
   142 /**
   143  * Load an FR register (single-precision floating point) into an integer x86
   144  * register (eg for register-to-register moves)
   145  */
    146 static inline void load_fr( int bankreg, int x86reg, int frm )
   147 {
   148     OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
   149 }
    151 /**
    152  * Store an integer x86 register into an FR register (single-precision
    153  * floating point), eg for register-to-register moves
    154  */
    155 static inline void store_fr( int bankreg, int x86reg, int frn )
   156 {
   157     OP(0x89);  OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
   158 }
   161 /**
    162  * Load a pointer to the back FP bank into the specified x86 register. The
   163  * bankreg must have been previously loaded with FPSCR.
   164  * NB: 12 bytes
   165  */
   166 static inline void load_xf_bank( int bankreg )
   167 {
   168     NOT_r32( bankreg );
   169     SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
   170     AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
   171     OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
   172 }
   174 /**
   175  * Update the fr_bank pointer based on the current fpscr value.
   176  */
   177 static inline void update_fr_bank( int fpscrreg )
   178 {
   179     SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
   180     AND_imm8s_r32( 0x40, fpscrreg );    // Complete extraction
   181     OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
   182     store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
   183 }
   184 /**
   185  * Push FPUL (as a 32-bit float) onto the FPU stack
   186  */
   187 static inline void push_fpul( )
   188 {
   189     OP(0xD9); OP(0x45); OP(R_FPUL);
   190 }
   192 /**
   193  * Pop FPUL (as a 32-bit float) from the FPU stack
   194  */
   195 static inline void pop_fpul( )
   196 {
   197     OP(0xD9); OP(0x5D); OP(R_FPUL);
   198 }
   200 /**
   201  * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
   202  * with the location of the current fp bank.
   203  */
   204 static inline void push_fr( int bankreg, int frm ) 
   205 {
   206     OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
   207 }
   209 /**
   210  * Pop a 32-bit float from the FPU stack and store it back into the fp bank, 
   211  * with bankreg previously loaded with the location of the current fp bank.
   212  */
   213 static inline void pop_fr( int bankreg, int frm )
   214 {
   215     OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
   216 }
   218 /**
   219  * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
   220  * with the location of the current fp bank.
   221  */
   222 static inline void push_dr( int bankreg, int frm )
   223 {
   224     OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
   225 }
   227 static inline void pop_dr( int bankreg, int frm )
   228 {
   229     OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
   230 }
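/*
 * Note the (frm^1) indexing in the single-precision helpers versus the plain
 * frm in push_dr/pop_dr: the FR bank is apparently stored with each even/odd
 * pair word-swapped, so that FRn:FRn+1 reads as one native little-endian
 * double at offset n*4 while an individual single lives at offset (n^1)*4.
 * E.g. with the bank pointer in ECX (assuming R_ECX == 1 as in x86op.h):
 *
 *     push_fr( R_ECX, 0 );   // D9 41 04 : fld dword [ecx+4]  (FR0)
 *     push_dr( R_ECX, 0 );   // DD 41 00 : fld qword [ecx+0]  (DR0 = FR0:FR1)
 */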
   232 /**
   233  * Note: clobbers EAX to make the indirect call - this isn't usually
   234  * a problem since the callee will usually clobber it anyway.
   235  */
   236 static inline void call_func0( void *ptr )
   237 {
   238     load_imm32(R_EAX, (uint32_t)ptr);
   239     CALL_r32(R_EAX);
   240 }
   242 static inline void call_func1( void *ptr, int arg1 )
   243 {
   244     PUSH_r32(arg1);
   245     call_func0(ptr);
   246     ADD_imm8s_r32( 4, R_ESP );
   247 }
   249 static inline void call_func2( void *ptr, int arg1, int arg2 )
   250 {
   251     PUSH_r32(arg2);
   252     PUSH_r32(arg1);
   253     call_func0(ptr);
   254     ADD_imm8s_r32( 8, R_ESP );
   255 }
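/*
 * Both wrappers emit a plain cdecl call: arguments pushed right-to-left, the
 * callee reached indirectly through EAX, and the caller cleaning up ESP. For
 * instance, call_func2( sh4_write_long, R_ECX, R_EAX ) expands to roughly:
 *
 *     push eax                  ; value
 *     push ecx                  ; address
 *     mov  eax, sh4_write_long
 *     call eax
 *     add  esp, 8
 */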
   257 /**
   258  * Write a double (64-bit) value into memory, with the first word in arg2a, and
   259  * the second in arg2b
   260  * NB: 30 bytes
   261  */
   262 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
   263 {
   264     ADD_imm8s_r32( 4, addr );
   265     PUSH_r32(arg2b);
   266     PUSH_r32(addr);
   267     ADD_imm8s_r32( -4, addr );
   268     PUSH_r32(arg2a);
   269     PUSH_r32(addr);
   270     call_func0(sh4_write_long);
   271     ADD_imm8s_r32( 8, R_ESP );
   272     call_func0(sh4_write_long);
   273     ADD_imm8s_r32( 8, R_ESP );
   274 }
   276 /**
   277  * Read a double (64-bit) value from memory, writing the first word into arg2a
   278  * and the second into arg2b. The addr must not be in EAX
   279  * NB: 27 bytes
   280  */
   281 static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
   282 {
   283     PUSH_r32(addr);
   284     call_func0(sh4_read_long);
   285     POP_r32(addr);
   286     PUSH_r32(R_EAX);
   287     ADD_imm8s_r32( 4, addr );
   288     PUSH_r32(addr);
   289     call_func0(sh4_read_long);
   290     ADD_imm8s_r32( 4, R_ESP );
   291     MOV_r32_r32( R_EAX, arg2b );
   292     POP_r32(arg2a);
   293 }
   295 /* Exception checks - Note that all exception checks will clobber EAX */
   296 static void check_priv( )
   297 {
   298     if( !sh4_x86.priv_checked ) {
   299 	sh4_x86.priv_checked = TRUE;
   300 	load_spreg( R_EAX, R_SR );
   301 	AND_imm32_r32( SR_MD, R_EAX );
   302 	if( sh4_x86.in_delay_slot ) {
   303 	    JE_exit( EXIT_SLOT_ILLEGAL );
   304 	} else {
   305 	    JE_exit( EXIT_ILLEGAL );
   306 	}
   307     }
   308 }
   310 static void check_fpuen( )
   311 {
   312     if( !sh4_x86.fpuen_checked ) {
   313 	sh4_x86.fpuen_checked = TRUE;
   314 	load_spreg( R_EAX, R_SR );
   315 	AND_imm32_r32( SR_FD, R_EAX );
   316 	if( sh4_x86.in_delay_slot ) {
   317 	    JNE_exit(EXIT_SLOT_FPU_DISABLED);
   318 	} else {
   319 	    JNE_exit(EXIT_FPU_DISABLED);
   320 	}
   321     }
   322 }
   324 static void check_ralign16( int x86reg )
   325 {
   326     TEST_imm32_r32( 0x00000001, x86reg );
   327     JNE_exit(EXIT_DATA_ADDR_READ);
   328 }
   330 static void check_walign16( int x86reg )
   331 {
   332     TEST_imm32_r32( 0x00000001, x86reg );
   333     JNE_exit(EXIT_DATA_ADDR_WRITE);
   334 }
   336 static void check_ralign32( int x86reg )
   337 {
   338     TEST_imm32_r32( 0x00000003, x86reg );
   339     JNE_exit(EXIT_DATA_ADDR_READ);
   340 }
   341 static void check_walign32( int x86reg )
   342 {
   343     TEST_imm32_r32( 0x00000003, x86reg );
   344     JNE_exit(EXIT_DATA_ADDR_WRITE);
   345 }
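/*
 * Each alignment check tests the low address bits and, if the access is
 * misaligned, jumps to the corresponding address-error exit stub. For example,
 * with 0x8C000002 in the register, check_walign32() leaves ZF clear and the
 * JNE takes the EXIT_DATA_ADDR_WRITE path; an aligned address falls through.
 */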
   347 #define UNDEF()
   348 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   349 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   350 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   351 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   352 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   353 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   354 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   356 #define SLOTILLEGAL() JMP_exit(EXIT_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
   360 /**
    361  * Emit the 'start of block' assembly. Sets up the stack frame and saves
    362  * SI/DI as required
   363  */
   364 void sh4_translate_begin_block( sh4addr_t pc ) 
   365 {
   366     PUSH_r32(R_EBP);
   367     /* mov &sh4r, ebp */
   368     load_imm32( R_EBP, (uint32_t)&sh4r );
   369     PUSH_r32(R_ESI);
   370     XOR_r32_r32(R_ESI, R_ESI);
   372     sh4_x86.in_delay_slot = FALSE;
   373     sh4_x86.priv_checked = FALSE;
   374     sh4_x86.fpuen_checked = FALSE;
   375     sh4_x86.branch_taken = FALSE;
   376     sh4_x86.backpatch_posn = 0;
   377     sh4_x86.block_start_pc = pc;
   378 }
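/*
 * A hedged sketch of how the three block-level entry points are meant to be
 * driven (the real loop presumably lives in sh4trans.c; the stopping rule here
 * is illustrative only, based on the doc comment on
 * sh4_x86_translate_instruction further down):
 *
 *     sh4_translate_begin_block( pc );
 *     uint32_t done = 0;
 *     while( !done ) {
 *         done = sh4_x86_translate_instruction( pc );  // nonzero => end of block
 *         pc += 2;
 *     }
 *     sh4_translate_end_block( pc );
 */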
   380 /**
   381  * Exit the block to an absolute PC
   382  * Bytes: 30
   383  */
   384 void exit_block( sh4addr_t pc, sh4addr_t endpc )
   385 {
   386     load_imm32( R_ECX, pc );                            // 5
   387     store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
   388     MOV_moff32_EAX( (uint32_t)xlat_get_lut_entry(pc) ); // 5
   389     AND_imm8s_r32( 0xFC, R_EAX ); // 3
   390     load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
   391     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   392     POP_r32(R_ESI);
   393     POP_r32(R_EBP);
   394     RET();
   395 }
   397 /**
   398  * Exit the block with sh4r.pc already written
   399  * Bytes: 16
   400  */
    401 void exit_block_pcset( sh4addr_t pc )
   402 {
   403     XOR_r32_r32( R_EAX, R_EAX );                       // 2
   404     load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
   405     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );    // 6
   406     POP_r32(R_ESI);
   407     POP_r32(R_EBP);
   408     RET();
   409 }
   411 /**
   412  * Write the block trailer (exception handling block)
   413  */
   414 void sh4_translate_end_block( sh4addr_t pc ) {
   415     if( sh4_x86.branch_taken == FALSE ) {
   416 	// Didn't exit unconditionally already, so write the termination here
   417 	exit_block( pc, pc );
   418     }
   419     if( sh4_x86.backpatch_posn != 0 ) {
   420 	uint8_t *end_ptr = xlat_output;
   421 	// Exception termination. Jump block for various exception codes:
   422 	PUSH_imm32( EXC_DATA_ADDR_READ );
   423 	JMP_rel8( 33, target1 );
   424 	PUSH_imm32( EXC_DATA_ADDR_WRITE );
   425 	JMP_rel8( 26, target2 );
   426 	PUSH_imm32( EXC_ILLEGAL );
   427 	JMP_rel8( 19, target3 );
   428 	PUSH_imm32( EXC_SLOT_ILLEGAL ); 
   429 	JMP_rel8( 12, target4 );
   430 	PUSH_imm32( EXC_FPU_DISABLED ); 
   431 	JMP_rel8( 5, target5 );
   432 	PUSH_imm32( EXC_SLOT_FPU_DISABLED );
   433 	// target
   434 	JMP_TARGET(target1);
   435 	JMP_TARGET(target2);
   436 	JMP_TARGET(target3);
   437 	JMP_TARGET(target4);
   438 	JMP_TARGET(target5);
   439 	load_spreg( R_ECX, REG_OFFSET(pc) );
   440 	ADD_r32_r32( R_ESI, R_ECX );
   441 	ADD_r32_r32( R_ESI, R_ECX );
   442 	store_spreg( R_ECX, REG_OFFSET(pc) );
   443 	MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
   444 	load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
   445 	MUL_r32( R_ESI );
   446 	ADD_r32_r32( R_EAX, R_ECX );
   447 	store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
   449 	load_imm32( R_EAX, (uint32_t)sh4_raise_exception ); // 6
   450 	CALL_r32( R_EAX ); // 2
   451 	ADD_imm8s_r32( 4, R_ESP );
   452 	XOR_r32_r32( R_EAX, R_EAX );
   453 	POP_r32(R_ESI);
   454 	POP_r32(R_EBP);
   455 	RET();
   457 	sh4_x86_do_backpatch( end_ptr );
   458     }
   460 }
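/*
 * Reading the trailer above: ESI is apparently used as a per-block instruction
 * counter (it is cleared in sh4_translate_begin_block), so when one of the
 * exception stubs is taken the epilogue adjusts the saved state as
 *
 *     sh4r.pc          += 2 * ESI                   (2 bytes per SH4 opcode)
 *     sh4r.slice_cycle += ESI * sh4_cpu_period
 *
 * before calling sh4_raise_exception() with the EXC_* code pushed by the stub.
 */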
   463 extern uint16_t *sh4_icache;
   464 extern uint32_t sh4_icache_addr;
    466 /**
    467  * Translate a single instruction. Delayed branches are handled specially
    468  * by translating both the branch and the delayed instruction as a single
    469  * unit (as the pair must always be executed together).
    470  *
    471  * @return true if the instruction marks the end of a basic block
    472  * (eg a branch or an instruction that raises an exception).
    473  */
   474 uint32_t sh4_x86_translate_instruction( sh4addr_t pc )
   475 {
   476     uint32_t ir;
   477     /* Read instruction */
   478     uint32_t pageaddr = pc >> 12;
   479     if( sh4_icache != NULL && pageaddr == sh4_icache_addr ) {
   480 	ir = sh4_icache[(pc&0xFFF)>>1];
   481     } else {
   482 	sh4_icache = (uint16_t *)mem_get_page(pc);
   483 	if( ((uint32_t)sh4_icache) < MAX_IO_REGIONS ) {
   484 	    /* If someone's actually been so daft as to try to execute out of an IO
    485 	     * region, fall back on the full-blown memory read
   486 	     */
   487 	    sh4_icache = NULL;
   488 	    ir = sh4_read_word(pc);
   489 	} else {
   490 	    sh4_icache_addr = pageaddr;
   491 	    ir = sh4_icache[(pc&0xFFF)>>1];
   492 	}
   493     }
   495 %%
   496 /* ALU operations */
   497 ADD Rm, Rn {:
   498     load_reg( R_EAX, Rm );
   499     load_reg( R_ECX, Rn );
   500     ADD_r32_r32( R_EAX, R_ECX );
   501     store_reg( R_ECX, Rn );
   502 :}
   503 ADD #imm, Rn {:  
   504     load_reg( R_EAX, Rn );
   505     ADD_imm8s_r32( imm, R_EAX );
   506     store_reg( R_EAX, Rn );
   507 :}
   508 ADDC Rm, Rn {:
   509     load_reg( R_EAX, Rm );
   510     load_reg( R_ECX, Rn );
   511     LDC_t();
   512     ADC_r32_r32( R_EAX, R_ECX );
   513     store_reg( R_ECX, Rn );
   514     SETC_t();
   515 :}
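/*
 * Pattern used throughout the ALU rules: LDC_t() loads the SH4 T bit into the
 * x86 carry flag before a flag-consuming instruction, and SETC_t() / SETE_t() /
 * SETO_t() write the relevant x86 flag back into sh4r.t afterwards. ADDC above
 * is the canonical case; e.g. with T=1, Rm=1, Rn=0xFFFFFFFF the adc yields
 *
 *     0xFFFFFFFF + 1 + 1 = 0x1_00000001  =>  Rn = 1, CF = 1, so T = 1
 */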
   516 ADDV Rm, Rn {:
   517     load_reg( R_EAX, Rm );
   518     load_reg( R_ECX, Rn );
   519     ADD_r32_r32( R_EAX, R_ECX );
   520     store_reg( R_ECX, Rn );
   521     SETO_t();
   522 :}
   523 AND Rm, Rn {:
   524     load_reg( R_EAX, Rm );
   525     load_reg( R_ECX, Rn );
   526     AND_r32_r32( R_EAX, R_ECX );
   527     store_reg( R_ECX, Rn );
   528 :}
   529 AND #imm, R0 {:  
   530     load_reg( R_EAX, 0 );
   531     AND_imm32_r32(imm, R_EAX); 
   532     store_reg( R_EAX, 0 );
   533 :}
   534 AND.B #imm, @(R0, GBR) {: 
   535     load_reg( R_EAX, 0 );
   536     load_spreg( R_ECX, R_GBR );
   537     ADD_r32_r32( R_EAX, R_ECX );
   538     PUSH_r32(R_ECX);
   539     call_func0(sh4_read_byte);
   540     POP_r32(R_ECX);
   541     AND_imm32_r32(imm, R_EAX );
   542     MEM_WRITE_BYTE( R_ECX, R_EAX );
   543 :}
   544 CMP/EQ Rm, Rn {:  
   545     load_reg( R_EAX, Rm );
   546     load_reg( R_ECX, Rn );
   547     CMP_r32_r32( R_EAX, R_ECX );
   548     SETE_t();
   549 :}
   550 CMP/EQ #imm, R0 {:  
   551     load_reg( R_EAX, 0 );
   552     CMP_imm8s_r32(imm, R_EAX);
   553     SETE_t();
   554 :}
   555 CMP/GE Rm, Rn {:  
   556     load_reg( R_EAX, Rm );
   557     load_reg( R_ECX, Rn );
   558     CMP_r32_r32( R_EAX, R_ECX );
   559     SETGE_t();
   560 :}
   561 CMP/GT Rm, Rn {: 
   562     load_reg( R_EAX, Rm );
   563     load_reg( R_ECX, Rn );
   564     CMP_r32_r32( R_EAX, R_ECX );
   565     SETG_t();
   566 :}
   567 CMP/HI Rm, Rn {:  
   568     load_reg( R_EAX, Rm );
   569     load_reg( R_ECX, Rn );
   570     CMP_r32_r32( R_EAX, R_ECX );
   571     SETA_t();
   572 :}
   573 CMP/HS Rm, Rn {: 
   574     load_reg( R_EAX, Rm );
   575     load_reg( R_ECX, Rn );
   576     CMP_r32_r32( R_EAX, R_ECX );
   577     SETAE_t();
   578  :}
   579 CMP/PL Rn {: 
   580     load_reg( R_EAX, Rn );
   581     CMP_imm8s_r32( 0, R_EAX );
   582     SETG_t();
   583 :}
   584 CMP/PZ Rn {:  
   585     load_reg( R_EAX, Rn );
   586     CMP_imm8s_r32( 0, R_EAX );
   587     SETGE_t();
   588 :}
   589 CMP/STR Rm, Rn {:  
   590     load_reg( R_EAX, Rm );
   591     load_reg( R_ECX, Rn );
   592     XOR_r32_r32( R_ECX, R_EAX );
   593     TEST_r8_r8( R_AL, R_AL );
   594     JE_rel8(13, target1);
   595     TEST_r8_r8( R_AH, R_AH ); // 2
   596     JE_rel8(9, target2);
   597     SHR_imm8_r32( 16, R_EAX ); // 3
   598     TEST_r8_r8( R_AL, R_AL ); // 2
   599     JE_rel8(2, target3);
   600     TEST_r8_r8( R_AH, R_AH ); // 2
   601     JMP_TARGET(target1);
   602     JMP_TARGET(target2);
   603     JMP_TARGET(target3);
   604     SETE_t();
   605 :}
   606 DIV0S Rm, Rn {:
   607     load_reg( R_EAX, Rm );
   608     load_reg( R_ECX, Rn );
   609     SHR_imm8_r32( 31, R_EAX );
   610     SHR_imm8_r32( 31, R_ECX );
   611     store_spreg( R_EAX, R_M );
   612     store_spreg( R_ECX, R_Q );
   613     CMP_r32_r32( R_EAX, R_ECX );
   614     SETNE_t();
   615 :}
   616 DIV0U {:  
   617     XOR_r32_r32( R_EAX, R_EAX );
   618     store_spreg( R_EAX, R_Q );
   619     store_spreg( R_EAX, R_M );
   620     store_spreg( R_EAX, R_T );
   621 :}
   622 DIV1 Rm, Rn {:
   623     load_spreg( R_ECX, R_M );
   624     load_reg( R_EAX, Rn );
   625     LDC_t();
   626     RCL1_r32( R_EAX );
   627     SETC_r8( R_DL ); // Q'
   628     CMP_sh4r_r32( R_Q, R_ECX );
   629     JE_rel8(5, mqequal);
   630     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   631     JMP_rel8(3, end);
   632     JMP_TARGET(mqequal);
   633     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   634     JMP_TARGET(end);
   635     store_reg( R_EAX, Rn ); // Done with Rn now
   636     SETC_r8(R_AL); // tmp1
   637     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   638     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   639     store_spreg( R_ECX, R_Q );
   640     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   641     MOVZX_r8_r32( R_AL, R_EAX );
   642     store_spreg( R_EAX, R_T );
   643 :}
   644 DMULS.L Rm, Rn {:  
   645     load_reg( R_EAX, Rm );
   646     load_reg( R_ECX, Rn );
   647     IMUL_r32(R_ECX);
   648     store_spreg( R_EDX, R_MACH );
   649     store_spreg( R_EAX, R_MACL );
   650 :}
   651 DMULU.L Rm, Rn {:  
   652     load_reg( R_EAX, Rm );
   653     load_reg( R_ECX, Rn );
   654     MUL_r32(R_ECX);
   655     store_spreg( R_EDX, R_MACH );
   656     store_spreg( R_EAX, R_MACL );    
   657 :}
   658 DT Rn {:  
   659     load_reg( R_EAX, Rn );
   660     ADD_imm8s_r32( -1, R_EAX );
   661     store_reg( R_EAX, Rn );
   662     SETE_t();
   663 :}
   664 EXTS.B Rm, Rn {:  
   665     load_reg( R_EAX, Rm );
   666     MOVSX_r8_r32( R_EAX, R_EAX );
   667     store_reg( R_EAX, Rn );
   668 :}
   669 EXTS.W Rm, Rn {:  
   670     load_reg( R_EAX, Rm );
   671     MOVSX_r16_r32( R_EAX, R_EAX );
   672     store_reg( R_EAX, Rn );
   673 :}
   674 EXTU.B Rm, Rn {:  
   675     load_reg( R_EAX, Rm );
   676     MOVZX_r8_r32( R_EAX, R_EAX );
   677     store_reg( R_EAX, Rn );
   678 :}
   679 EXTU.W Rm, Rn {:  
   680     load_reg( R_EAX, Rm );
   681     MOVZX_r16_r32( R_EAX, R_EAX );
   682     store_reg( R_EAX, Rn );
   683 :}
   684 MAC.L @Rm+, @Rn+ {:  
   685     load_reg( R_ECX, Rm );
   686     check_ralign32( R_ECX );
   687     load_reg( R_ECX, Rn );
   688     check_ralign32( R_ECX );
   689     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   690     MEM_READ_LONG( R_ECX, R_EAX );
   691     PUSH_r32( R_EAX );
   692     load_reg( R_ECX, Rm );
   693     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   694     MEM_READ_LONG( R_ECX, R_EAX );
   695     POP_r32( R_ECX );
   696     IMUL_r32( R_ECX );
   697     ADD_r32_sh4r( R_EAX, R_MACL );
   698     ADC_r32_sh4r( R_EDX, R_MACH );
   700     load_spreg( R_ECX, R_S );
   701     TEST_r32_r32(R_ECX, R_ECX);
   702     JE_rel8( 7, nosat );
   703     call_func0( signsat48 );
   704     JMP_TARGET( nosat );
   705 :}
   706 MAC.W @Rm+, @Rn+ {:  
   707     load_reg( R_ECX, Rm );
   708     check_ralign16( R_ECX );
   709     load_reg( R_ECX, Rn );
   710     check_ralign16( R_ECX );
   711     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   712     MEM_READ_WORD( R_ECX, R_EAX );
   713     PUSH_r32( R_EAX );
   714     load_reg( R_ECX, Rm );
   715     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   716     MEM_READ_WORD( R_ECX, R_EAX );
   717     POP_r32( R_ECX );
   718     IMUL_r32( R_ECX );
   720     load_spreg( R_ECX, R_S );
   721     TEST_r32_r32( R_ECX, R_ECX );
   722     JE_rel8( 47, nosat );
   724     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   725     JNO_rel8( 51, end );            // 2
   726     load_imm32( R_EDX, 1 );         // 5
   727     store_spreg( R_EDX, R_MACH );   // 6
   728     JS_rel8( 13, positive );        // 2
   729     load_imm32( R_EAX, 0x80000000 );// 5
   730     store_spreg( R_EAX, R_MACL );   // 6
   731     JMP_rel8( 25, end2 );           // 2
   733     JMP_TARGET(positive);
   734     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   735     store_spreg( R_EAX, R_MACL );   // 6
   736     JMP_rel8( 12, end3);            // 2
   738     JMP_TARGET(nosat);
   739     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   740     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   741     JMP_TARGET(end);
   742     JMP_TARGET(end2);
   743     JMP_TARGET(end3);
   744 :}
   745 MOVT Rn {:  
   746     load_spreg( R_EAX, R_T );
   747     store_reg( R_EAX, Rn );
   748 :}
   749 MUL.L Rm, Rn {:  
   750     load_reg( R_EAX, Rm );
   751     load_reg( R_ECX, Rn );
   752     MUL_r32( R_ECX );
   753     store_spreg( R_EAX, R_MACL );
   754 :}
   755 MULS.W Rm, Rn {:
   756     load_reg16s( R_EAX, Rm );
   757     load_reg16s( R_ECX, Rn );
   758     MUL_r32( R_ECX );
   759     store_spreg( R_EAX, R_MACL );
   760 :}
   761 MULU.W Rm, Rn {:  
   762     load_reg16u( R_EAX, Rm );
   763     load_reg16u( R_ECX, Rn );
   764     MUL_r32( R_ECX );
   765     store_spreg( R_EAX, R_MACL );
   766 :}
   767 NEG Rm, Rn {:
   768     load_reg( R_EAX, Rm );
   769     NEG_r32( R_EAX );
   770     store_reg( R_EAX, Rn );
   771 :}
   772 NEGC Rm, Rn {:  
   773     load_reg( R_EAX, Rm );
   774     XOR_r32_r32( R_ECX, R_ECX );
   775     LDC_t();
   776     SBB_r32_r32( R_EAX, R_ECX );
   777     store_reg( R_ECX, Rn );
   778     SETC_t();
   779 :}
   780 NOT Rm, Rn {:  
   781     load_reg( R_EAX, Rm );
   782     NOT_r32( R_EAX );
   783     store_reg( R_EAX, Rn );
   784 :}
   785 OR Rm, Rn {:  
   786     load_reg( R_EAX, Rm );
   787     load_reg( R_ECX, Rn );
   788     OR_r32_r32( R_EAX, R_ECX );
   789     store_reg( R_ECX, Rn );
   790 :}
   791 OR #imm, R0 {:
   792     load_reg( R_EAX, 0 );
   793     OR_imm32_r32(imm, R_EAX);
   794     store_reg( R_EAX, 0 );
   795 :}
   796 OR.B #imm, @(R0, GBR) {:  
   797     load_reg( R_EAX, 0 );
   798     load_spreg( R_ECX, R_GBR );
   799     ADD_r32_r32( R_EAX, R_ECX );
   800     PUSH_r32(R_ECX);
   801     call_func0(sh4_read_byte);
   802     POP_r32(R_ECX);
   803     OR_imm32_r32(imm, R_EAX );
   804     MEM_WRITE_BYTE( R_ECX, R_EAX );
   805 :}
   806 ROTCL Rn {:
   807     load_reg( R_EAX, Rn );
   808     LDC_t();
   809     RCL1_r32( R_EAX );
   810     store_reg( R_EAX, Rn );
   811     SETC_t();
   812 :}
   813 ROTCR Rn {:  
   814     load_reg( R_EAX, Rn );
   815     LDC_t();
   816     RCR1_r32( R_EAX );
   817     store_reg( R_EAX, Rn );
   818     SETC_t();
   819 :}
   820 ROTL Rn {:  
   821     load_reg( R_EAX, Rn );
   822     ROL1_r32( R_EAX );
   823     store_reg( R_EAX, Rn );
   824     SETC_t();
   825 :}
   826 ROTR Rn {:  
   827     load_reg( R_EAX, Rn );
   828     ROR1_r32( R_EAX );
   829     store_reg( R_EAX, Rn );
   830     SETC_t();
   831 :}
   832 SHAD Rm, Rn {:
   833     /* Annoyingly enough, not directly convertible */
   834     load_reg( R_EAX, Rn );
   835     load_reg( R_ECX, Rm );
   836     CMP_imm32_r32( 0, R_ECX );
   837     JGE_rel8(16, doshl);
   839     NEG_r32( R_ECX );      // 2
   840     AND_imm8_r8( 0x1F, R_CL ); // 3
   841     JE_rel8( 4, emptysar);     // 2
   842     SAR_r32_CL( R_EAX );       // 2
   843     JMP_rel8(10, end);          // 2
   845     JMP_TARGET(emptysar);
   846     SAR_imm8_r32(31, R_EAX );  // 3
   847     JMP_rel8(5, end2);
   849     JMP_TARGET(doshl);
   850     AND_imm8_r8( 0x1F, R_CL ); // 3
   851     SHL_r32_CL( R_EAX );       // 2
   852     JMP_TARGET(end);
   853     JMP_TARGET(end2);
   854     store_reg( R_EAX, Rn );
   855 :}
   856 SHLD Rm, Rn {:  
   857     load_reg( R_EAX, Rn );
   858     load_reg( R_ECX, Rm );
   859     CMP_imm32_r32( 0, R_ECX );
   860     JGE_rel8(15, doshl);
   862     NEG_r32( R_ECX );      // 2
   863     AND_imm8_r8( 0x1F, R_CL ); // 3
   864     JE_rel8( 4, emptyshr );
   865     SHR_r32_CL( R_EAX );       // 2
   866     JMP_rel8(9, end);          // 2
   868     JMP_TARGET(emptyshr);
   869     XOR_r32_r32( R_EAX, R_EAX );
   870     JMP_rel8(5, end2);
   872     JMP_TARGET(doshl);
   873     AND_imm8_r8( 0x1F, R_CL ); // 3
   874     SHL_r32_CL( R_EAX );       // 2
   875     JMP_TARGET(end);
   876     JMP_TARGET(end2);
   877     store_reg( R_EAX, Rn );
   878 :}
   879 SHAL Rn {: 
   880     load_reg( R_EAX, Rn );
   881     SHL1_r32( R_EAX );
   882     SETC_t();
   883     store_reg( R_EAX, Rn );
   884 :}
   885 SHAR Rn {:  
   886     load_reg( R_EAX, Rn );
   887     SAR1_r32( R_EAX );
   888     SETC_t();
   889     store_reg( R_EAX, Rn );
   890 :}
   891 SHLL Rn {:  
   892     load_reg( R_EAX, Rn );
   893     SHL1_r32( R_EAX );
   894     SETC_t();
   895     store_reg( R_EAX, Rn );
   896 :}
   897 SHLL2 Rn {:
   898     load_reg( R_EAX, Rn );
   899     SHL_imm8_r32( 2, R_EAX );
   900     store_reg( R_EAX, Rn );
   901 :}
   902 SHLL8 Rn {:  
   903     load_reg( R_EAX, Rn );
   904     SHL_imm8_r32( 8, R_EAX );
   905     store_reg( R_EAX, Rn );
   906 :}
   907 SHLL16 Rn {:  
   908     load_reg( R_EAX, Rn );
   909     SHL_imm8_r32( 16, R_EAX );
   910     store_reg( R_EAX, Rn );
   911 :}
   912 SHLR Rn {:  
   913     load_reg( R_EAX, Rn );
   914     SHR1_r32( R_EAX );
   915     SETC_t();
   916     store_reg( R_EAX, Rn );
   917 :}
   918 SHLR2 Rn {:  
   919     load_reg( R_EAX, Rn );
   920     SHR_imm8_r32( 2, R_EAX );
   921     store_reg( R_EAX, Rn );
   922 :}
   923 SHLR8 Rn {:  
   924     load_reg( R_EAX, Rn );
   925     SHR_imm8_r32( 8, R_EAX );
   926     store_reg( R_EAX, Rn );
   927 :}
   928 SHLR16 Rn {:  
   929     load_reg( R_EAX, Rn );
   930     SHR_imm8_r32( 16, R_EAX );
   931     store_reg( R_EAX, Rn );
   932 :}
   933 SUB Rm, Rn {:  
   934     load_reg( R_EAX, Rm );
   935     load_reg( R_ECX, Rn );
   936     SUB_r32_r32( R_EAX, R_ECX );
   937     store_reg( R_ECX, Rn );
   938 :}
   939 SUBC Rm, Rn {:  
   940     load_reg( R_EAX, Rm );
   941     load_reg( R_ECX, Rn );
   942     LDC_t();
   943     SBB_r32_r32( R_EAX, R_ECX );
   944     store_reg( R_ECX, Rn );
   945     SETC_t();
   946 :}
   947 SUBV Rm, Rn {:  
   948     load_reg( R_EAX, Rm );
   949     load_reg( R_ECX, Rn );
   950     SUB_r32_r32( R_EAX, R_ECX );
   951     store_reg( R_ECX, Rn );
   952     SETO_t();
   953 :}
   954 SWAP.B Rm, Rn {:  
   955     load_reg( R_EAX, Rm );
   956     XCHG_r8_r8( R_AL, R_AH );
   957     store_reg( R_EAX, Rn );
   958 :}
   959 SWAP.W Rm, Rn {:  
   960     load_reg( R_EAX, Rm );
   961     MOV_r32_r32( R_EAX, R_ECX );
   962     SHL_imm8_r32( 16, R_ECX );
   963     SHR_imm8_r32( 16, R_EAX );
   964     OR_r32_r32( R_EAX, R_ECX );
   965     store_reg( R_ECX, Rn );
   966 :}
   967 TAS.B @Rn {:  
   968     load_reg( R_ECX, Rn );
   969     MEM_READ_BYTE( R_ECX, R_EAX );
   970     TEST_r8_r8( R_AL, R_AL );
   971     SETE_t();
   972     OR_imm8_r8( 0x80, R_AL );
   973     load_reg( R_ECX, Rn );
   974     MEM_WRITE_BYTE( R_ECX, R_EAX );
   975 :}
   976 TST Rm, Rn {:  
   977     load_reg( R_EAX, Rm );
   978     load_reg( R_ECX, Rn );
   979     TEST_r32_r32( R_EAX, R_ECX );
   980     SETE_t();
   981 :}
   982 TST #imm, R0 {:  
   983     load_reg( R_EAX, 0 );
   984     TEST_imm32_r32( imm, R_EAX );
   985     SETE_t();
   986 :}
   987 TST.B #imm, @(R0, GBR) {:  
   988     load_reg( R_EAX, 0);
    989     load_spreg( R_ECX, R_GBR );
   990     ADD_r32_r32( R_EAX, R_ECX );
   991     MEM_READ_BYTE( R_ECX, R_EAX );
   992     TEST_imm8_r8( imm, R_AL );
   993     SETE_t();
   994 :}
   995 XOR Rm, Rn {:  
   996     load_reg( R_EAX, Rm );
   997     load_reg( R_ECX, Rn );
   998     XOR_r32_r32( R_EAX, R_ECX );
   999     store_reg( R_ECX, Rn );
  1000 :}
  1001 XOR #imm, R0 {:  
  1002     load_reg( R_EAX, 0 );
  1003     XOR_imm32_r32( imm, R_EAX );
  1004     store_reg( R_EAX, 0 );
  1005 :}
  1006 XOR.B #imm, @(R0, GBR) {:  
  1007     load_reg( R_EAX, 0 );
  1008     load_spreg( R_ECX, R_GBR );
  1009     ADD_r32_r32( R_EAX, R_ECX );
  1010     PUSH_r32(R_ECX);
  1011     call_func0(sh4_read_byte);
  1012     POP_r32(R_ECX);
  1013     XOR_imm32_r32( imm, R_EAX );
  1014     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1015 :}
  1016 XTRCT Rm, Rn {:
  1017     load_reg( R_EAX, Rm );
  1018     load_reg( R_ECX, Rn );
  1019     SHL_imm8_r32( 16, R_EAX );
  1020     SHR_imm8_r32( 16, R_ECX );
  1021     OR_r32_r32( R_EAX, R_ECX );
  1022     store_reg( R_ECX, Rn );
  1023 :}
  1025 /* Data move instructions */
  1026 MOV Rm, Rn {:  
  1027     load_reg( R_EAX, Rm );
  1028     store_reg( R_EAX, Rn );
  1029 :}
  1030 MOV #imm, Rn {:  
  1031     load_imm32( R_EAX, imm );
  1032     store_reg( R_EAX, Rn );
  1033 :}
  1034 MOV.B Rm, @Rn {:  
  1035     load_reg( R_EAX, Rm );
  1036     load_reg( R_ECX, Rn );
  1037     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1038 :}
  1039 MOV.B Rm, @-Rn {:  
  1040     load_reg( R_EAX, Rm );
  1041     load_reg( R_ECX, Rn );
  1042     ADD_imm8s_r32( -1, R_ECX );
  1043     store_reg( R_ECX, Rn );
  1044     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1045 :}
  1046 MOV.B Rm, @(R0, Rn) {:  
  1047     load_reg( R_EAX, 0 );
  1048     load_reg( R_ECX, Rn );
  1049     ADD_r32_r32( R_EAX, R_ECX );
  1050     load_reg( R_EAX, Rm );
  1051     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1052 :}
  1053 MOV.B R0, @(disp, GBR) {:  
  1054     load_reg( R_EAX, 0 );
  1055     load_spreg( R_ECX, R_GBR );
  1056     ADD_imm32_r32( disp, R_ECX );
  1057     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1058 :}
  1059 MOV.B R0, @(disp, Rn) {:  
  1060     load_reg( R_EAX, 0 );
  1061     load_reg( R_ECX, Rn );
  1062     ADD_imm32_r32( disp, R_ECX );
  1063     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1064 :}
  1065 MOV.B @Rm, Rn {:  
  1066     load_reg( R_ECX, Rm );
  1067     MEM_READ_BYTE( R_ECX, R_EAX );
  1068     store_reg( R_EAX, Rn );
  1069 :}
  1070 MOV.B @Rm+, Rn {:  
  1071     load_reg( R_ECX, Rm );
  1072     MOV_r32_r32( R_ECX, R_EAX );
  1073     ADD_imm8s_r32( 1, R_EAX );
  1074     store_reg( R_EAX, Rm );
  1075     MEM_READ_BYTE( R_ECX, R_EAX );
  1076     store_reg( R_EAX, Rn );
  1077 :}
  1078 MOV.B @(R0, Rm), Rn {:  
  1079     load_reg( R_EAX, 0 );
  1080     load_reg( R_ECX, Rm );
  1081     ADD_r32_r32( R_EAX, R_ECX );
  1082     MEM_READ_BYTE( R_ECX, R_EAX );
  1083     store_reg( R_EAX, Rn );
  1084 :}
  1085 MOV.B @(disp, GBR), R0 {:  
  1086     load_spreg( R_ECX, R_GBR );
  1087     ADD_imm32_r32( disp, R_ECX );
  1088     MEM_READ_BYTE( R_ECX, R_EAX );
  1089     store_reg( R_EAX, 0 );
  1090 :}
  1091 MOV.B @(disp, Rm), R0 {:  
  1092     load_reg( R_ECX, Rm );
  1093     ADD_imm32_r32( disp, R_ECX );
  1094     MEM_READ_BYTE( R_ECX, R_EAX );
  1095     store_reg( R_EAX, 0 );
  1096 :}
  1097 MOV.L Rm, @Rn {:
  1098     load_reg( R_EAX, Rm );
  1099     load_reg( R_ECX, Rn );
  1100     check_walign32(R_ECX);
  1101     MEM_WRITE_LONG( R_ECX, R_EAX );
  1102 :}
  1103 MOV.L Rm, @-Rn {:  
  1104     load_reg( R_EAX, Rm );
  1105     load_reg( R_ECX, Rn );
  1106     check_walign32( R_ECX );
  1107     ADD_imm8s_r32( -4, R_ECX );
  1108     store_reg( R_ECX, Rn );
  1109     MEM_WRITE_LONG( R_ECX, R_EAX );
  1110 :}
  1111 MOV.L Rm, @(R0, Rn) {:  
  1112     load_reg( R_EAX, 0 );
  1113     load_reg( R_ECX, Rn );
  1114     ADD_r32_r32( R_EAX, R_ECX );
  1115     check_walign32( R_ECX );
  1116     load_reg( R_EAX, Rm );
  1117     MEM_WRITE_LONG( R_ECX, R_EAX );
  1118 :}
  1119 MOV.L R0, @(disp, GBR) {:  
  1120     load_spreg( R_ECX, R_GBR );
  1121     load_reg( R_EAX, 0 );
  1122     ADD_imm32_r32( disp, R_ECX );
  1123     check_walign32( R_ECX );
  1124     MEM_WRITE_LONG( R_ECX, R_EAX );
  1125 :}
  1126 MOV.L Rm, @(disp, Rn) {:  
  1127     load_reg( R_ECX, Rn );
  1128     load_reg( R_EAX, Rm );
  1129     ADD_imm32_r32( disp, R_ECX );
  1130     check_walign32( R_ECX );
  1131     MEM_WRITE_LONG( R_ECX, R_EAX );
  1132 :}
  1133 MOV.L @Rm, Rn {:  
  1134     load_reg( R_ECX, Rm );
  1135     check_ralign32( R_ECX );
  1136     MEM_READ_LONG( R_ECX, R_EAX );
  1137     store_reg( R_EAX, Rn );
  1138 :}
  1139 MOV.L @Rm+, Rn {:  
  1140     load_reg( R_EAX, Rm );
  1141     check_ralign32( R_EAX );
  1142     MOV_r32_r32( R_EAX, R_ECX );
  1143     ADD_imm8s_r32( 4, R_EAX );
  1144     store_reg( R_EAX, Rm );
  1145     MEM_READ_LONG( R_ECX, R_EAX );
  1146     store_reg( R_EAX, Rn );
  1147 :}
  1148 MOV.L @(R0, Rm), Rn {:  
  1149     load_reg( R_EAX, 0 );
  1150     load_reg( R_ECX, Rm );
  1151     ADD_r32_r32( R_EAX, R_ECX );
  1152     check_ralign32( R_ECX );
  1153     MEM_READ_LONG( R_ECX, R_EAX );
  1154     store_reg( R_EAX, Rn );
  1155 :}
  1156 MOV.L @(disp, GBR), R0 {:
  1157     load_spreg( R_ECX, R_GBR );
  1158     ADD_imm32_r32( disp, R_ECX );
  1159     check_ralign32( R_ECX );
  1160     MEM_READ_LONG( R_ECX, R_EAX );
  1161     store_reg( R_EAX, 0 );
  1162 :}
  1163 MOV.L @(disp, PC), Rn {:  
  1164     if( sh4_x86.in_delay_slot ) {
  1165 	SLOTILLEGAL();
  1166     } else {
  1167 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1168 	char *ptr = mem_get_region(target);
  1169 	if( ptr != NULL ) {
  1170 	    MOV_moff32_EAX( (uint32_t)ptr );
  1171 	} else {
  1172 	    load_imm32( R_ECX, target );
  1173 	    MEM_READ_LONG( R_ECX, R_EAX );
   1174 	}
   1175 	store_reg( R_EAX, Rn );
   1176     }
   1177 :}
  1178 MOV.L @(disp, Rm), Rn {:  
  1179     load_reg( R_ECX, Rm );
  1180     ADD_imm8s_r32( disp, R_ECX );
  1181     check_ralign32( R_ECX );
  1182     MEM_READ_LONG( R_ECX, R_EAX );
  1183     store_reg( R_EAX, Rn );
  1184 :}
  1185 MOV.W Rm, @Rn {:  
  1186     load_reg( R_ECX, Rn );
  1187     check_walign16( R_ECX );
  1188     load_reg( R_EAX, Rm );
  1189     MEM_WRITE_WORD( R_ECX, R_EAX );
  1190 :}
  1191 MOV.W Rm, @-Rn {:  
  1192     load_reg( R_ECX, Rn );
  1193     check_walign16( R_ECX );
  1194     load_reg( R_EAX, Rm );
  1195     ADD_imm8s_r32( -2, R_ECX );
  1196     store_reg( R_ECX, Rn );
  1197     MEM_WRITE_WORD( R_ECX, R_EAX );
  1198 :}
  1199 MOV.W Rm, @(R0, Rn) {:  
  1200     load_reg( R_EAX, 0 );
  1201     load_reg( R_ECX, Rn );
  1202     ADD_r32_r32( R_EAX, R_ECX );
  1203     check_walign16( R_ECX );
  1204     load_reg( R_EAX, Rm );
  1205     MEM_WRITE_WORD( R_ECX, R_EAX );
  1206 :}
  1207 MOV.W R0, @(disp, GBR) {:  
  1208     load_spreg( R_ECX, R_GBR );
  1209     load_reg( R_EAX, 0 );
  1210     ADD_imm32_r32( disp, R_ECX );
  1211     check_walign16( R_ECX );
  1212     MEM_WRITE_WORD( R_ECX, R_EAX );
  1213 :}
  1214 MOV.W R0, @(disp, Rn) {:  
  1215     load_reg( R_ECX, Rn );
  1216     load_reg( R_EAX, 0 );
  1217     ADD_imm32_r32( disp, R_ECX );
  1218     check_walign16( R_ECX );
  1219     MEM_WRITE_WORD( R_ECX, R_EAX );
  1220 :}
  1221 MOV.W @Rm, Rn {:  
  1222     load_reg( R_ECX, Rm );
  1223     check_ralign16( R_ECX );
  1224     MEM_READ_WORD( R_ECX, R_EAX );
  1225     store_reg( R_EAX, Rn );
  1226 :}
  1227 MOV.W @Rm+, Rn {:  
  1228     load_reg( R_EAX, Rm );
  1229     check_ralign16( R_EAX );
  1230     MOV_r32_r32( R_EAX, R_ECX );
  1231     ADD_imm8s_r32( 2, R_EAX );
  1232     store_reg( R_EAX, Rm );
  1233     MEM_READ_WORD( R_ECX, R_EAX );
  1234     store_reg( R_EAX, Rn );
  1235 :}
  1236 MOV.W @(R0, Rm), Rn {:  
  1237     load_reg( R_EAX, 0 );
  1238     load_reg( R_ECX, Rm );
  1239     ADD_r32_r32( R_EAX, R_ECX );
  1240     check_ralign16( R_ECX );
  1241     MEM_READ_WORD( R_ECX, R_EAX );
  1242     store_reg( R_EAX, Rn );
  1243 :}
  1244 MOV.W @(disp, GBR), R0 {:  
  1245     load_spreg( R_ECX, R_GBR );
  1246     ADD_imm32_r32( disp, R_ECX );
  1247     check_ralign16( R_ECX );
  1248     MEM_READ_WORD( R_ECX, R_EAX );
  1249     store_reg( R_EAX, 0 );
  1250 :}
  1251 MOV.W @(disp, PC), Rn {:  
  1252     if( sh4_x86.in_delay_slot ) {
  1253 	SLOTILLEGAL();
  1254     } else {
  1255 	load_imm32( R_ECX, pc + disp + 4 );
  1256 	MEM_READ_WORD( R_ECX, R_EAX );
  1257 	store_reg( R_EAX, Rn );
   1258     }
   1259 :}
  1260 MOV.W @(disp, Rm), R0 {:  
  1261     load_reg( R_ECX, Rm );
  1262     ADD_imm32_r32( disp, R_ECX );
  1263     check_ralign16( R_ECX );
  1264     MEM_READ_WORD( R_ECX, R_EAX );
  1265     store_reg( R_EAX, 0 );
  1266 :}
  1267 MOVA @(disp, PC), R0 {:  
  1268     if( sh4_x86.in_delay_slot ) {
  1269 	SLOTILLEGAL();
  1270     } else {
  1271 	load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
  1272 	store_reg( R_ECX, 0 );
   1273     }
   1274 :}
  1275 MOVCA.L R0, @Rn {:  
  1276     load_reg( R_EAX, 0 );
  1277     load_reg( R_ECX, Rn );
  1278     check_walign32( R_ECX );
  1279     MEM_WRITE_LONG( R_ECX, R_EAX );
  1280 :}
  1282 /* Control transfer instructions */
  1283 BF disp {:
  1284     if( sh4_x86.in_delay_slot ) {
  1285 	SLOTILLEGAL();
  1286     } else {
  1287 	CMP_imm8s_sh4r( 0, R_T );
  1288 	JNE_rel8( 30, nottaken );
  1289 	exit_block( disp + pc + 4, pc+2 );
  1290 	JMP_TARGET(nottaken);
  1291 	return 2;
   1292     }
   1293 :}
  1294 BF/S disp {:
  1295     if( sh4_x86.in_delay_slot ) {
  1296 	SLOTILLEGAL();
  1297     } else {
  1298 	sh4_x86.in_delay_slot = TRUE;
  1299 	CMP_imm8s_sh4r( 0, R_T );
  1300 	OP(0x0F); OP(0x85); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JNE rel32
  1301 	sh4_x86_translate_instruction(pc+2);
  1302 	exit_block( disp + pc + 4, pc+4 );
  1303 	// not taken
  1304 	*patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1305 	sh4_x86_translate_instruction(pc+2);
  1306 	return 4;
   1307     }
   1308 :}
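/*
 * BF/S (and BT/S below) cannot use the fixed-distance JE_rel8/JNE_rel8 helpers
 * because the not-taken path only starts after a translated delay-slot
 * instruction of unknown length, so the conditional jump is emitted by hand
 * (0F 85 = jne rel32 here, 0F 84 = je rel32 for BT/S) with a zero displacement
 * that is patched once the taken-path code has been generated:
 *
 *     *patch = (xlat_output - (uint8_t *)patch) - 4;   // bytes to skip forward
 */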
  1309 BRA disp {:  
  1310     if( sh4_x86.in_delay_slot ) {
  1311 	SLOTILLEGAL();
  1312     } else {
  1313 	sh4_x86.in_delay_slot = TRUE;
  1314 	sh4_x86_translate_instruction( pc + 2 );
  1315 	exit_block( disp + pc + 4, pc+4 );
  1316 	sh4_x86.branch_taken = TRUE;
  1317 	return 4;
   1318     }
   1319 :}
  1320 BRAF Rn {:  
  1321     if( sh4_x86.in_delay_slot ) {
  1322 	SLOTILLEGAL();
  1323     } else {
  1324 	load_reg( R_EAX, Rn );
  1325 	ADD_imm32_r32( pc + 4, R_EAX );
  1326 	store_spreg( R_EAX, REG_OFFSET(pc) );
  1327 	sh4_x86.in_delay_slot = TRUE;
  1328 	sh4_x86_translate_instruction( pc + 2 );
  1329 	exit_block_pcset(pc+2);
  1330 	sh4_x86.branch_taken = TRUE;
  1331 	return 4;
   1332     }
   1333 :}
  1334 BSR disp {:  
  1335     if( sh4_x86.in_delay_slot ) {
  1336 	SLOTILLEGAL();
  1337     } else {
  1338 	load_imm32( R_EAX, pc + 4 );
  1339 	store_spreg( R_EAX, R_PR );
  1340 	sh4_x86.in_delay_slot = TRUE;
  1341 	sh4_x86_translate_instruction( pc + 2 );
  1342 	exit_block( disp + pc + 4, pc+4 );
  1343 	sh4_x86.branch_taken = TRUE;
  1344 	return 4;
   1345     }
   1346 :}
  1347 BSRF Rn {:  
  1348     if( sh4_x86.in_delay_slot ) {
  1349 	SLOTILLEGAL();
  1350     } else {
  1351 	load_imm32( R_ECX, pc + 4 );
  1352 	store_spreg( R_ECX, R_PR );
  1353 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_ECX );
  1354 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1355 	sh4_x86.in_delay_slot = TRUE;
  1356 	sh4_x86_translate_instruction( pc + 2 );
  1357 	exit_block_pcset(pc+2);
  1358 	sh4_x86.branch_taken = TRUE;
  1359 	return 4;
   1360     }
   1361 :}
  1362 BT disp {:
  1363     if( sh4_x86.in_delay_slot ) {
  1364 	SLOTILLEGAL();
  1365     } else {
  1366 	CMP_imm8s_sh4r( 0, R_T );
  1367 	JE_rel8( 30, nottaken );
  1368 	exit_block( disp + pc + 4, pc+2 );
  1369 	JMP_TARGET(nottaken);
  1370 	return 2;
   1371     }
   1372 :}
  1373 BT/S disp {:
  1374     if( sh4_x86.in_delay_slot ) {
  1375 	SLOTILLEGAL();
  1376     } else {
  1377 	sh4_x86.in_delay_slot = TRUE;
  1378 	CMP_imm8s_sh4r( 0, R_T );
  1379 	OP(0x0F); OP(0x84); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JE rel32
  1380 	sh4_x86_translate_instruction(pc+2);
  1381 	exit_block( disp + pc + 4, pc+4 );
  1382 	// not taken
  1383 	*patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1384 	sh4_x86_translate_instruction(pc+2);
  1385 	return 4;
   1386     }
   1387 :}
  1388 JMP @Rn {:  
  1389     if( sh4_x86.in_delay_slot ) {
  1390 	SLOTILLEGAL();
  1391     } else {
  1392 	load_reg( R_ECX, Rn );
  1393 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1394 	sh4_x86.in_delay_slot = TRUE;
  1395 	sh4_x86_translate_instruction(pc+2);
  1396 	exit_block_pcset(pc+2);
  1397 	sh4_x86.branch_taken = TRUE;
  1398 	return 4;
   1399     }
   1400 :}
  1401 JSR @Rn {:  
  1402     if( sh4_x86.in_delay_slot ) {
  1403 	SLOTILLEGAL();
  1404     } else {
  1405 	load_imm32( R_EAX, pc + 4 );
  1406 	store_spreg( R_EAX, R_PR );
  1407 	load_reg( R_ECX, Rn );
  1408 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1409 	sh4_x86.in_delay_slot = TRUE;
  1410 	sh4_x86_translate_instruction(pc+2);
  1411 	exit_block_pcset(pc+2);
  1412 	sh4_x86.branch_taken = TRUE;
  1413 	return 4;
   1414     }
   1415 :}
  1416 RTE {:  
  1417     if( sh4_x86.in_delay_slot ) {
  1418 	SLOTILLEGAL();
  1419     } else {
  1420 	check_priv();
  1421 	load_spreg( R_ECX, R_SPC );
  1422 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1423 	load_spreg( R_EAX, R_SSR );
  1424 	call_func1( sh4_write_sr, R_EAX );
  1425 	sh4_x86.in_delay_slot = TRUE;
  1426 	sh4_x86.priv_checked = FALSE;
  1427 	sh4_x86.fpuen_checked = FALSE;
  1428 	sh4_x86_translate_instruction(pc+2);
  1429 	exit_block_pcset(pc+2);
  1430 	sh4_x86.branch_taken = TRUE;
  1431 	return 4;
   1432     }
   1433 :}
  1434 RTS {:  
  1435     if( sh4_x86.in_delay_slot ) {
  1436 	SLOTILLEGAL();
  1437     } else {
  1438 	load_spreg( R_ECX, R_PR );
  1439 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1440 	sh4_x86.in_delay_slot = TRUE;
  1441 	sh4_x86_translate_instruction(pc+2);
  1442 	exit_block_pcset(pc+2);
  1443 	sh4_x86.branch_taken = TRUE;
  1444 	return 4;
   1445     }
   1446 :}
  1447 TRAPA #imm {:  
  1448     if( sh4_x86.in_delay_slot ) {
  1449 	SLOTILLEGAL();
  1450     } else {
  1451 	PUSH_imm32( imm );
  1452 	call_func0( sh4_raise_trap );
  1453 	ADD_imm8s_r32( 4, R_ESP );
  1454 	exit_block_pcset(pc);
  1455 	sh4_x86.branch_taken = TRUE;
  1456 	return 2;
   1457     }
   1458 :}
  1459 UNDEF {:  
  1460     if( sh4_x86.in_delay_slot ) {
  1461 	SLOTILLEGAL();
  1462     } else {
  1463 	JMP_exit(EXIT_ILLEGAL);
  1464 	return 2;
   1465     }
   1466 :}
  1468 CLRMAC {:  
  1469     XOR_r32_r32(R_EAX, R_EAX);
  1470     store_spreg( R_EAX, R_MACL );
  1471     store_spreg( R_EAX, R_MACH );
  1472 :}
  1473 CLRS {:
  1474     CLC();
  1475     SETC_sh4r(R_S);
  1476 :}
  1477 CLRT {:  
  1478     CLC();
  1479     SETC_t();
  1480 :}
  1481 SETS {:  
  1482     STC();
  1483     SETC_sh4r(R_S);
  1484 :}
  1485 SETT {:  
  1486     STC();
  1487     SETC_t();
  1488 :}
  1490 /* Floating point moves */
  1491 FMOV FRm, FRn {:  
  1492     /* As horrible as this looks, it's actually covering 5 separate cases:
  1493      * 1. 32-bit fr-to-fr (PR=0)
  1494      * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )
  1495      * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )
  1496      * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
  1497      * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
  1498      */
  1499     check_fpuen();
  1500     load_spreg( R_ECX, R_FPSCR );
  1501     load_fr_bank( R_EDX );
  1502     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1503     JNE_rel8(8, doublesize);
  1504     load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
  1505     store_fr( R_EDX, R_EAX, FRn );
  1506     if( FRm&1 ) {
  1507 	JMP_rel8(24, end);
  1508 	JMP_TARGET(doublesize);
  1509 	load_xf_bank( R_ECX ); 
  1510 	load_fr( R_ECX, R_EAX, FRm-1 );
  1511 	if( FRn&1 ) {
  1512 	    load_fr( R_ECX, R_EDX, FRm );
  1513 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1514 	    store_fr( R_ECX, R_EDX, FRn );
  1515 	} else /* FRn&1 == 0 */ {
  1516 	    load_fr( R_ECX, R_ECX, FRm );
  1517 	    store_fr( R_EDX, R_EAX, FRn );
  1518 	    store_fr( R_EDX, R_ECX, FRn+1 );
   1519 	}
   1520 	JMP_TARGET(end);
  1521     } else /* FRm&1 == 0 */ {
  1522 	if( FRn&1 ) {
  1523 	    JMP_rel8(24, end);
  1524 	    load_xf_bank( R_ECX );
  1525 	    load_fr( R_EDX, R_EAX, FRm );
  1526 	    load_fr( R_EDX, R_EDX, FRm+1 );
  1527 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1528 	    store_fr( R_ECX, R_EDX, FRn );
  1529 	    JMP_TARGET(end);
  1530 	} else /* FRn&1 == 0 */ {
  1531 	    JMP_rel8(12, end);
  1532 	    load_fr( R_EDX, R_EAX, FRm );
  1533 	    load_fr( R_EDX, R_ECX, FRm+1 );
  1534 	    store_fr( R_EDX, R_EAX, FRn );
  1535 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1536 	    JMP_TARGET(end);
   1537 	}
   1538     }
   1539 :}
  1540 FMOV FRm, @Rn {:  
  1541     check_fpuen();
  1542     load_reg( R_EDX, Rn );
  1543     check_walign32( R_EDX );
  1544     load_spreg( R_ECX, R_FPSCR );
  1545     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1546     JNE_rel8(20, doublesize);
  1547     load_fr_bank( R_ECX );
  1548     load_fr( R_ECX, R_EAX, FRm );
  1549     MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
  1550     if( FRm&1 ) {
  1551 	JMP_rel8( 48, end );
  1552 	JMP_TARGET(doublesize);
  1553 	load_xf_bank( R_ECX );
  1554 	load_fr( R_ECX, R_EAX, FRm&0x0E );
  1555 	load_fr( R_ECX, R_ECX, FRm|0x01 );
  1556 	MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
  1557 	JMP_TARGET(end);
  1558     } else {
  1559 	JMP_rel8( 39, end );
  1560 	JMP_TARGET(doublesize);
  1561 	load_fr_bank( R_ECX );
  1562 	load_fr( R_ECX, R_EAX, FRm&0x0E );
  1563 	load_fr( R_ECX, R_ECX, FRm|0x01 );
  1564 	MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
  1565 	JMP_TARGET(end);
   1566     }
   1567 :}
  1568 FMOV @Rm, FRn {:  
  1569     check_fpuen();
  1570     load_reg( R_EDX, Rm );
  1571     check_ralign32( R_EDX );
  1572     load_spreg( R_ECX, R_FPSCR );
  1573     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1574     JNE_rel8(19, doublesize);
  1575     MEM_READ_LONG( R_EDX, R_EAX );
  1576     load_fr_bank( R_ECX );
  1577     store_fr( R_ECX, R_EAX, FRn );
  1578     if( FRn&1 ) {
  1579 	JMP_rel8(48, end);
  1580 	JMP_TARGET(doublesize);
  1581 	MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
  1582 	load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
  1583 	load_xf_bank( R_ECX );
  1584 	store_fr( R_ECX, R_EAX, FRn&0x0E );
  1585 	store_fr( R_ECX, R_EDX, FRn|0x01 );
  1586 	JMP_TARGET(end);
  1587     } else {
  1588 	JMP_rel8(36, end);
  1589 	JMP_TARGET(doublesize);
  1590 	MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
  1591 	load_fr_bank( R_ECX );
  1592 	store_fr( R_ECX, R_EAX, FRn&0x0E );
  1593 	store_fr( R_ECX, R_EDX, FRn|0x01 );
  1594 	JMP_TARGET(end);
   1595     }
   1596 :}
  1597 FMOV FRm, @-Rn {:  
  1598     check_fpuen();
  1599     load_reg( R_EDX, Rn );
  1600     check_walign32( R_EDX );
  1601     load_spreg( R_ECX, R_FPSCR );
  1602     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1603     JNE_rel8(26, doublesize);
  1604     load_fr_bank( R_ECX );
  1605     load_fr( R_ECX, R_EAX, FRm );
  1606     ADD_imm8s_r32(-4,R_EDX);
  1607     store_reg( R_EDX, Rn );
  1608     MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
  1609     if( FRm&1 ) {
  1610 	JMP_rel8( 54, end );
  1611 	JMP_TARGET(doublesize);
  1612 	load_xf_bank( R_ECX );
  1613 	load_fr( R_ECX, R_EAX, FRm&0x0E );
  1614 	load_fr( R_ECX, R_ECX, FRm|0x01 );
  1615 	ADD_imm8s_r32(-8,R_EDX);
  1616 	store_reg( R_EDX, Rn );
  1617 	MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
  1618 	JMP_TARGET(end);
  1619     } else {
  1620 	JMP_rel8( 45, end );
  1621 	JMP_TARGET(doublesize);
  1622 	load_fr_bank( R_ECX );
  1623 	load_fr( R_ECX, R_EAX, FRm&0x0E );
  1624 	load_fr( R_ECX, R_ECX, FRm|0x01 );
  1625 	ADD_imm8s_r32(-8,R_EDX);
  1626 	store_reg( R_EDX, Rn );
  1627 	MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
  1628 	JMP_TARGET(end);
   1629     }
   1630 :}
  1631 FMOV @Rm+, FRn {:  
  1632     check_fpuen();
  1633     load_reg( R_EDX, Rm );
  1634     check_ralign32( R_EDX );
  1635     MOV_r32_r32( R_EDX, R_EAX );
  1636     load_spreg( R_ECX, R_FPSCR );
  1637     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1638     JNE_rel8(25, doublesize);
  1639     ADD_imm8s_r32( 4, R_EAX );
  1640     store_reg( R_EAX, Rm );
  1641     MEM_READ_LONG( R_EDX, R_EAX );
  1642     load_fr_bank( R_ECX );
  1643     store_fr( R_ECX, R_EAX, FRn );
  1644     if( FRn&1 ) {
  1645 	JMP_rel8(54, end);
  1646 	JMP_TARGET(doublesize);
  1647 	ADD_imm8s_r32( 8, R_EAX );
  1648 	store_reg(R_EAX, Rm);
  1649 	MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
  1650 	load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
  1651 	load_xf_bank( R_ECX );
  1652 	store_fr( R_ECX, R_EAX, FRn&0x0E );
  1653 	store_fr( R_ECX, R_EDX, FRn|0x01 );
  1654 	JMP_TARGET(end);
  1655     } else {
  1656 	JMP_rel8(42, end);
  1657 	ADD_imm8s_r32( 8, R_EAX );
  1658 	store_reg(R_EAX, Rm);
  1659 	MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
  1660 	load_fr_bank( R_ECX );
  1661 	store_fr( R_ECX, R_EAX, FRn&0x0E );
  1662 	store_fr( R_ECX, R_EDX, FRn|0x01 );
  1663 	JMP_TARGET(end);
   1664     }
   1665 :}
  1666 FMOV FRm, @(R0, Rn) {:  
  1667     check_fpuen();
  1668     load_reg( R_EDX, Rn );
  1669     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EDX );
  1670     check_walign32( R_EDX );
  1671     load_spreg( R_ECX, R_FPSCR );
  1672     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1673     JNE_rel8(20, doublesize);
  1674     load_fr_bank( R_ECX );
  1675     load_fr( R_ECX, R_EAX, FRm );
  1676     MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
  1677     if( FRm&1 ) {
  1678 	JMP_rel8( 48, end );
  1679 	JMP_TARGET(doublesize);
  1680 	load_xf_bank( R_ECX );
  1681 	load_fr( R_ECX, R_EAX, FRm&0x0E );
  1682 	load_fr( R_ECX, R_ECX, FRm|0x01 );
  1683 	MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
  1684 	JMP_TARGET(end);
  1685     } else {
  1686 	JMP_rel8( 39, end );
  1687 	JMP_TARGET(doublesize);
  1688 	load_fr_bank( R_ECX );
  1689 	load_fr( R_ECX, R_EAX, FRm&0x0E );
  1690 	load_fr( R_ECX, R_ECX, FRm|0x01 );
  1691 	MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
  1692 	JMP_TARGET(end);
   1692     }
   1694 :}
  1695 FMOV @(R0, Rm), FRn {:  
  1696     check_fpuen();
  1697     load_reg( R_EDX, Rm );
  1698     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EDX );
  1699     check_ralign32( R_EDX );
  1700     load_spreg( R_ECX, R_FPSCR );
  1701     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1702     JNE_rel8(19, doublesize);
  1703     MEM_READ_LONG( R_EDX, R_EAX );
  1704     load_fr_bank( R_ECX );
  1705     store_fr( R_ECX, R_EAX, FRn );
  1706     if( FRn&1 ) {
  1707 	JMP_rel8(48, end);
  1708 	JMP_TARGET(doublesize);
  1709 	MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
  1710 	load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
  1711 	load_xf_bank( R_ECX );
  1712 	store_fr( R_ECX, R_EAX, FRn&0x0E );
  1713 	store_fr( R_ECX, R_EDX, FRn|0x01 );
  1714 	JMP_TARGET(end);
  1715     } else {
  1716 	JMP_rel8(36, end);
  1717 	JMP_TARGET(doublesize);
  1718 	MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
  1719 	load_fr_bank( R_ECX );
  1720 	store_fr( R_ECX, R_EAX, FRn&0x0E );
  1721 	store_fr( R_ECX, R_EDX, FRn|0x01 );
  1722 	JMP_TARGET(end);
   1723     }
   1724 :}
  1725 FLDI0 FRn {:  /* IFF PR=0 */
  1726     check_fpuen();
  1727     load_spreg( R_ECX, R_FPSCR );
  1728     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1729     JNE_rel8(8, end);
  1730     XOR_r32_r32( R_EAX, R_EAX );
  1731     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1732     store_fr( R_ECX, R_EAX, FRn );
  1733     JMP_TARGET(end);
  1734 :}
  1735 FLDI1 FRn {:  /* IFF PR=0 */
  1736     check_fpuen();
  1737     load_spreg( R_ECX, R_FPSCR );
  1738     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1739     JNE_rel8(11, end);
  1740     load_imm32(R_EAX, 0x3F800000);
  1741     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1742     store_fr( R_ECX, R_EAX, FRn );
  1743     JMP_TARGET(end);
  1744 :}
  1746 FLOAT FPUL, FRn {:  
  1747     check_fpuen();
  1748     load_spreg( R_ECX, R_FPSCR );
  1749     load_spreg(R_EDX, REG_OFFSET(fr_bank));
  1750     FILD_sh4r(R_FPUL);
  1751     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1752     JNE_rel8(5, doubleprec);
  1753     pop_fr( R_EDX, FRn );
  1754     JMP_rel8(3, end);
  1755     JMP_TARGET(doubleprec);
  1756     pop_dr( R_EDX, FRn );
  1757     JMP_TARGET(end);
  1758 :}
  1759 FTRC FRm, FPUL {:  
  1760     check_fpuen();
  1761     load_spreg( R_ECX, R_FPSCR );
  1762     load_fr_bank( R_EDX );
  1763     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1764     JNE_rel8(5, doubleprec);
  1765     push_fr( R_EDX, FRm );
  1766     JMP_rel8(3, doop);
  1767     JMP_TARGET(doubleprec);
  1768     push_dr( R_EDX, FRm );
  1769     JMP_TARGET( doop );
  1770     load_imm32( R_ECX, (uint32_t)&max_int );
  1771     FILD_r32ind( R_ECX );
  1772     FCOMIP_st(1);
  1773     JNA_rel8( 32, sat );
  1774     load_imm32( R_ECX, (uint32_t)&min_int );  // 5
  1775     FILD_r32ind( R_ECX );           // 2
  1776     FCOMIP_st(1);                   // 2
  1777     JAE_rel8( 21, sat2 );            // 2
  1778     load_imm32( R_EAX, (uint32_t)&save_fcw );
  1779     FNSTCW_r32ind( R_EAX );
  1780     load_imm32( R_EDX, (uint32_t)&trunc_fcw );
  1781     FLDCW_r32ind( R_EDX );
  1782     FISTP_sh4r(R_FPUL);             // 3
  1783     FLDCW_r32ind( R_EAX );
  1784     JMP_rel8( 9, end );             // 2
  1786     JMP_TARGET(sat);
  1787     JMP_TARGET(sat2);
  1788     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  1789     store_spreg( R_ECX, R_FPUL );
  1790     FPOP_st();
  1791     JMP_TARGET(end);
  1792 :}
  1793 FLDS FRm, FPUL {:  
  1794     check_fpuen();
  1795     load_fr_bank( R_ECX );
  1796     load_fr( R_ECX, R_EAX, FRm );
  1797     store_spreg( R_EAX, R_FPUL );
  1798 :}
  1799 FSTS FPUL, FRn {:  
  1800     check_fpuen();
  1801     load_fr_bank( R_ECX );
  1802     load_spreg( R_EAX, R_FPUL );
  1803     store_fr( R_ECX, R_EAX, FRn );
  1804 :}
  1805 FCNVDS FRm, FPUL {:  
  1806     check_fpuen();
  1807     load_spreg( R_ECX, R_FPSCR );
  1808     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1809     JE_rel8(9, end); // only when PR=1
  1810     load_fr_bank( R_ECX );
  1811     push_dr( R_ECX, FRm );
  1812     pop_fpul();
  1813     JMP_TARGET(end);
  1814 :}
  1815 FCNVSD FPUL, FRn {:  
  1816     check_fpuen();
  1817     load_spreg( R_ECX, R_FPSCR );
  1818     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1819     JE_rel8(9, end); // only when PR=1
  1820     load_fr_bank( R_ECX );
  1821     push_fpul();
  1822     pop_dr( R_ECX, FRn );
  1823     JMP_TARGET(end);
  1824 :}
  1826 /* Floating point instructions */
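        /* Each arithmetic op below tests FPSCR.PR and emits either a single-precision (push_fr/pop_fr) or a double-precision (push_dr/pop_dr) x87 sequence */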
  1827 FABS FRn {:  
  1828     check_fpuen();
  1829     load_spreg( R_ECX, R_FPSCR );
  1830     load_fr_bank( R_EDX );
  1831     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1832     JNE_rel8(10, doubleprec);
  1833     push_fr(R_EDX, FRn); // 3
  1834     FABS_st0(); // 2
  1835     pop_fr( R_EDX, FRn); //3
  1836     JMP_rel8(8,end); // 2
  1837     JMP_TARGET(doubleprec);
  1838     push_dr(R_EDX, FRn);
  1839     FABS_st0();
  1840     pop_dr(R_EDX, FRn);
  1841     JMP_TARGET(end);
  1842 :}
  1843 FADD FRm, FRn {:  
  1844     check_fpuen();
  1845     load_spreg( R_ECX, R_FPSCR );
  1846     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1847     load_fr_bank( R_EDX );
  1848     JNE_rel8(13,doubleprec);
  1849     push_fr(R_EDX, FRm);
  1850     push_fr(R_EDX, FRn);
  1851     FADDP_st(1);
  1852     pop_fr(R_EDX, FRn);
  1853     JMP_rel8(11,end);
  1854     JMP_TARGET(doubleprec);
  1855     push_dr(R_EDX, FRm);
  1856     push_dr(R_EDX, FRn);
  1857     FADDP_st(1);
  1858     pop_dr(R_EDX, FRn);
  1859     JMP_TARGET(end);
  1860 :}
  1861 FDIV FRm, FRn {:  
  1862     check_fpuen();
  1863     load_spreg( R_ECX, R_FPSCR );
  1864     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1865     load_fr_bank( R_EDX );
  1866     JNE_rel8(13, doubleprec);
  1867     push_fr(R_EDX, FRn);
  1868     push_fr(R_EDX, FRm);
  1869     FDIVP_st(1);
  1870     pop_fr(R_EDX, FRn);
  1871     JMP_rel8(11, end);
  1872     JMP_TARGET(doubleprec);
  1873     push_dr(R_EDX, FRn);
  1874     push_dr(R_EDX, FRm);
  1875     FDIVP_st(1);
  1876     pop_dr(R_EDX, FRn);
  1877     JMP_TARGET(end);
  1878 :}
  1879 FMAC FR0, FRm, FRn {:  
  1880     check_fpuen();
  1881     load_spreg( R_ECX, R_FPSCR );
  1882     load_spreg( R_EDX, REG_OFFSET(fr_bank));
  1883     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1884     JNE_rel8(18, doubleprec);
  1885     push_fr( R_EDX, 0 );
  1886     push_fr( R_EDX, FRm );
  1887     FMULP_st(1);
  1888     push_fr( R_EDX, FRn );
  1889     FADDP_st(1);
  1890     pop_fr( R_EDX, FRn );
  1891     JMP_rel8(16, end);
  1892     JMP_TARGET(doubleprec);
  1893     push_dr( R_EDX, 0 );
  1894     push_dr( R_EDX, FRm );
  1895     FMULP_st(1);
  1896     push_dr( R_EDX, FRn );
  1897     FADDP_st(1);
  1898     pop_dr( R_EDX, FRn );
  1899     JMP_TARGET(end);
  1900 :}
  1902 FMUL FRm, FRn {:  
  1903     check_fpuen();
  1904     load_spreg( R_ECX, R_FPSCR );
  1905     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1906     load_fr_bank( R_EDX );
  1907     JNE_rel8(13, doubleprec);
  1908     push_fr(R_EDX, FRm);
  1909     push_fr(R_EDX, FRn);
  1910     FMULP_st(1);
  1911     pop_fr(R_EDX, FRn);
  1912     JMP_rel8(11, end);
  1913     JMP_TARGET(doubleprec);
  1914     push_dr(R_EDX, FRm);
  1915     push_dr(R_EDX, FRn);
  1916     FMULP_st(1);
  1917     pop_dr(R_EDX, FRn);
  1918     JMP_TARGET(end);
  1919 :}
  1920 FNEG FRn {:  
  1921     check_fpuen();
  1922     load_spreg( R_ECX, R_FPSCR );
  1923     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1924     load_fr_bank( R_EDX );
  1925     JNE_rel8(10, doubleprec);
  1926     push_fr(R_EDX, FRn);
  1927     FCHS_st0();
  1928     pop_fr(R_EDX, FRn);
  1929     JMP_rel8(8, end);
  1930     JMP_TARGET(doubleprec);
  1931     push_dr(R_EDX, FRn);
  1932     FCHS_st0();
  1933     pop_dr(R_EDX, FRn);
  1934     JMP_TARGET(end);
  1935 :}
  1936 FSRRA FRn {:  
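            /* Approximate reciprocal square root: FRn = 1.0 / sqrt(FRn) (PR=0 only) */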
  1937     check_fpuen();
  1938     load_spreg( R_ECX, R_FPSCR );
  1939     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1940     load_fr_bank( R_EDX );
  1941     JNE_rel8(12, end); // PR=0 only
  1942     FLD1_st0();
  1943     push_fr(R_EDX, FRn);
  1944     FSQRT_st0();
  1945     FDIVP_st(1);
  1946     pop_fr(R_EDX, FRn);
  1947     JMP_TARGET(end);
  1948 :}
  1949 FSQRT FRn {:  
  1950     check_fpuen();
  1951     load_spreg( R_ECX, R_FPSCR );
  1952     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1953     load_fr_bank( R_EDX );
  1954     JNE_rel8(10, doubleprec);
  1955     push_fr(R_EDX, FRn);
  1956     FSQRT_st0();
  1957     pop_fr(R_EDX, FRn);
  1958     JMP_rel8(8, end);
  1959     JMP_TARGET(doubleprec);
  1960     push_dr(R_EDX, FRn);
  1961     FSQRT_st0();
  1962     pop_dr(R_EDX, FRn);
  1963     JMP_TARGET(end);
  1964 :}
  1965 FSUB FRm, FRn {:  
  1966     check_fpuen();
  1967     load_spreg( R_ECX, R_FPSCR );
  1968     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1969     load_fr_bank( R_EDX );
  1970     JNE_rel8(13, doubleprec);
  1971     push_fr(R_EDX, FRn);
  1972     push_fr(R_EDX, FRm);
  1973     FSUBP_st(1);
  1974     pop_fr(R_EDX, FRn);
  1975     JMP_rel8(11, end);
  1976     JMP_TARGET(doubleprec);
  1977     push_dr(R_EDX, FRn);
  1978     push_dr(R_EDX, FRm);
  1979     FSUBP_st(1);
  1980     pop_dr(R_EDX, FRn);
  1981     JMP_TARGET(end);
  1982 :}
  1984 FCMP/EQ FRm, FRn {:  
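            /* Both precision paths push FRm then FRn, then share the FCOMIP/SETE tail after the join */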
  1985     check_fpuen();
  1986     load_spreg( R_ECX, R_FPSCR );
  1987     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1988     load_fr_bank( R_EDX );
  1989     JNE_rel8(8, doubleprec);
  1990     push_fr(R_EDX, FRm);
  1991     push_fr(R_EDX, FRn);
  1992     JMP_rel8(6, end);
  1993     JMP_TARGET(doubleprec);
  1994     push_dr(R_EDX, FRm);
  1995     push_dr(R_EDX, FRn);
  1996     JMP_TARGET(end);
  1997     FCOMIP_st(1);
  1998     SETE_t();
  1999     FPOP_st();
  2000 :}
  2001 FCMP/GT FRm, FRn {:  
  2002     check_fpuen();
  2003     load_spreg( R_ECX, R_FPSCR );
  2004     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2005     load_fr_bank( R_EDX );
  2006     JNE_rel8(8, doubleprec);
  2007     push_fr(R_EDX, FRm);
  2008     push_fr(R_EDX, FRn);
  2009     JMP_rel8(6, end);
  2010     JMP_TARGET(doubleprec);
  2011     push_dr(R_EDX, FRm);
  2012     push_dr(R_EDX, FRn);
  2013     JMP_TARGET(end);
  2014     FCOMIP_st(1);
  2015     SETA_t();
  2016     FPOP_st();
  2017 :}
  2019 FSCA FPUL, FRn {:  
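            /* Sine/cosine approximation of the FPUL angle via the sh4_fsca helper, writing the FRn pair (PR=0 only) */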
  2020     check_fpuen();
  2021     load_spreg( R_ECX, R_FPSCR );
  2022     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2023     JNE_rel8( 21, doubleprec );
  2024     load_fr_bank( R_ECX );
  2025     ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
  2026     load_spreg( R_EDX, R_FPUL );
  2027     call_func2( sh4_fsca, R_EDX, R_ECX );
  2028     JMP_TARGET(doubleprec);
  2029 :}
  2030 FIPR FVm, FVn {:  
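            /* Four-element inner product of FVm and FVn; the result goes to the last element of FVn (PR=0 only) */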
  2031     check_fpuen();
  2032     load_spreg( R_ECX, R_FPSCR );
  2033     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2034     JNE_rel8(44, doubleprec);
  2036     load_fr_bank( R_ECX );
  2037     push_fr( R_ECX, FVm<<2 );
  2038     push_fr( R_ECX, FVn<<2 );
  2039     FMULP_st(1);
  2040     push_fr( R_ECX, (FVm<<2)+1);
  2041     push_fr( R_ECX, (FVn<<2)+1);
  2042     FMULP_st(1);
  2043     FADDP_st(1);
  2044     push_fr( R_ECX, (FVm<<2)+2);
  2045     push_fr( R_ECX, (FVn<<2)+2);
  2046     FMULP_st(1);
  2047     FADDP_st(1);
  2048     push_fr( R_ECX, (FVm<<2)+3);
  2049     push_fr( R_ECX, (FVn<<2)+3);
  2050     FMULP_st(1);
  2051     FADDP_st(1);
  2052     pop_fr( R_ECX, (FVn<<2)+3);
  2053     JMP_TARGET(doubleprec);
  2054 :}
  2055 FTRV XMTRX, FVn {:  
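            /* Transform vector FVn by the XMTRX matrix via the sh4_ftrv helper (PR=0 only) */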
  2056     check_fpuen();
  2057     load_spreg( R_ECX, R_FPSCR );
  2058     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2059     JNE_rel8( 30, doubleprec );
  2060     load_fr_bank( R_EDX );                 // 3
  2061     ADD_imm8s_r32( FVn<<4, R_EDX );        // 3
  2062     load_xf_bank( R_ECX );                 // 12
  2063     call_func2( sh4_ftrv, R_EDX, R_ECX );  // 12
  2064     JMP_TARGET(doubleprec);
  2065 :}
  2067 FRCHG {:  
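            /* Toggle FPSCR.FR to swap the foreground and background FP register banks */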
  2068     check_fpuen();
  2069     load_spreg( R_ECX, R_FPSCR );
  2070     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2071     store_spreg( R_ECX, R_FPSCR );
  2072     update_fr_bank( R_ECX );
  2073 :}
  2074 FSCHG {:  
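            /* Toggle FPSCR.SZ to switch between single and paired FMOV transfer sizes */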
  2075     check_fpuen();
  2076     load_spreg( R_ECX, R_FPSCR );
  2077     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2078     store_spreg( R_ECX, R_FPSCR );
  2079 :}
  2081 /* Processor control instructions */
  2082 LDC Rm, SR {:
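            /* Writing SR can change the privilege level and FPU-disable state, so the cached priv/fpuen checks are invalidated below */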
  2083     if( sh4_x86.in_delay_slot ) {
  2084 	SLOTILLEGAL();
  2085     } else {
  2086 	check_priv();
  2087 	load_reg( R_EAX, Rm );
  2088 	call_func1( sh4_write_sr, R_EAX );
  2089 	sh4_x86.priv_checked = FALSE;
  2090 	sh4_x86.fpuen_checked = FALSE;
   2091     }
   2092 :}
  2093 LDC Rm, GBR {: 
  2094     load_reg( R_EAX, Rm );
  2095     store_spreg( R_EAX, R_GBR );
  2096 :}
  2097 LDC Rm, VBR {:  
  2098     check_priv();
  2099     load_reg( R_EAX, Rm );
  2100     store_spreg( R_EAX, R_VBR );
  2101 :}
  2102 LDC Rm, SSR {:  
  2103     check_priv();
  2104     load_reg( R_EAX, Rm );
  2105     store_spreg( R_EAX, R_SSR );
  2106 :}
  2107 LDC Rm, SGR {:  
  2108     check_priv();
  2109     load_reg( R_EAX, Rm );
  2110     store_spreg( R_EAX, R_SGR );
  2111 :}
  2112 LDC Rm, SPC {:  
  2113     check_priv();
  2114     load_reg( R_EAX, Rm );
  2115     store_spreg( R_EAX, R_SPC );
  2116 :}
  2117 LDC Rm, DBR {:  
  2118     check_priv();
  2119     load_reg( R_EAX, Rm );
  2120     store_spreg( R_EAX, R_DBR );
  2121 :}
  2122 LDC Rm, Rn_BANK {:  
  2123     check_priv();
  2124     load_reg( R_EAX, Rm );
  2125     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2126 :}
  2127 LDC.L @Rm+, GBR {:  
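            /* Post-increment load: the original address is kept in ECX for the read while Rm is advanced by 4 (same pattern for the LDC.L/LDS.L forms below) */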
  2128     load_reg( R_EAX, Rm );
  2129     check_ralign32( R_EAX );
  2130     MOV_r32_r32( R_EAX, R_ECX );
  2131     ADD_imm8s_r32( 4, R_EAX );
  2132     store_reg( R_EAX, Rm );
  2133     MEM_READ_LONG( R_ECX, R_EAX );
  2134     store_spreg( R_EAX, R_GBR );
  2135 :}
  2136 LDC.L @Rm+, SR {:
  2137     if( sh4_x86.in_delay_slot ) {
  2138 	SLOTILLEGAL();
  2139     } else {
  2140 	check_priv();
  2141 	load_reg( R_EAX, Rm );
  2142 	check_ralign32( R_EAX );
  2143 	MOV_r32_r32( R_EAX, R_ECX );
  2144 	ADD_imm8s_r32( 4, R_EAX );
  2145 	store_reg( R_EAX, Rm );
  2146 	MEM_READ_LONG( R_ECX, R_EAX );
  2147 	call_func1( sh4_write_sr, R_EAX );
  2148 	sh4_x86.priv_checked = FALSE;
  2149 	sh4_x86.fpuen_checked = FALSE;
   2150     }
   2151 :}
  2152 LDC.L @Rm+, VBR {:  
  2153     check_priv();
  2154     load_reg( R_EAX, Rm );
  2155     check_ralign32( R_EAX );
  2156     MOV_r32_r32( R_EAX, R_ECX );
  2157     ADD_imm8s_r32( 4, R_EAX );
  2158     store_reg( R_EAX, Rm );
  2159     MEM_READ_LONG( R_ECX, R_EAX );
  2160     store_spreg( R_EAX, R_VBR );
  2161 :}
  2162 LDC.L @Rm+, SSR {:
  2163     check_priv();
  2164     load_reg( R_EAX, Rm );
  2165     MOV_r32_r32( R_EAX, R_ECX );
  2166     ADD_imm8s_r32( 4, R_EAX );
  2167     store_reg( R_EAX, Rm );
  2168     MEM_READ_LONG( R_ECX, R_EAX );
  2169     store_spreg( R_EAX, R_SSR );
  2170 :}
  2171 LDC.L @Rm+, SGR {:  
  2172     check_priv();
  2173     load_reg( R_EAX, Rm );
  2174     check_ralign32( R_EAX );
  2175     MOV_r32_r32( R_EAX, R_ECX );
  2176     ADD_imm8s_r32( 4, R_EAX );
  2177     store_reg( R_EAX, Rm );
  2178     MEM_READ_LONG( R_ECX, R_EAX );
  2179     store_spreg( R_EAX, R_SGR );
  2180 :}
  2181 LDC.L @Rm+, SPC {:  
  2182     check_priv();
  2183     load_reg( R_EAX, Rm );
  2184     check_ralign32( R_EAX );
  2185     MOV_r32_r32( R_EAX, R_ECX );
  2186     ADD_imm8s_r32( 4, R_EAX );
  2187     store_reg( R_EAX, Rm );
  2188     MEM_READ_LONG( R_ECX, R_EAX );
  2189     store_spreg( R_EAX, R_SPC );
  2190 :}
  2191 LDC.L @Rm+, DBR {:  
  2192     check_priv();
  2193     load_reg( R_EAX, Rm );
  2194     check_ralign32( R_EAX );
  2195     MOV_r32_r32( R_EAX, R_ECX );
  2196     ADD_imm8s_r32( 4, R_EAX );
  2197     store_reg( R_EAX, Rm );
  2198     MEM_READ_LONG( R_ECX, R_EAX );
  2199     store_spreg( R_EAX, R_DBR );
  2200 :}
  2201 LDC.L @Rm+, Rn_BANK {:  
  2202     check_priv();
  2203     load_reg( R_EAX, Rm );
  2204     check_ralign32( R_EAX );
  2205     MOV_r32_r32( R_EAX, R_ECX );
  2206     ADD_imm8s_r32( 4, R_EAX );
  2207     store_reg( R_EAX, Rm );
  2208     MEM_READ_LONG( R_ECX, R_EAX );
  2209     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2210 :}
  2211 LDS Rm, FPSCR {:  
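            /* A new FPSCR value may select a different FP bank, so update_fr_bank recomputes fr_bank from it */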
  2212     load_reg( R_EAX, Rm );
  2213     store_spreg( R_EAX, R_FPSCR );
  2214     update_fr_bank( R_EAX );
  2215 :}
  2216 LDS.L @Rm+, FPSCR {:  
  2217     load_reg( R_EAX, Rm );
  2218     check_ralign32( R_EAX );
  2219     MOV_r32_r32( R_EAX, R_ECX );
  2220     ADD_imm8s_r32( 4, R_EAX );
  2221     store_reg( R_EAX, Rm );
  2222     MEM_READ_LONG( R_ECX, R_EAX );
  2223     store_spreg( R_EAX, R_FPSCR );
  2224     update_fr_bank( R_EAX );
  2225 :}
  2226 LDS Rm, FPUL {:  
  2227     load_reg( R_EAX, Rm );
  2228     store_spreg( R_EAX, R_FPUL );
  2229 :}
  2230 LDS.L @Rm+, FPUL {:  
  2231     load_reg( R_EAX, Rm );
  2232     check_ralign32( R_EAX );
  2233     MOV_r32_r32( R_EAX, R_ECX );
  2234     ADD_imm8s_r32( 4, R_EAX );
  2235     store_reg( R_EAX, Rm );
  2236     MEM_READ_LONG( R_ECX, R_EAX );
  2237     store_spreg( R_EAX, R_FPUL );
  2238 :}
  2239 LDS Rm, MACH {: 
  2240     load_reg( R_EAX, Rm );
  2241     store_spreg( R_EAX, R_MACH );
  2242 :}
  2243 LDS.L @Rm+, MACH {:  
  2244     load_reg( R_EAX, Rm );
  2245     check_ralign32( R_EAX );
  2246     MOV_r32_r32( R_EAX, R_ECX );
  2247     ADD_imm8s_r32( 4, R_EAX );
  2248     store_reg( R_EAX, Rm );
  2249     MEM_READ_LONG( R_ECX, R_EAX );
  2250     store_spreg( R_EAX, R_MACH );
  2251 :}
  2252 LDS Rm, MACL {:  
  2253     load_reg( R_EAX, Rm );
  2254     store_spreg( R_EAX, R_MACL );
  2255 :}
  2256 LDS.L @Rm+, MACL {:  
  2257     load_reg( R_EAX, Rm );
  2258     check_ralign32( R_EAX );
  2259     MOV_r32_r32( R_EAX, R_ECX );
  2260     ADD_imm8s_r32( 4, R_EAX );
  2261     store_reg( R_EAX, Rm );
  2262     MEM_READ_LONG( R_ECX, R_EAX );
  2263     store_spreg( R_EAX, R_MACL );
  2264 :}
  2265 LDS Rm, PR {:  
  2266     load_reg( R_EAX, Rm );
  2267     store_spreg( R_EAX, R_PR );
  2268 :}
  2269 LDS.L @Rm+, PR {:  
  2270     load_reg( R_EAX, Rm );
  2271     check_ralign32( R_EAX );
  2272     MOV_r32_r32( R_EAX, R_ECX );
  2273     ADD_imm8s_r32( 4, R_EAX );
  2274     store_reg( R_EAX, Rm );
  2275     MEM_READ_LONG( R_ECX, R_EAX );
  2276     store_spreg( R_EAX, R_PR );
  2277 :}
  2278 LDTLB {:  :}
  2279 OCBI @Rn {:  :}
  2280 OCBP @Rn {:  :}
  2281 OCBWB @Rn {:  :}
  2282 PREF @Rn {:
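            /* Only addresses in the 0xE0000000 store-queue region need action: flush the store queue, otherwise do nothing */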
  2283     load_reg( R_EAX, Rn );
  2284     PUSH_r32( R_EAX );
  2285     AND_imm32_r32( 0xFC000000, R_EAX );
  2286     CMP_imm32_r32( 0xE0000000, R_EAX );
  2287     JNE_rel8(7, end);
  2288     call_func0( sh4_flush_store_queue );
  2289     JMP_TARGET(end);
  2290     ADD_imm8s_r32( 4, R_ESP );
  2291 :}
  2292 SLEEP {: 
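            /* Privileged; puts the CPU to sleep via sh4_sleep and ends the current translation block */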
  2293     check_priv();
  2294     call_func0( sh4_sleep );
  2295     sh4_x86.in_delay_slot = FALSE;
  2296     INC_r32(R_ESI);
  2297     return 2;
  2298 :}
  2299 STC SR, Rn {:
  2300     check_priv();
  2301     call_func0(sh4_read_sr);
  2302     store_reg( R_EAX, Rn );
  2303 :}
  2304 STC GBR, Rn {:  
  2305     load_spreg( R_EAX, R_GBR );
  2306     store_reg( R_EAX, Rn );
  2307 :}
  2308 STC VBR, Rn {:  
  2309     check_priv();
  2310     load_spreg( R_EAX, R_VBR );
  2311     store_reg( R_EAX, Rn );
  2312 :}
  2313 STC SSR, Rn {:  
  2314     check_priv();
  2315     load_spreg( R_EAX, R_SSR );
  2316     store_reg( R_EAX, Rn );
  2317 :}
  2318 STC SPC, Rn {:  
  2319     check_priv();
  2320     load_spreg( R_EAX, R_SPC );
  2321     store_reg( R_EAX, Rn );
  2322 :}
  2323 STC SGR, Rn {:  
  2324     check_priv();
  2325     load_spreg( R_EAX, R_SGR );
  2326     store_reg( R_EAX, Rn );
  2327 :}
  2328 STC DBR, Rn {:  
  2329     check_priv();
  2330     load_spreg( R_EAX, R_DBR );
  2331     store_reg( R_EAX, Rn );
  2332 :}
  2333 STC Rm_BANK, Rn {:
  2334     check_priv();
  2335     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2336     store_reg( R_EAX, Rn );
  2337 :}
  2338 STC.L SR, @-Rn {:
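            /* Pre-decrement store: Rn is decremented by 4 and the value written at the new address (same pattern for the STC.L/STS.L forms below) */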
  2339     check_priv();
  2340     call_func0( sh4_read_sr );
  2341     load_reg( R_ECX, Rn );
  2342     check_walign32( R_ECX );
  2343     ADD_imm8s_r32( -4, R_ECX );
  2344     store_reg( R_ECX, Rn );
  2345     MEM_WRITE_LONG( R_ECX, R_EAX );
  2346 :}
  2347 STC.L VBR, @-Rn {:  
  2348     check_priv();
  2349     load_reg( R_ECX, Rn );
  2350     check_walign32( R_ECX );
  2351     ADD_imm8s_r32( -4, R_ECX );
  2352     store_reg( R_ECX, Rn );
  2353     load_spreg( R_EAX, R_VBR );
  2354     MEM_WRITE_LONG( R_ECX, R_EAX );
  2355 :}
  2356 STC.L SSR, @-Rn {:  
  2357     check_priv();
  2358     load_reg( R_ECX, Rn );
  2359     check_walign32( R_ECX );
  2360     ADD_imm8s_r32( -4, R_ECX );
  2361     store_reg( R_ECX, Rn );
  2362     load_spreg( R_EAX, R_SSR );
  2363     MEM_WRITE_LONG( R_ECX, R_EAX );
  2364 :}
  2365 STC.L SPC, @-Rn {:  
  2366     check_priv();
  2367     load_reg( R_ECX, Rn );
  2368     check_walign32( R_ECX );
  2369     ADD_imm8s_r32( -4, R_ECX );
  2370     store_reg( R_ECX, Rn );
  2371     load_spreg( R_EAX, R_SPC );
  2372     MEM_WRITE_LONG( R_ECX, R_EAX );
  2373 :}
  2374 STC.L SGR, @-Rn {:  
  2375     check_priv();
  2376     load_reg( R_ECX, Rn );
  2377     check_walign32( R_ECX );
  2378     ADD_imm8s_r32( -4, R_ECX );
  2379     store_reg( R_ECX, Rn );
  2380     load_spreg( R_EAX, R_SGR );
  2381     MEM_WRITE_LONG( R_ECX, R_EAX );
  2382 :}
  2383 STC.L DBR, @-Rn {:  
  2384     check_priv();
  2385     load_reg( R_ECX, Rn );
  2386     check_walign32( R_ECX );
  2387     ADD_imm8s_r32( -4, R_ECX );
  2388     store_reg( R_ECX, Rn );
  2389     load_spreg( R_EAX, R_DBR );
  2390     MEM_WRITE_LONG( R_ECX, R_EAX );
  2391 :}
  2392 STC.L Rm_BANK, @-Rn {:  
  2393     check_priv();
  2394     load_reg( R_ECX, Rn );
  2395     check_walign32( R_ECX );
  2396     ADD_imm8s_r32( -4, R_ECX );
  2397     store_reg( R_ECX, Rn );
  2398     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2399     MEM_WRITE_LONG( R_ECX, R_EAX );
  2400 :}
  2401 STC.L GBR, @-Rn {:  
  2402     load_reg( R_ECX, Rn );
  2403     check_walign32( R_ECX );
  2404     ADD_imm8s_r32( -4, R_ECX );
  2405     store_reg( R_ECX, Rn );
  2406     load_spreg( R_EAX, R_GBR );
  2407     MEM_WRITE_LONG( R_ECX, R_EAX );
  2408 :}
  2409 STS FPSCR, Rn {:  
  2410     load_spreg( R_EAX, R_FPSCR );
  2411     store_reg( R_EAX, Rn );
  2412 :}
  2413 STS.L FPSCR, @-Rn {:  
  2414     load_reg( R_ECX, Rn );
  2415     check_walign32( R_ECX );
  2416     ADD_imm8s_r32( -4, R_ECX );
  2417     store_reg( R_ECX, Rn );
  2418     load_spreg( R_EAX, R_FPSCR );
  2419     MEM_WRITE_LONG( R_ECX, R_EAX );
  2420 :}
  2421 STS FPUL, Rn {:  
  2422     load_spreg( R_EAX, R_FPUL );
  2423     store_reg( R_EAX, Rn );
  2424 :}
  2425 STS.L FPUL, @-Rn {:  
  2426     load_reg( R_ECX, Rn );
  2427     check_walign32( R_ECX );
  2428     ADD_imm8s_r32( -4, R_ECX );
  2429     store_reg( R_ECX, Rn );
  2430     load_spreg( R_EAX, R_FPUL );
  2431     MEM_WRITE_LONG( R_ECX, R_EAX );
  2432 :}
  2433 STS MACH, Rn {:  
  2434     load_spreg( R_EAX, R_MACH );
  2435     store_reg( R_EAX, Rn );
  2436 :}
  2437 STS.L MACH, @-Rn {:  
  2438     load_reg( R_ECX, Rn );
  2439     check_walign32( R_ECX );
  2440     ADD_imm8s_r32( -4, R_ECX );
  2441     store_reg( R_ECX, Rn );
  2442     load_spreg( R_EAX, R_MACH );
  2443     MEM_WRITE_LONG( R_ECX, R_EAX );
  2444 :}
  2445 STS MACL, Rn {:  
  2446     load_spreg( R_EAX, R_MACL );
  2447     store_reg( R_EAX, Rn );
  2448 :}
  2449 STS.L MACL, @-Rn {:  
  2450     load_reg( R_ECX, Rn );
  2451     check_walign32( R_ECX );
  2452     ADD_imm8s_r32( -4, R_ECX );
  2453     store_reg( R_ECX, Rn );
  2454     load_spreg( R_EAX, R_MACL );
  2455     MEM_WRITE_LONG( R_ECX, R_EAX );
  2456 :}
  2457 STS PR, Rn {:  
  2458     load_spreg( R_EAX, R_PR );
  2459     store_reg( R_EAX, Rn );
  2460 :}
  2461 STS.L PR, @-Rn {:  
  2462     load_reg( R_ECX, Rn );
  2463     check_walign32( R_ECX );
  2464     ADD_imm8s_r32( -4, R_ECX );
  2465     store_reg( R_ECX, Rn );
  2466     load_spreg( R_EAX, R_PR );
  2467     MEM_WRITE_LONG( R_ECX, R_EAX );
  2468 :}
  2470 NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
  2471 %%
  2472     if( sh4_x86.in_delay_slot ) {
  2473 	ADD_imm8s_r32(2,R_ESI);
  2474 	sh4_x86.in_delay_slot = FALSE;
  2475     } else {
  2476 	INC_r32(R_ESI);
   2477     }
   2478     return 0;