filename    src/sh4/sh4x86.in
changeset   1191:12fdf3aafcd4
prev        1186:2dc47c67bb93
next        1193:dff55bdc4f46
author      nkeynes
date        Fri Dec 02 18:18:04 2011 +1000
permissions -rw-r--r--
last change SH4 shadow-mode tweaks
 - Fix exceptions generated by the translator to account for the excepting
   instruction(s) in the cycle counts.
 - Compare floating point regs bitwise rather than with FP comparisons
   (otherwise can fail due to nan != nan)
 - Dump the translated block when we abort with an inconsistency
/**
 * $Id$
 *
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif

#include "lxdream.h"
#include "sh4/sh4core.h"
#include "sh4/sh4dasm.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4stat.h"
#include "sh4/sh4mmio.h"
#include "sh4/mmu.h"
#include "xlat/xltcache.h"
#include "xlat/x86/x86op.h"
#include "x86dasm/x86dasm.h"
#include "clock.h"

#define DEFAULT_BACKPATCH_SIZE 4096

/* Offset of a reg relative to the sh4r structure */
#define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)
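
/* Note on the -128 bias: the generated code keeps its base register pointing
 * at &sh4r + 128 (see the "sh4r+128" entry in x86_symbol_table below), so
 * REG_OFFSET() biases each field offset by -128 to keep the most frequently
 * used fields within the signed 8-bit displacement range (-128..127),
 * allowing the shorter mod/rm encodings for the rbp-relative loads and
 * stores emitted everywhere below. As a hypothetical example, a field at
 * struct offset 36 becomes displacement -92, which fits in a disp8. */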
#define R_T      REG_OFFSET(t)
#define R_Q      REG_OFFSET(q)
#define R_S      REG_OFFSET(s)
#define R_M      REG_OFFSET(m)
#define R_SR     REG_OFFSET(sr)
#define R_GBR    REG_OFFSET(gbr)
#define R_SSR    REG_OFFSET(ssr)
#define R_SPC    REG_OFFSET(spc)
#define R_VBR    REG_OFFSET(vbr)
#define R_MACH   REG_OFFSET(mac)+4
#define R_MACL   REG_OFFSET(mac)
#define R_PC     REG_OFFSET(pc)
#define R_NEW_PC REG_OFFSET(new_pc)
#define R_PR     REG_OFFSET(pr)
#define R_SGR    REG_OFFSET(sgr)
#define R_FPUL   REG_OFFSET(fpul)
#define R_FPSCR  REG_OFFSET(fpscr)
#define R_DBR    REG_OFFSET(dbr)
#define R_R(rn)  REG_OFFSET(r[rn])
#define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
#define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
#define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
#define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
#define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])

#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2

#define SH4_MODE_UNKNOWN -1

struct backpatch_record {
    uint32_t fixup_offset;
    uint32_t fixup_icount;
    int32_t exc_code;
};
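
/* How these fields are used by sh4_x86_add_backpatch() and
 * sh4_translate_end_block() below: fixup_offset is the offset within the
 * current block of the 32-bit operand to be patched; fixup_icount is the
 * SH4 instruction count from the block start, used by the exception stub to
 * fix up SPC and the cycle count; exc_code is the SH4 exception to raise.
 * Negative codes are handled specially: -2 means an absolute pointer to the
 * stub is patched in (as in call_read_func()) rather than a rel32, and any
 * other negative code produces a stub that exits without raising. */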
/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;
    uint8_t *code;
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc;
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    uint32_t sh4_mode;     /* Mirror of sh4r.xlat_sh4_mode */
    int tstate;

    /* mode settings */
    gboolean tlb_on; /* True if tlb translation is active */
    struct mem_region_fn **priv_address_space;
    struct mem_region_fn **user_address_space;

    /* Instrumentation */
    xlat_block_begin_callback_t begin_callback;
    xlat_block_end_callback_t end_callback;
    gboolean fastmem;
    gboolean profile_blocks;

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};
static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */

static struct x86_symbol x86_symbol_table[] = {
    { "sh4r+128", ((char *)&sh4r)+128 },
    { "sh4_cpu_period", &sh4_cpu_period },
    { "sh4_address_space", NULL },
    { "sh4_user_address_space", NULL },
    { "sh4_translate_breakpoint_hit", sh4_translate_breakpoint_hit },
    { "sh4_write_fpscr", sh4_write_fpscr },
    { "sh4_write_sr", sh4_write_sr },
    { "sh4_read_sr", sh4_read_sr },
    { "sh4_raise_exception", sh4_raise_exception },
    { "sh4_sleep", sh4_sleep },
    { "sh4_fsca", sh4_fsca },
    { "sh4_ftrv", sh4_ftrv },
    { "sh4_switch_fr_banks", sh4_switch_fr_banks },
    { "sh4_execute_instruction", sh4_execute_instruction },
    { "signsat48", signsat48 },
    { "xlat_get_code_by_vma", xlat_get_code_by_vma },
    { "xlat_get_code", xlat_get_code }
};
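
/* SSE3 support is reported by CPUID leaf 1 as bit 0 of ECX; the clobber list
 * below covers EAX/EBX/EDX, which CPUID also overwrites. */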
gboolean is_sse3_supported()
{
    uint32_t features;

    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE;
}
void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
{
    sh4_x86.priv_address_space = priv;
    sh4_x86.user_address_space = user;
    x86_symbol_table[2].ptr = priv;
    x86_symbol_table[3].ptr = user;
}

void sh4_translate_init(void)
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
    sh4_x86.begin_callback = NULL;
    sh4_x86.end_callback = NULL;
    sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
    sh4_x86.fastmem = TRUE;
    sh4_x86.profile_blocks = FALSE;
    sh4_x86.sse3_enabled = is_sse3_supported();
    x86_disasm_init();
    x86_set_symtab( x86_symbol_table, sizeof(x86_symbol_table)/sizeof(struct x86_symbol) );
}

void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
{
    sh4_x86.begin_callback = begin;
    sh4_x86.end_callback = end;
}

void sh4_translate_set_fastmem( gboolean flag )
{
    sh4_x86.fastmem = flag;
}

void sh4_translate_set_profile_blocks( gboolean flag )
{
    sh4_x86.profile_blocks = flag;
}

gboolean sh4_translate_get_profile_blocks()
{
    return sh4_x86.profile_blocks;
}
/**
 * Disassemble the given translated code block and its source SH4 code block
 * side-by-side. The current native pc will be marked if non-null.
 */
void sh4_translate_disasm_block( FILE *out, void *code, sh4addr_t source_start, void *native_pc )
{
    char buf[256];
    char op[256];

    uintptr_t target_start = (uintptr_t)code, target_pc;
    uintptr_t target_end = target_start + xlat_get_code_size(code);
    uint32_t source_pc = source_start;
    uint32_t source_end = source_pc;
    xlat_recovery_record_t source_recov_table = XLAT_RECOVERY_TABLE(code);
    xlat_recovery_record_t source_recov_end = source_recov_table + XLAT_BLOCK_FOR_CODE(code)->recover_table_size - 1;

    for( target_pc = target_start; target_pc < target_end; ) {
        uintptr_t pc2 = x86_disasm_instruction( target_pc, buf, sizeof(buf), op );
#if SIZEOF_VOID_P == 8
        fprintf( out, "%c%016lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                      target_pc, op, buf );
#else
        fprintf( out, "%c%08lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                      target_pc, op, buf );
#endif
        if( source_recov_table < source_recov_end &&
            target_pc >= (target_start + source_recov_table->xlat_offset) ) {
            source_recov_table++;
            if( source_end < (source_start + (source_recov_table->sh4_icount)*2) )
                source_end = source_start + (source_recov_table->sh4_icount)*2;
        }

        if( source_pc < source_end ) {
            uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
            fprintf( out, " %08X: %s  %s\n", source_pc, op, buf );
            source_pc = source_pc2;
        } else {
            fprintf( out, "\n" );
        }

        target_pc = pc2;
    }

    while( source_pc < source_end ) {
        uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
        fprintf( out, "%*c %08X: %s  %s\n", 72,' ', source_pc, op, buf );
        source_pc = source_pc2;
    }
}
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    int reloc_size = 4;

    if( exc_code == -2 ) {
        reloc_size = sizeof(void *);
    }

    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                                          sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }

    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
        (((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}
#define TSTATE_NONE -1
#define TSTATE_O    X86_COND_O
#define TSTATE_C    X86_COND_C
#define TSTATE_E    X86_COND_E
#define TSTATE_NE   X86_COND_NE
#define TSTATE_G    X86_COND_G
#define TSTATE_GE   X86_COND_GE
#define TSTATE_A    X86_COND_A
#define TSTATE_AE   X86_COND_AE

#define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
#define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
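
/* How the 8-bit forward jumps below work: JCC_cc_rel8(cond,-1) emits a jcc
 * with a placeholder displacement of -1, and MARK_JMP8(label) saves the
 * address of that displacement byte (xlat_output-1). JMP_TARGET(label) then
 * adds the distance from the mark to the current output position, which
 * resolves the displacement to (target - mark - 1), i.e. relative to the end
 * of the jcc. Targets must stay within rel8 range (127 bytes) of the jump. */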
/* Convenience instructions */
#define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
#define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
#define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
#define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
#define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
#define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
#define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
#define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
#define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
#define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
#define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
#define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
#define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
#define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
#define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
#define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
#define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
#define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
#define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
#define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
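
/* tstate caching: after an instruction that computes T, sh4_x86.tstate
 * records which host condition code still holds the result, so a following
 * conditional branch can test EFLAGS directly instead of reloading sh4r.t;
 * TSTATE_NONE forces the reload above. The ^1 in JF_label relies on x86
 * condition codes coming in even/odd pairs where toggling the low bit
 * negates the condition (e.g. X86_COND_E ^ 1 == X86_COND_NE). */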
#define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
#define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
#define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
#define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register
 */
#define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
#define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

#define push_fpul()  FLDF_rbpdisp(R_FPUL)
#define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
#define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )

#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif
/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2), 4 ); \
        } else { \
            exit_block_exc(EXC_ILLEGAL, pc, 2); \
        } \
        sh4_x86.branch_taken = TRUE; \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        MOVL_rbpdisp_r32( R_SR, REG_EAX );\
        ANDL_imms_r32( SR_FD, REG_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
        sh4_x86.tstate = TSTATE_NONE; \
    }

#define check_ralign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);
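
/* Each alignment check tests the low address bits against the access size
 * minus one: e.g. a 32-bit read from address 0x8C001002 gives
 * 0x8C001002 & 3 == 2, so the JNE (backpatched later to point at the block's
 * exception stub) is taken and EXC_DATA_ADDR_READ is raised. Aligned
 * addresses leave ZF set and fall straight through. */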
#define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)

#define UNDEF(ir)
/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
 */
#ifdef HAVE_FRAME_ADDRESS
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
        CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    } else {
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
        MOVP_immptr_rptr( 0, REG_ARG2 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
    }
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
        CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
    } else {
        if( value_reg != REG_ARG2 ) {
            MOVL_r32_r32( value_reg, REG_ARG2 );
        }
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
#if MAX_REG_ARG > 2
        MOVP_immptr_rptr( 0, REG_ARG3 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
#else
        MOVL_imm32_rspdisp( 0, 0 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
#endif
    }
}
#else
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
}
#endif
#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
#define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
#define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc)
#define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
#define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
#define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)

#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4); sh4_x86.in_delay_slot = DELAY_NONE; return 2;

/** Offset of xlat_sh4_mode field relative to the code pointer */
#define XLAT_SH4_MODE_CODE_OFFSET  (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
#define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
#define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
void sh4_translate_begin_block( sh4addr_t pc )
{
    sh4_x86.code = xlat_output;
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_TLB_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
    sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
    emit_prologue();
    if( sh4_x86.begin_callback ) {
        CALL_ptr( sh4_x86.begin_callback );
    }
    if( sh4_x86.profile_blocks ) {
        MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
        ADDL_imms_r32disp( 1, REG_EAX, 0 );
    }
}
uint32_t sh4_translate_end_block_size()
{
    if( sh4_x86.backpatch_posn <= 3 ) {
        return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
    } else {
        return EPILOGUE_SIZE + (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
    }
}
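
/* A conservative estimate of the trailer emitted by sh4_translate_end_block():
 * each exception stub is at most MOVL_imm32_r32 (5 bytes) + CALL
 * (CALL1_PTR_MIN_SIZE) + MOVL_imm32_r32 (5) + a jump back to the cleanup
 * stanza, giving 12+CALL1_PTR_MIN_SIZE (5+5+2) while the backwards jump
 * still fits in rel8 and 15+CALL1_PTR_MIN_SIZE (5+5+5) once it needs rel32 -
 * presumably the reason for the split at three records above. */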
/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    MOVL_imm32_r32( pc, REG_EAX );
    CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
}
#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)

/**
 * Test if the loaded target code pointer in %eax is valid, and if so jump
 * directly into it, bypassing the normal exit.
 */
static void jump_next_block()
{
    uint8_t *ptr = xlat_output;
    TESTP_rptr_rptr(REG_EAX, REG_EAX);
    JE_label(nocode);
    if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
        /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
        MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
        CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    } else {
        CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    }
    JNE_label(wrongmode);
    LEAP_rptrdisp_rptr(REG_EAX, PROLOGUE_SIZE, REG_EAX);
    if( sh4_x86.end_callback ) {
        /* Note this does leave the stack out of alignment, but doesn't matter
         * for what we're currently using it for.
         */
        PUSH_r32(REG_EAX);
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        JMP_rptr(REG_EAX);
    }
    JMP_TARGET(wrongmode);
    MOVP_rptrdisp_rptr( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
    int rel = ptr - xlat_output;
    JMP_prerel(rel);
    JMP_TARGET(nocode);
}
/**
 * Translate the block at the given PC (if it hasn't been already), then
 * backpatch the calling instruction into a direct branch to the new block.
 */
static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc )
{
    uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
    while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
        target = XLAT_BLOCK_CHAIN(target);
    }
    if( target == NULL ) {
        target = sh4_translate_basic_block( pc );
    }
    uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
    *backpatch = 0xE9;
    *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)+PROLOGUE_SIZE-5;
    *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
    XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch;

    uint8_t **retptr = ((uint8_t **)__builtin_frame_address(0))+1;
    assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
    *retptr = backpatch;
}
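
/* Self-patching mechanics: the caller reached here via the CALL emitted by
 * emit_translate_and_backpatch() below. That call site (the return address
 * minus CALL1_PTR_MIN_SIZE) is overwritten with a 5-byte jmp rel32 (opcode
 * 0xE9) into the target block just past its prologue; the sizeof(void*)
 * bytes after the jmp (reserved by the NOPs below) hold a link in the
 * target's use_list so the patch can be undone by
 * sh4_translate_unlink_block(). Finally the saved return address on the
 * stack is rewound to the patched site, so returning from this function
 * executes the freshly written jmp immediately. */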
static void emit_translate_and_backpatch()
{
    /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
    CALL1_ptr_r32(sh4_translate_get_code_and_backpatch, REG_ARG1);

    /* When patched, the jmp instruction will be 5 bytes (either platform) -
     * we need to reserve sizeof(void*) bytes for the use-list
     * pointer
     */
    if( sizeof(void*) == 8 ) {
        NOP();
    } else {
        NOP2();
    }
}
/**
 * If we're jumping to a fixed address (or at least one fixed relative to the
 * current PC), then we can do a direct branch. REG_ARG1 should contain
 * the PC at this point.
 */
static void jump_next_block_fixed_pc( sh4addr_t pc )
{
    if( IS_IN_ICACHE(pc) ) {
        if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN ) {
            /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
             * fetch-and-backpatch routine, which will replace the call with a branch */
            emit_translate_and_backpatch();
            return;
        } else {
            MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
            ANDP_imms_rptr( -4, REG_EAX );
        }
    } else if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code, REG_ARG1);
    }
    jump_next_block();
}
void sh4_translate_unlink_block( void *use_list )
{
    uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */
    void *next = use_list;
    while( next != NULL ) {
        xlat_output = (uint8_t *)next;
        next = *(void **)(xlat_output+5);
        emit_translate_and_backpatch();
    }
    xlat_output = tmp;
}
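
/* Unlinking walks the use-list threaded through the patched call sites (the
 * next pointer lives in the sizeof(void*) bytes after each 5-byte jmp) and
 * re-emits the original call-plus-NOP sequence over each site, so the next
 * execution falls back into sh4_translate_get_code_and_backpatch(). */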
static void exit_block()
{
    emit_epilogue();
    if( sh4_x86.end_callback ) {
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        RET();
    }
}
/**
 * Exit the block with sh4r.pc already written
 */
void exit_block_pcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block with sh4r.new_pc written with the target pc
 */
void exit_block_newpcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block to an absolute PC
 */
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    MOVL_imm32_r32( pc, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    jump_next_block_fixed_pc(pc);
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block to a relative PC
 */
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
        /* Special case for tight loops - the PC doesn't change, and
         * we already know the target address. Just check events pending before
         * looping.
         */
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output)) + PROLOGUE_SIZE;
        JCC_cc_prerel(X86_COND_A, backdisp);
    } else {
        MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
        ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
        MOVL_r32_rbpdisp( REG_ARG1, R_PC );
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        JBE_label(exitloop2);

        jump_next_block_fixed_pc(pc);
        JMP_TARGET(exitloop2);
    }
    exit_block();
}
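
/* The tight-loop special case above: when a branch targets the start of its
 * own block and the SH4 mode cannot have changed, there is no need to go
 * back through the translation cache lookup - the generated code compares
 * slice_cycle against event_pending and, while no event is due, jumps
 * straight back to its own first instruction (the block start plus
 * PROLOGUE_SIZE, reached via the negative displacement in backdisp). */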
/**
 * Exit unconditionally with a general exception
 */
void exit_block_exc( int code, sh4addr_t pc, int inst_adjust )
{
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc + inst_adjust)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_imm32_r32( code, REG_ARG1 );
    CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
    exit_block();
}
/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
    ADDL_r32_rbpdisp( REG_ECX, R_PC );

    MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
    MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );

    CALL_ptr( sh4_execute_instruction );
    exit_block();
}
/**
 * Write the block trailer (exception handling block)
 */
void sh4_translate_end_block( sh4addr_t pc ) {
    if( sh4_x86.branch_taken == FALSE ) {
        // Didn't exit unconditionally already, so write the termination here
        exit_block_rel( pc, pc );
    }
    if( sh4_x86.backpatch_posn != 0 ) {
        unsigned int i;
        // Exception raised - cleanup and exit
        uint8_t *end_ptr = xlat_output;
        MOVL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_rbpdisp( REG_ECX, R_SPC );
        MOVL_moffptr_eax( &sh4_cpu_period );
        INC_r32( REG_EDX );  /* Add 1 for the aborting instruction itself */
        MULL_r32( REG_EDX );
        ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
        exit_block();

        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
                if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
                    *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output;
                } else {
                    *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                }
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            } else {
                *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
                CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            }
        }
    }
}
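
/* Trailer layout: the shared cleanup stanza at end_ptr expects the SH4
 * instruction count of the excepting instruction in EDX; it adjusts sh4r.spc
 * by icount*2 bytes to account for the excepting instruction's position and
 * charges (icount+1)*sh4_cpu_period to slice_cycle (the +1 covering the
 * aborting instruction itself, per the changeset notes) before exiting. Each
 * per-record stub then either raises its exception first (exc_code >= 0) or
 * just loads EDX and jumps back (negative codes), once its rel32 - or
 * absolute pointer, for code -2 - has been patched to reach it. */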
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both the branch and the delayed instruction as a single
 * unit (rather than as two separate instructions).
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch).
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    if( !sh4_x86.in_delay_slot ) {
        sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
    }

    /* check for breakpoints at this pc */
    for( int i=0; i<sh4_breakpoint_count; i++ ) {
        if( sh4_breakpoints[i].address == pc ) {
            sh4_translate_emit_breakpoint(pc);
            break;
        }
    }
%%
/* ALU operations */
ADD Rm, Rn {:
    COUNT_INST(I_ADD);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADD #imm, Rn {:
    COUNT_INST(I_ADDI);
    ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    COUNT_INST(I_ADDC);
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADCL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ADDV Rm, Rn {:
    COUNT_INST(I_ADDV);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    COUNT_INST(I_AND);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ANDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:
    COUNT_INST(I_ANDI);
    load_reg( REG_EAX, 0 );
    ANDL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ANDB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp(REG_EAX, 0);
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_rspdisp_r32(0, REG_EAX);
    ANDL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
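/* The GBR read-modify-write form above spills the effective address to the
 * stack (MOVL_r32_rspdisp) before the read call and reloads it afterwards,
 * since the memory-function call is free to clobber EAX; the same pattern
 * recurs in OR.B, TST.B, XOR.B and TAS.B below. */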
CMP/EQ Rm, Rn {:
    COUNT_INST(I_CMPEQ);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:
    COUNT_INST(I_CMPEQI);
    load_reg( REG_EAX, 0 );
    CMPL_imms_r32(imm, REG_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:
    COUNT_INST(I_CMPGE);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {:
    COUNT_INST(I_CMPGT);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:
    COUNT_INST(I_CMPHI);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {:
    COUNT_INST(I_CMPHS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
:}
CMP/PL Rn {:
    COUNT_INST(I_CMPPL);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:
    COUNT_INST(I_CMPPZ);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:
    COUNT_INST(I_CMPSTR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_ECX, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target1);
    TESTB_r8_r8( REG_AH, REG_AH );
    JE_label(target2);
    SHRL_imm_r32( 16, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target3);
    TESTB_r8_r8( REG_AH, REG_AH );
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
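/* CMP/STR sets T when any one of the four bytes of Rm equals the
 * corresponding byte of Rn. The XOR above zeroes exactly the matching bytes,
 * so each byte of the result is tested in turn: the first zero byte
 * short-circuits to SETE with ZF set, while if no byte matched the final
 * TESTB leaves ZF clear and T ends up clear. */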
DIV0S Rm, Rn {:
    COUNT_INST(I_DIV0S);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SHRL_imm_r32( 31, REG_EAX );
    SHRL_imm_r32( 31, REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:
    COUNT_INST(I_DIV0U);
    XORL_r32_r32( REG_EAX, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_Q );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
DIV1 Rm, Rn {:
    COUNT_INST(I_DIV1);
    MOVL_rbpdisp_r32( R_M, REG_ECX );
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    SETC_r8( REG_DL ); // Q'
    CMPL_rbpdisp_r32( R_Q, REG_ECX );
    JE_label(mqequal);
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_label(end);
    JMP_TARGET(mqequal);
    SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_TARGET(end);
    store_reg( REG_EAX, Rn ); // Done with Rn now
    SETC_r8(REG_AL); // tmp1
    XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
    XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    XORL_imms_r32( 1, REG_AL );   // T = !Q'
    MOVZXL_r8_r32( REG_AL, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
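/* DIV1 is one step of SH4's non-restoring 1-bit division: the dividend in Rn
 * is rotated left through carry (shifting in the previous T bit), then the
 * divisor Rm is added or subtracted depending on whether the M and Q flags
 * agree, and Q and T are recomputed from the carry out, so that repeating
 * the step 32 times (after DIV0S/DIV0U setup) leaves the quotient bits in
 * Rn. */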
DMULS.L Rm, Rn {:
    COUNT_INST(I_DMULS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    IMULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:
    COUNT_INST(I_DMULU);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:
    COUNT_INST(I_DT);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( -1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:
    COUNT_INST(I_EXTSB);
    load_reg( REG_EAX, Rm );
    MOVSXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTS.W Rm, Rn {:
    COUNT_INST(I_EXTSW);
    load_reg( REG_EAX, Rm );
    MOVSXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTU.B Rm, Rn {:
    COUNT_INST(I_EXTUB);
    load_reg( REG_EAX, Rm );
    MOVZXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTU.W Rm, Rn {:
    COUNT_INST(I_EXTUW);
    load_reg( REG_EAX, Rm );
    MOVZXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MAC.L @Rm+, @Rn+ {:
    COUNT_INST(I_MACL);
    if( Rm == Rn ) {
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp(REG_EAX, 0);
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
    } else {
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rn );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    }

    IMULL_rspdisp( 0 );
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );

    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32(REG_ECX, REG_ECX);
    JE_label( nosat );
    CALL_ptr( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
MAC.W @Rm+, @Rn+ {:
    COUNT_INST(I_MACW);
    if( Rm == Rn ) {
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rn );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
    }

    IMULL_rspdisp( 0 );
    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32( REG_ECX, REG_ECX );
    JE_label( nosat );

    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    JNO_label( end );            // 2
    MOVL_imm32_r32( 1, REG_EDX );         // 5
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );   // 6
    JS_label( positive );        // 2
    MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
    JMP_label(end2);           // 2

    JMP_TARGET(positive);
    MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
    JMP_label(end3);            // 2

    JMP_TARGET(nosat);
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVT Rn {:
    COUNT_INST(I_MOVT);
    MOVL_rbpdisp_r32( R_T, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MUL.L Rm, Rn {:
    COUNT_INST(I_MULL);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    COUNT_INST(I_MULSW);
    MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:
    COUNT_INST(I_MULUW);
    MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    COUNT_INST(I_NEG);
    load_reg( REG_EAX, Rm );
    NEGL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:
    COUNT_INST(I_NEGC);
    load_reg( REG_EAX, Rm );
    XORL_r32_r32( REG_ECX, REG_ECX );
    LDC_t();
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:
    COUNT_INST(I_NOT);
    load_reg( REG_EAX, Rm );
    NOTL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:
    COUNT_INST(I_OR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    COUNT_INST(I_ORI);
    load_reg( REG_EAX, 0 );
    ORL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ORB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_rspdisp_r32( 0, REG_EAX );
    ORL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    COUNT_INST(I_ROTCL);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:
    COUNT_INST(I_ROTCR);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCRL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:
    COUNT_INST(I_ROTL);
    load_reg( REG_EAX, Rn );
    ROLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:
    COUNT_INST(I_ROTR);
    load_reg( REG_EAX, Rn );
    RORL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    COUNT_INST(I_SHAD);
    /* Annoyingly enough, not directly convertible */
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    NEGL_r32( REG_ECX );      // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptysar);     // 2
    SARL_cl_r32( REG_EAX );       // 2
    JMP_label(end);          // 2

    JMP_TARGET(emptysar);
    SARL_imm_r32(31, REG_EAX );  // 3
    JMP_label(end2);

    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLD Rm, Rn {:
    COUNT_INST(I_SHLD);
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    NEGL_r32( REG_ECX );      // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptyshr);
    SHRL_cl_r32( REG_EAX );       // 2
    JMP_label(end);          // 2

    JMP_TARGET(emptyshr);
    XORL_r32_r32( REG_EAX, REG_EAX );
    JMP_label(end2);

    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHAL Rn {:
    COUNT_INST(I_SHAL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHAR Rn {:
    COUNT_INST(I_SHAR);
    load_reg( REG_EAX, Rn );
    SARL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL2 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL8 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL16 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLR2 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR8 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR16 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUB Rm, Rn {:
    COUNT_INST(I_SUB);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SUBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUBC Rm, Rn {:
    COUNT_INST(I_SUBC);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
  1460 SUBV Rm, Rn {:  
  1461     COUNT_INST(I_SUBV);
  1462     load_reg( REG_EAX, Rm );
  1463     load_reg( REG_ECX, Rn );
  1464     SUBL_r32_r32( REG_EAX, REG_ECX );
  1465     store_reg( REG_ECX, Rn );
  1466     SETO_t();
  1467     sh4_x86.tstate = TSTATE_O;
  1468 :}
  1469 SWAP.B Rm, Rn {:  
  1470     COUNT_INST(I_SWAPB);
  1471     load_reg( REG_EAX, Rm );
  1472     XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
  1473     store_reg( REG_EAX, Rn );
  1474 :}
  1475 SWAP.W Rm, Rn {:  
  1476     COUNT_INST(I_SWAPB);
  1477     load_reg( REG_EAX, Rm );
  1478     MOVL_r32_r32( REG_EAX, REG_ECX );
  1479     SHLL_imm_r32( 16, REG_ECX );
  1480     SHRL_imm_r32( 16, REG_EAX );
  1481     ORL_r32_r32( REG_EAX, REG_ECX );
  1482     store_reg( REG_ECX, Rn );
  1483     sh4_x86.tstate = TSTATE_NONE;
  1484 :}
  1485 TAS.B @Rn {:  
  1486     COUNT_INST(I_TASB);
  1487     load_reg( REG_EAX, Rn );
  1488     MOVL_r32_rspdisp( REG_EAX, 0 );
  1489     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
  1490     TESTB_r8_r8( REG_DL, REG_DL );
  1491     SETE_t();
  1492     ORB_imms_r8( 0x80, REG_DL );
  1493     MOVL_rspdisp_r32( 0, REG_EAX );
  1494     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1495     sh4_x86.tstate = TSTATE_NONE;
  1496 :}
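       /* Illustrative sketch (not part of the generated code): the sequence
        * above implements SH4 TAS.B, which in C terms is roughly
        *
        *     uint8_t tmp = mem_read_byte( addr );   // hypothetical helpers
        *     sh4r.t = (tmp == 0) ? 1 : 0;
        *     mem_write_byte( addr, tmp | 0x80 );
        *
        * The address is parked on the host stack across the read because the
        * memory-access call may clobber the register holding it.
        */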
  1497 TST Rm, Rn {:  
  1498     COUNT_INST(I_TST);
  1499     load_reg( REG_EAX, Rm );
  1500     load_reg( REG_ECX, Rn );
  1501     TESTL_r32_r32( REG_EAX, REG_ECX );
  1502     SETE_t();
  1503     sh4_x86.tstate = TSTATE_E;
  1504 :}
  1505 TST #imm, R0 {:  
  1506     COUNT_INST(I_TSTI);
  1507     load_reg( REG_EAX, 0 );
  1508     TESTL_imms_r32( imm, REG_EAX );
  1509     SETE_t();
  1510     sh4_x86.tstate = TSTATE_E;
  1511 :}
  1512 TST.B #imm, @(R0, GBR) {:  
  1513     COUNT_INST(I_TSTB);
  1514     load_reg( REG_EAX, 0);
  1515     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
  1516     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1517     TESTB_imms_r8( imm, REG_AL );
  1518     SETE_t();
  1519     sh4_x86.tstate = TSTATE_E;
  1520 :}
  1521 XOR Rm, Rn {:  
  1522     COUNT_INST(I_XOR);
  1523     load_reg( REG_EAX, Rm );
  1524     load_reg( REG_ECX, Rn );
  1525     XORL_r32_r32( REG_EAX, REG_ECX );
  1526     store_reg( REG_ECX, Rn );
  1527     sh4_x86.tstate = TSTATE_NONE;
  1528 :}
  1529 XOR #imm, R0 {:  
  1530     COUNT_INST(I_XORI);
  1531     load_reg( REG_EAX, 0 );
  1532     XORL_imms_r32( imm, REG_EAX );
  1533     store_reg( REG_EAX, 0 );
  1534     sh4_x86.tstate = TSTATE_NONE;
  1535 :}
  1536 XOR.B #imm, @(R0, GBR) {:  
  1537     COUNT_INST(I_XORB);
  1538     load_reg( REG_EAX, 0 );
  1539     ADDL_rbpdisp_r32( R_GBR, REG_EAX ); 
  1540     MOVL_r32_rspdisp( REG_EAX, 0 );
  1541     MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
  1542     MOVL_rspdisp_r32( 0, REG_EAX );
  1543     XORL_imms_r32( imm, REG_EDX );
  1544     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1545     sh4_x86.tstate = TSTATE_NONE;
  1546 :}
  1547 XTRCT Rm, Rn {:
  1548     COUNT_INST(I_XTRCT);
  1549     load_reg( REG_EAX, Rm );
  1550     load_reg( REG_ECX, Rn );
  1551     SHLL_imm_r32( 16, REG_EAX );
  1552     SHRL_imm_r32( 16, REG_ECX );
  1553     ORL_r32_r32( REG_EAX, REG_ECX );
  1554     store_reg( REG_ECX, Rn );
  1555     sh4_x86.tstate = TSTATE_NONE;
  1556 :}
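       /* Illustrative sketch: XTRCT takes the middle 32 bits of the 64-bit
        * concatenation Rm:Rn, i.e. the shift/or sequence above computes
        *
        *     Rn = (Rm << 16) | (Rn >> 16);
        */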
  1558 /* Data move instructions */
  1559 MOV Rm, Rn {:  
  1560     COUNT_INST(I_MOV);
  1561     load_reg( REG_EAX, Rm );
  1562     store_reg( REG_EAX, Rn );
  1563 :}
  1564 MOV #imm, Rn {:  
  1565     COUNT_INST(I_MOVI);
  1566     MOVL_imm32_r32( imm, REG_EAX );
  1567     store_reg( REG_EAX, Rn );
  1568 :}
  1569 MOV.B Rm, @Rn {:  
  1570     COUNT_INST(I_MOVB);
  1571     load_reg( REG_EAX, Rn );
  1572     load_reg( REG_EDX, Rm );
  1573     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1574     sh4_x86.tstate = TSTATE_NONE;
  1575 :}
  1576 MOV.B Rm, @-Rn {:  
  1577     COUNT_INST(I_MOVB);
  1578     load_reg( REG_EAX, Rn );
  1579     LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
  1580     load_reg( REG_EDX, Rm );
  1581     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1582     ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
  1583     sh4_x86.tstate = TSTATE_NONE;
  1584 :}
  1585 MOV.B Rm, @(R0, Rn) {:  
  1586     COUNT_INST(I_MOVB);
  1587     load_reg( REG_EAX, 0 );
  1588     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1589     load_reg( REG_EDX, Rm );
  1590     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1591     sh4_x86.tstate = TSTATE_NONE;
  1592 :}
  1593 MOV.B R0, @(disp, GBR) {:  
  1594     COUNT_INST(I_MOVB);
  1595     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1596     ADDL_imms_r32( disp, REG_EAX );
  1597     load_reg( REG_EDX, 0 );
  1598     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1599     sh4_x86.tstate = TSTATE_NONE;
  1600 :}
  1601 MOV.B R0, @(disp, Rn) {:  
  1602     COUNT_INST(I_MOVB);
  1603     load_reg( REG_EAX, Rn );
  1604     ADDL_imms_r32( disp, REG_EAX );
  1605     load_reg( REG_EDX, 0 );
  1606     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1607     sh4_x86.tstate = TSTATE_NONE;
  1608 :}
  1609 MOV.B @Rm, Rn {:  
  1610     COUNT_INST(I_MOVB);
  1611     load_reg( REG_EAX, Rm );
  1612     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1613     store_reg( REG_EAX, Rn );
  1614     sh4_x86.tstate = TSTATE_NONE;
  1615 :}
  1616 MOV.B @Rm+, Rn {:  
  1617     COUNT_INST(I_MOVB);
  1618     load_reg( REG_EAX, Rm );
  1619     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1620     if( Rm != Rn ) {
  1621     	ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
  1622     }
  1623     store_reg( REG_EAX, Rn );
  1624     sh4_x86.tstate = TSTATE_NONE;
  1625 :}
  1626 MOV.B @(R0, Rm), Rn {:  
  1627     COUNT_INST(I_MOVB);
  1628     load_reg( REG_EAX, 0 );
  1629     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1630     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1631     store_reg( REG_EAX, Rn );
  1632     sh4_x86.tstate = TSTATE_NONE;
  1633 :}
  1634 MOV.B @(disp, GBR), R0 {:  
  1635     COUNT_INST(I_MOVB);
  1636     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1637     ADDL_imms_r32( disp, REG_EAX );
  1638     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1639     store_reg( REG_EAX, 0 );
  1640     sh4_x86.tstate = TSTATE_NONE;
  1641 :}
  1642 MOV.B @(disp, Rm), R0 {:  
  1643     COUNT_INST(I_MOVB);
  1644     load_reg( REG_EAX, Rm );
  1645     ADDL_imms_r32( disp, REG_EAX );
  1646     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1647     store_reg( REG_EAX, 0 );
  1648     sh4_x86.tstate = TSTATE_NONE;
  1649 :}
  1650 MOV.L Rm, @Rn {:
  1651     COUNT_INST(I_MOVL);
  1652     load_reg( REG_EAX, Rn );
  1653     check_walign32(REG_EAX);
  1654     MOVL_r32_r32( REG_EAX, REG_ECX );
  1655     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1656     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1657     JNE_label( notsq );
  1658     ANDL_imms_r32( 0x3C, REG_EAX );
  1659     load_reg( REG_EDX, Rm );
  1660     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1661     JMP_label(end);
  1662     JMP_TARGET(notsq);
  1663     load_reg( REG_EDX, Rm );
  1664     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1665     JMP_TARGET(end);
  1666     sh4_x86.tstate = TSTATE_NONE;
  1667 :}
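       /* Illustrative sketch: the inline test above special-cases writes to
        * the SH4 store-queue region (0xE0000000-0xE3FFFFFF): masking with
        * 0xFC000000 and comparing against 0xE0000000 detects the region, and
        * (addr & 0x3C) selects the longword slot within the two 32-byte
        * queues.  The fast path is roughly
        *
        *     if( (addr & 0xFC000000) == 0xE0000000 )
        *         sh4r.store_queue[(addr & 0x3C) >> 2] = value;  // sketch only
        *
        * with everything else routed through the normal memory subsystem.
        */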
  1668 MOV.L Rm, @-Rn {:  
  1669     COUNT_INST(I_MOVL);
  1670     load_reg( REG_EAX, Rn );
  1671     ADDL_imms_r32( -4, REG_EAX );
  1672     check_walign32( REG_EAX );
  1673     load_reg( REG_EDX, Rm );
  1674     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1675     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  1676     sh4_x86.tstate = TSTATE_NONE;
  1677 :}
  1678 MOV.L Rm, @(R0, Rn) {:  
  1679     COUNT_INST(I_MOVL);
  1680     load_reg( REG_EAX, 0 );
  1681     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1682     check_walign32( REG_EAX );
  1683     load_reg( REG_EDX, Rm );
  1684     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1685     sh4_x86.tstate = TSTATE_NONE;
  1686 :}
  1687 MOV.L R0, @(disp, GBR) {:  
  1688     COUNT_INST(I_MOVL);
  1689     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1690     ADDL_imms_r32( disp, REG_EAX );
  1691     check_walign32( REG_EAX );
  1692     load_reg( REG_EDX, 0 );
  1693     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1694     sh4_x86.tstate = TSTATE_NONE;
  1695 :}
  1696 MOV.L Rm, @(disp, Rn) {:  
  1697     COUNT_INST(I_MOVL);
  1698     load_reg( REG_EAX, Rn );
  1699     ADDL_imms_r32( disp, REG_EAX );
  1700     check_walign32( REG_EAX );
  1701     MOVL_r32_r32( REG_EAX, REG_ECX );
  1702     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1703     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1704     JNE_label( notsq );
  1705     ANDL_imms_r32( 0x3C, REG_EAX );
  1706     load_reg( REG_EDX, Rm );
  1707     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1708     JMP_label(end);
  1709     JMP_TARGET(notsq);
  1710     load_reg( REG_EDX, Rm );
  1711     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1712     JMP_TARGET(end);
  1713     sh4_x86.tstate = TSTATE_NONE;
  1714 :}
  1715 MOV.L @Rm, Rn {:  
  1716     COUNT_INST(I_MOVL);
  1717     load_reg( REG_EAX, Rm );
  1718     check_ralign32( REG_EAX );
  1719     MEM_READ_LONG( REG_EAX, REG_EAX );
  1720     store_reg( REG_EAX, Rn );
  1721     sh4_x86.tstate = TSTATE_NONE;
  1722 :}
  1723 MOV.L @Rm+, Rn {:  
  1724     COUNT_INST(I_MOVL);
  1725     load_reg( REG_EAX, Rm );
  1726     check_ralign32( REG_EAX );
  1727     MEM_READ_LONG( REG_EAX, REG_EAX );
  1728     if( Rm != Rn ) {
  1729     	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  1730     }
  1731     store_reg( REG_EAX, Rn );
  1732     sh4_x86.tstate = TSTATE_NONE;
  1733 :}
  1734 MOV.L @(R0, Rm), Rn {:  
  1735     COUNT_INST(I_MOVL);
  1736     load_reg( REG_EAX, 0 );
  1737     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1738     check_ralign32( REG_EAX );
  1739     MEM_READ_LONG( REG_EAX, REG_EAX );
  1740     store_reg( REG_EAX, Rn );
  1741     sh4_x86.tstate = TSTATE_NONE;
  1742 :}
  1743 MOV.L @(disp, GBR), R0 {:
  1744     COUNT_INST(I_MOVL);
  1745     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1746     ADDL_imms_r32( disp, REG_EAX );
  1747     check_ralign32( REG_EAX );
  1748     MEM_READ_LONG( REG_EAX, REG_EAX );
  1749     store_reg( REG_EAX, 0 );
  1750     sh4_x86.tstate = TSTATE_NONE;
  1751 :}
  1752 MOV.L @(disp, PC), Rn {:  
  1753     COUNT_INST(I_MOVLPC);
  1754     if( sh4_x86.in_delay_slot ) {
  1755 	SLOTILLEGAL();
  1756     } else {
  1757 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1758 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1759 	    // If the target address is in the same page as the code, it's
  1760 	    // safe to reference it directly and bypass the whole memory
  1761 	    // subsystem. (This is a big performance win.)
  1763 	    // FIXME: There's an unhandled corner case here when the current
  1764 	    // code page is in the ITLB but not in the UTLB: this should
  1765 	    // presumably generate a TLB miss (SH4 behaviour needs testing to
  1766 	    // confirm), but it's unlikely that anything depends on that
  1767 	    // behaviour.
  1768 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1769 	    MOVL_moffptr_eax( ptr );
  1770 	} else {
  1771 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1772 	    // different virtual address than the translation was done with,
  1773 	    // but we can safely assume that the low bits are the same.
  1774 	    MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
  1775 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1776 	    MEM_READ_LONG( REG_EAX, REG_EAX );
  1777 	    sh4_x86.tstate = TSTATE_NONE;
  1778 	}
  1779 	store_reg( REG_EAX, Rn );
  1780     }
  1781 :}
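       /* Illustrative note: the effective address of a PC-relative longword
        * load is (PC & ~3) + disp + 4, so e.g. a MOV.L at PC=0x8C000102 with
        * disp=8 fetches its literal from 0x8C00010C.  The fastmem path reads
        * the literal straight out of the page backing the code itself.
        */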
  1782 MOV.L @(disp, Rm), Rn {:  
  1783     COUNT_INST(I_MOVL);
  1784     load_reg( REG_EAX, Rm );
  1785     ADDL_imms_r32( disp, REG_EAX );
  1786     check_ralign32( REG_EAX );
  1787     MEM_READ_LONG( REG_EAX, REG_EAX );
  1788     store_reg( REG_EAX, Rn );
  1789     sh4_x86.tstate = TSTATE_NONE;
  1790 :}
  1791 MOV.W Rm, @Rn {:  
  1792     COUNT_INST(I_MOVW);
  1793     load_reg( REG_EAX, Rn );
  1794     check_walign16( REG_EAX );
  1795     load_reg( REG_EDX, Rm );
  1796     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1797     sh4_x86.tstate = TSTATE_NONE;
  1798 :}
  1799 MOV.W Rm, @-Rn {:  
  1800     COUNT_INST(I_MOVW);
  1801     load_reg( REG_EAX, Rn );
  1802     check_walign16( REG_EAX );
  1803     LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
  1804     load_reg( REG_EDX, Rm );
  1805     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1806     ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
  1807     sh4_x86.tstate = TSTATE_NONE;
  1808 :}
  1809 MOV.W Rm, @(R0, Rn) {:  
  1810     COUNT_INST(I_MOVW);
  1811     load_reg( REG_EAX, 0 );
  1812     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1813     check_walign16( REG_EAX );
  1814     load_reg( REG_EDX, Rm );
  1815     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1816     sh4_x86.tstate = TSTATE_NONE;
  1817 :}
  1818 MOV.W R0, @(disp, GBR) {:  
  1819     COUNT_INST(I_MOVW);
  1820     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1821     ADDL_imms_r32( disp, REG_EAX );
  1822     check_walign16( REG_EAX );
  1823     load_reg( REG_EDX, 0 );
  1824     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1825     sh4_x86.tstate = TSTATE_NONE;
  1826 :}
  1827 MOV.W R0, @(disp, Rn) {:  
  1828     COUNT_INST(I_MOVW);
  1829     load_reg( REG_EAX, Rn );
  1830     ADDL_imms_r32( disp, REG_EAX );
  1831     check_walign16( REG_EAX );
  1832     load_reg( REG_EDX, 0 );
  1833     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1834     sh4_x86.tstate = TSTATE_NONE;
  1835 :}
  1836 MOV.W @Rm, Rn {:  
  1837     COUNT_INST(I_MOVW);
  1838     load_reg( REG_EAX, Rm );
  1839     check_ralign16( REG_EAX );
  1840     MEM_READ_WORD( REG_EAX, REG_EAX );
  1841     store_reg( REG_EAX, Rn );
  1842     sh4_x86.tstate = TSTATE_NONE;
  1843 :}
  1844 MOV.W @Rm+, Rn {:  
  1845     COUNT_INST(I_MOVW);
  1846     load_reg( REG_EAX, Rm );
  1847     check_ralign16( REG_EAX );
  1848     MEM_READ_WORD( REG_EAX, REG_EAX );
  1849     if( Rm != Rn ) {
  1850         ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
  1851     }
  1852     store_reg( REG_EAX, Rn );
  1853     sh4_x86.tstate = TSTATE_NONE;
  1854 :}
  1855 MOV.W @(R0, Rm), Rn {:  
  1856     COUNT_INST(I_MOVW);
  1857     load_reg( REG_EAX, 0 );
  1858     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1859     check_ralign16( REG_EAX );
  1860     MEM_READ_WORD( REG_EAX, REG_EAX );
  1861     store_reg( REG_EAX, Rn );
  1862     sh4_x86.tstate = TSTATE_NONE;
  1863 :}
  1864 MOV.W @(disp, GBR), R0 {:  
  1865     COUNT_INST(I_MOVW);
  1866     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1867     ADDL_imms_r32( disp, REG_EAX );
  1868     check_ralign16( REG_EAX );
  1869     MEM_READ_WORD( REG_EAX, REG_EAX );
  1870     store_reg( REG_EAX, 0 );
  1871     sh4_x86.tstate = TSTATE_NONE;
  1872 :}
  1873 MOV.W @(disp, PC), Rn {:  
  1874     COUNT_INST(I_MOVW);
  1875     if( sh4_x86.in_delay_slot ) {
  1876 	SLOTILLEGAL();
  1877     } else {
  1878 	// See comments for MOV.L @(disp, PC), Rn
  1879 	uint32_t target = pc + disp + 4;
  1880 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1881 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1882 	    MOVL_moffptr_eax( ptr );
  1883 	    MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1884 	} else {
  1885 	    MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
  1886 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1887 	    MEM_READ_WORD( REG_EAX, REG_EAX );
  1888 	    sh4_x86.tstate = TSTATE_NONE;
  1889 	}
  1890 	store_reg( REG_EAX, Rn );
  1891     }
  1892 :}
  1893 MOV.W @(disp, Rm), R0 {:  
  1894     COUNT_INST(I_MOVW);
  1895     load_reg( REG_EAX, Rm );
  1896     ADDL_imms_r32( disp, REG_EAX );
  1897     check_ralign16( REG_EAX );
  1898     MEM_READ_WORD( REG_EAX, REG_EAX );
  1899     store_reg( REG_EAX, 0 );
  1900     sh4_x86.tstate = TSTATE_NONE;
  1901 :}
  1902 MOVA @(disp, PC), R0 {:  
  1903     COUNT_INST(I_MOVA);
  1904     if( sh4_x86.in_delay_slot ) {
  1905 	SLOTILLEGAL();
  1906     } else {
  1907 	MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
  1908 	ADDL_rbpdisp_r32( R_PC, REG_ECX );
  1909 	store_reg( REG_ECX, 0 );
  1910 	sh4_x86.tstate = TSTATE_NONE;
  1911     }
  1912 :}
  1913 MOVCA.L R0, @Rn {:  
  1914     COUNT_INST(I_MOVCA);
  1915     load_reg( REG_EAX, Rn );
  1916     check_walign32( REG_EAX );
  1917     load_reg( REG_EDX, 0 );
  1918     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1919     sh4_x86.tstate = TSTATE_NONE;
  1920 :}
  1922 /* Control transfer instructions */
  1923 BF disp {:
  1924     COUNT_INST(I_BF);
  1925     if( sh4_x86.in_delay_slot ) {
  1926 	SLOTILLEGAL();
  1927     } else {
  1928 	sh4vma_t target = disp + pc + 4;
  1929 	JT_label( nottaken );
  1930 	exit_block_rel(target, pc+2 );
  1931 	JMP_TARGET(nottaken);
  1932 	return 2;
  1933     }
  1934 :}
  1935 BF/S disp {:
  1936     COUNT_INST(I_BFS);
  1937     if( sh4_x86.in_delay_slot ) {
  1938 	SLOTILLEGAL();
  1939     } else {
  1940 	sh4_x86.in_delay_slot = DELAY_PC;
  1941 	if( UNTRANSLATABLE(pc+2) ) {
  1942 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1943 	    JT_label(nottaken);
  1944 	    ADDL_imms_r32( disp, REG_EAX );
  1945 	    JMP_TARGET(nottaken);
  1946 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1947 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1948 	    exit_block_emu(pc+2);
  1949 	    sh4_x86.branch_taken = TRUE;
  1950 	    return 2;
  1951 	} else {
  1952 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1953 		CMPL_imms_rbpdisp( 1, R_T );
  1954 		sh4_x86.tstate = TSTATE_E;
  1955 	    }
  1956 	    sh4vma_t target = disp + pc + 4;
  1957 	    JCC_cc_rel32(sh4_x86.tstate,0);
  1958 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  1959 	    int save_tstate = sh4_x86.tstate;
  1960 	    sh4_translate_instruction(pc+2);
  1961             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  1962 	    exit_block_rel( target, pc+4 );
  1964 	    // not taken
  1965 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1966 	    sh4_x86.tstate = save_tstate;
  1967 	    sh4_translate_instruction(pc+2);
  1968 	    return 4;
  1969 	}
  1970     }
  1971 :}
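       /* Illustrative sketch of the emitted shape for a translatable
        * conditional delayed branch (the delay slot is translated twice):
        *
        *     jcc   not_taken        ; condition tested on sh4r.t
        *     <delay slot>           ; first copy
        *     <exit to branch target>
        *   not_taken:               ; 32-bit displacement backpatched via *patch
        *     <delay slot>           ; second copy, with tstate restored first
        *     ... falls through to the next instruction
        */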
  1972 BRA disp {:  
  1973     COUNT_INST(I_BRA);
  1974     if( sh4_x86.in_delay_slot ) {
  1975 	SLOTILLEGAL();
  1976     } else {
  1977 	sh4_x86.in_delay_slot = DELAY_PC;
  1978 	sh4_x86.branch_taken = TRUE;
  1979 	if( UNTRANSLATABLE(pc+2) ) {
  1980 	    MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1981 	    ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
  1982 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1983 	    exit_block_emu(pc+2);
  1984 	    return 2;
  1985 	} else {
  1986 	    sh4_translate_instruction( pc + 2 );
  1987 	    exit_block_rel( disp + pc + 4, pc+4 );
  1988 	    return 4;
  1989 	}
  1990     }
  1991 :}
  1992 BRAF Rn {:  
  1993     COUNT_INST(I_BRAF);
  1994     if( sh4_x86.in_delay_slot ) {
  1995 	SLOTILLEGAL();
  1996     } else {
  1997 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1998 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1999 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  2000 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2001 	sh4_x86.in_delay_slot = DELAY_PC;
  2002 	sh4_x86.tstate = TSTATE_NONE;
  2003 	sh4_x86.branch_taken = TRUE;
  2004 	if( UNTRANSLATABLE(pc+2) ) {
  2005 	    exit_block_emu(pc+2);
  2006 	    return 2;
  2007 	} else {
  2008 	    sh4_translate_instruction( pc + 2 );
  2009 	    exit_block_newpcset(pc+4);
  2010 	    return 4;
  2011 	}
  2012     }
  2013 :}
  2014 BSR disp {:  
  2015     COUNT_INST(I_BSR);
  2016     if( sh4_x86.in_delay_slot ) {
  2017 	SLOTILLEGAL();
  2018     } else {
  2019 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2020 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2021 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2022 	sh4_x86.in_delay_slot = DELAY_PC;
  2023 	sh4_x86.branch_taken = TRUE;
  2024 	sh4_x86.tstate = TSTATE_NONE;
  2025 	if( UNTRANSLATABLE(pc+2) ) {
  2026 	    ADDL_imms_r32( disp, REG_EAX );
  2027 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2028 	    exit_block_emu(pc+2);
  2029 	    return 2;
  2030 	} else {
  2031 	    sh4_translate_instruction( pc + 2 );
  2032 	    exit_block_rel( disp + pc + 4, pc+4 );
  2033 	    return 4;
  2034 	}
  2035     }
  2036 :}
  2037 BSRF Rn {:  
  2038     COUNT_INST(I_BSRF);
  2039     if( sh4_x86.in_delay_slot ) {
  2040 	SLOTILLEGAL();
  2041     } else {
  2042 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2043 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2044 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2045 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  2046 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2048 	sh4_x86.in_delay_slot = DELAY_PC;
  2049 	sh4_x86.tstate = TSTATE_NONE;
  2050 	sh4_x86.branch_taken = TRUE;
  2051 	if( UNTRANSLATABLE(pc+2) ) {
  2052 	    exit_block_emu(pc+2);
  2053 	    return 2;
  2054 	} else {
  2055 	    sh4_translate_instruction( pc + 2 );
  2056 	    exit_block_newpcset(pc+4);
  2057 	    return 4;
  2058 	}
  2059     }
  2060 :}
  2061 BT disp {:
  2062     COUNT_INST(I_BT);
  2063     if( sh4_x86.in_delay_slot ) {
  2064 	SLOTILLEGAL();
  2065     } else {
  2066 	sh4vma_t target = disp + pc + 4;
  2067 	JF_label( nottaken );
  2068 	exit_block_rel(target, pc+2 );
  2069 	JMP_TARGET(nottaken);
  2070 	return 2;
  2071     }
  2072 :}
  2073 BT/S disp {:
  2074     COUNT_INST(I_BTS);
  2075     if( sh4_x86.in_delay_slot ) {
  2076 	SLOTILLEGAL();
  2077     } else {
  2078 	sh4_x86.in_delay_slot = DELAY_PC;
  2079 	if( UNTRANSLATABLE(pc+2) ) {
  2080 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2081 	    JF_label(nottaken);
  2082 	    ADDL_imms_r32( disp, REG_EAX );
  2083 	    JMP_TARGET(nottaken);
  2084 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  2085 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2086 	    exit_block_emu(pc+2);
  2087 	    sh4_x86.branch_taken = TRUE;
  2088 	    return 2;
  2089 	} else {
  2090 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  2091 		CMPL_imms_rbpdisp( 1, R_T );
  2092 		sh4_x86.tstate = TSTATE_E;
  2093 	    }
  2094 	    JCC_cc_rel32(sh4_x86.tstate^1,0);
  2095 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  2097 	    int save_tstate = sh4_x86.tstate;
  2098 	    sh4_translate_instruction(pc+2);
  2099             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  2100 	    exit_block_rel( disp + pc + 4, pc+4 );
  2101 	    // not taken
  2102 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  2103 	    sh4_x86.tstate = save_tstate;
  2104 	    sh4_translate_instruction(pc+2);
  2105 	    return 4;
  2106 	}
  2107     }
  2108 :}
  2109 JMP @Rn {:  
  2110     COUNT_INST(I_JMP);
  2111     if( sh4_x86.in_delay_slot ) {
  2112 	SLOTILLEGAL();
  2113     } else {
  2114 	load_reg( REG_ECX, Rn );
  2115 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2116 	sh4_x86.in_delay_slot = DELAY_PC;
  2117 	sh4_x86.branch_taken = TRUE;
  2118 	if( UNTRANSLATABLE(pc+2) ) {
  2119 	    exit_block_emu(pc+2);
  2120 	    return 2;
  2121 	} else {
  2122 	    sh4_translate_instruction(pc+2);
  2123 	    exit_block_newpcset(pc+4);
  2124 	    return 4;
  2125 	}
  2126     }
  2127 :}
  2128 JSR @Rn {:  
  2129     COUNT_INST(I_JSR);
  2130     if( sh4_x86.in_delay_slot ) {
  2131 	SLOTILLEGAL();
  2132     } else {
  2133 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2134 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2135 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2136 	load_reg( REG_ECX, Rn );
  2137 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2138 	sh4_x86.in_delay_slot = DELAY_PC;
  2139 	sh4_x86.branch_taken = TRUE;
  2140 	sh4_x86.tstate = TSTATE_NONE;
  2141 	if( UNTRANSLATABLE(pc+2) ) {
  2142 	    exit_block_emu(pc+2);
  2143 	    return 2;
  2144 	} else {
  2145 	    sh4_translate_instruction(pc+2);
  2146 	    exit_block_newpcset(pc+4);
  2147 	    return 4;
  2148 	}
  2149     }
  2150 :}
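       /* Illustrative note: the unconditional delayed branches above share one
        * pattern: write the target into sh4r.new_pc, translate the delay-slot
        * instruction with in_delay_slot set, then leave the block via
        * exit_block_newpcset() (target only known at runtime) or
        * exit_block_rel() (target known at translation time).  When the delay
        * slot itself can't be translated, the block instead bails out to the
        * emulation core with exit_block_emu().
        */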
  2151 RTE {:  
  2152     COUNT_INST(I_RTE);
  2153     if( sh4_x86.in_delay_slot ) {
  2154 	SLOTILLEGAL();
  2155     } else {
  2156 	check_priv();
  2157 	MOVL_rbpdisp_r32( R_SPC, REG_ECX );
  2158 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2159 	MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2160 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2161 	sh4_x86.in_delay_slot = DELAY_PC;
  2162 	sh4_x86.fpuen_checked = FALSE;
  2163 	sh4_x86.tstate = TSTATE_NONE;
  2164 	sh4_x86.branch_taken = TRUE;
  2165 	sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2166 	if( UNTRANSLATABLE(pc+2) ) {
  2167 	    exit_block_emu(pc+2);
  2168 	    return 2;
  2169 	} else {
  2170 	    sh4_translate_instruction(pc+2);
  2171 	    exit_block_newpcset(pc+4);
  2172 	    return 4;
  2173 	}
  2174     }
  2175 :}
  2176 RTS {:  
  2177     COUNT_INST(I_RTS);
  2178     if( sh4_x86.in_delay_slot ) {
  2179 	SLOTILLEGAL();
  2180     } else {
  2181 	MOVL_rbpdisp_r32( R_PR, REG_ECX );
  2182 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2183 	sh4_x86.in_delay_slot = DELAY_PC;
  2184 	sh4_x86.branch_taken = TRUE;
  2185 	if( UNTRANSLATABLE(pc+2) ) {
  2186 	    exit_block_emu(pc+2);
  2187 	    return 2;
  2188 	} else {
  2189 	    sh4_translate_instruction(pc+2);
  2190 	    exit_block_newpcset(pc+4);
  2191 	    return 4;
  2192 	}
  2193     }
  2194 :}
  2195 TRAPA #imm {:  
  2196     COUNT_INST(I_TRAPA);
  2197     if( sh4_x86.in_delay_slot ) {
  2198 	SLOTILLEGAL();
  2199     } else {
  2200 	MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX );   // 5
  2201 	ADDL_r32_rbpdisp( REG_ECX, R_PC );
  2202 	MOVL_imm32_r32( imm, REG_EAX );
  2203 	CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
  2204 	sh4_x86.tstate = TSTATE_NONE;
  2205 	exit_block_pcset(pc+2);
  2206 	sh4_x86.branch_taken = TRUE;
  2207 	return 2;
  2208     }
  2209 :}
  2210 UNDEF {:  
  2211     COUNT_INST(I_UNDEF);
  2212     if( sh4_x86.in_delay_slot ) {
  2213 	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4);    
  2214     } else {
  2215 	exit_block_exc(EXC_ILLEGAL, pc, 2);    
  2216 	return 2;
  2217     }
  2218 :}
  2220 CLRMAC {:  
  2221     COUNT_INST(I_CLRMAC);
  2222     XORL_r32_r32(REG_EAX, REG_EAX);
  2223     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2224     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2225     sh4_x86.tstate = TSTATE_NONE;
  2226 :}
  2227 CLRS {:
  2228     COUNT_INST(I_CLRS);
  2229     CLC();
  2230     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2231     sh4_x86.tstate = TSTATE_NONE;
  2232 :}
  2233 CLRT {:  
  2234     COUNT_INST(I_CLRT);
  2235     CLC();
  2236     SETC_t();
  2237     sh4_x86.tstate = TSTATE_C;
  2238 :}
  2239 SETS {:  
  2240     COUNT_INST(I_SETS);
  2241     STC();
  2242     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2243     sh4_x86.tstate = TSTATE_NONE;
  2244 :}
  2245 SETT {:  
  2246     COUNT_INST(I_SETT);
  2247     STC();
  2248     SETC_t();
  2249     sh4_x86.tstate = TSTATE_C;
  2250 :}
  2252 /* Floating point moves */
  2253 FMOV FRm, FRn {:  
  2254     COUNT_INST(I_FMOV1);
  2255     check_fpuen();
  2256     if( sh4_x86.double_size ) {
  2257         load_dr0( REG_EAX, FRm );
  2258         load_dr1( REG_ECX, FRm );
  2259         store_dr0( REG_EAX, FRn );
  2260         store_dr1( REG_ECX, FRn );
  2261     } else {
  2262         load_fr( REG_EAX, FRm ); // SZ=0 branch
  2263         store_fr( REG_EAX, FRn );
  2264     }
  2265 :}
  2266 FMOV FRm, @Rn {: 
  2267     COUNT_INST(I_FMOV2);
  2268     check_fpuen();
  2269     load_reg( REG_EAX, Rn );
  2270     if( sh4_x86.double_size ) {
  2271         check_walign64( REG_EAX );
  2272         load_dr0( REG_EDX, FRm );
  2273         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2274         load_reg( REG_EAX, Rn );
  2275         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2276         load_dr1( REG_EDX, FRm );
  2277         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2278     } else {
  2279         check_walign32( REG_EAX );
  2280         load_fr( REG_EDX, FRm );
  2281         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2282     }
  2283     sh4_x86.tstate = TSTATE_NONE;
  2284 :}
  2285 FMOV @Rm, FRn {:  
  2286     COUNT_INST(I_FMOV5);
  2287     check_fpuen();
  2288     load_reg( REG_EAX, Rm );
  2289     if( sh4_x86.double_size ) {
  2290         check_ralign64( REG_EAX );
  2291         MEM_READ_LONG( REG_EAX, REG_EAX );
  2292         store_dr0( REG_EAX, FRn );
  2293         load_reg( REG_EAX, Rm );
  2294         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2295         MEM_READ_LONG( REG_EAX, REG_EAX );
  2296         store_dr1( REG_EAX, FRn );
  2297     } else {
  2298         check_ralign32( REG_EAX );
  2299         MEM_READ_LONG( REG_EAX, REG_EAX );
  2300         store_fr( REG_EAX, FRn );
  2301     }
  2302     sh4_x86.tstate = TSTATE_NONE;
  2303 :}
  2304 FMOV FRm, @-Rn {:  
  2305     COUNT_INST(I_FMOV3);
  2306     check_fpuen();
  2307     load_reg( REG_EAX, Rn );
  2308     if( sh4_x86.double_size ) {
  2309         check_walign64( REG_EAX );
  2310         LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
  2311         load_dr0( REG_EDX, FRm );
  2312         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2313         load_reg( REG_EAX, Rn );
  2314         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2315         load_dr1( REG_EDX, FRm );
  2316         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2317         ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
  2318     } else {
  2319         check_walign32( REG_EAX );
  2320         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2321         load_fr( REG_EDX, FRm );
  2322         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2323         ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
  2324     }
  2325     sh4_x86.tstate = TSTATE_NONE;
  2326 :}
  2327 FMOV @Rm+, FRn {:
  2328     COUNT_INST(I_FMOV6);
  2329     check_fpuen();
  2330     load_reg( REG_EAX, Rm );
  2331     if( sh4_x86.double_size ) {
  2332         check_ralign64( REG_EAX );
  2333         MEM_READ_LONG( REG_EAX, REG_EAX );
  2334         store_dr0( REG_EAX, FRn );
  2335         load_reg( REG_EAX, Rm );
  2336         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2337         MEM_READ_LONG( REG_EAX, REG_EAX );
  2338         store_dr1( REG_EAX, FRn );
  2339         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
  2340     } else {
  2341         check_ralign32( REG_EAX );
  2342         MEM_READ_LONG( REG_EAX, REG_EAX );
  2343         store_fr( REG_EAX, FRn );
  2344         ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2345     }
  2346     sh4_x86.tstate = TSTATE_NONE;
  2347 :}
  2348 FMOV FRm, @(R0, Rn) {:  
  2349     COUNT_INST(I_FMOV4);
  2350     check_fpuen();
  2351     load_reg( REG_EAX, Rn );
  2352     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2353     if( sh4_x86.double_size ) {
  2354         check_walign64( REG_EAX );
  2355         load_dr0( REG_EDX, FRm );
  2356         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2357         load_reg( REG_EAX, Rn );
  2358         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2359         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2360         load_dr1( REG_EDX, FRm );
  2361         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2362     } else {
  2363         check_walign32( REG_EAX );
  2364         load_fr( REG_EDX, FRm );
  2365         MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
  2366     }
  2367     sh4_x86.tstate = TSTATE_NONE;
  2368 :}
  2369 FMOV @(R0, Rm), FRn {:  
  2370     COUNT_INST(I_FMOV7);
  2371     check_fpuen();
  2372     load_reg( REG_EAX, Rm );
  2373     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2374     if( sh4_x86.double_size ) {
  2375         check_ralign64( REG_EAX );
  2376         MEM_READ_LONG( REG_EAX, REG_EAX );
  2377         store_dr0( REG_EAX, FRn );
  2378         load_reg( REG_EAX, Rm );
  2379         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2380         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2381         MEM_READ_LONG( REG_EAX, REG_EAX );
  2382         store_dr1( REG_EAX, FRn );
  2383     } else {
  2384         check_ralign32( REG_EAX );
  2385         MEM_READ_LONG( REG_EAX, REG_EAX );
  2386         store_fr( REG_EAX, FRn );
  2387     }
  2388     sh4_x86.tstate = TSTATE_NONE;
  2389 :}
  2390 FLDI0 FRn {:  /* IFF PR=0 */
  2391     COUNT_INST(I_FLDI0);
  2392     check_fpuen();
  2393     if( sh4_x86.double_prec == 0 ) {
  2394         XORL_r32_r32( REG_EAX, REG_EAX );
  2395         store_fr( REG_EAX, FRn );
  2396     }
  2397     sh4_x86.tstate = TSTATE_NONE;
  2398 :}
  2399 FLDI1 FRn {:  /* IFF PR=0 */
  2400     COUNT_INST(I_FLDI1);
  2401     check_fpuen();
  2402     if( sh4_x86.double_prec == 0 ) {
  2403         MOVL_imm32_r32( 0x3F800000, REG_EAX );
  2404         store_fr( REG_EAX, FRn );
  2405     }
  2406 :}
  2408 FLOAT FPUL, FRn {:  
  2409     COUNT_INST(I_FLOAT);
  2410     check_fpuen();
  2411     FILD_rbpdisp(R_FPUL);
  2412     if( sh4_x86.double_prec ) {
  2413         pop_dr( FRn );
  2414     } else {
  2415         pop_fr( FRn );
  2416     }
  2417 :}
  2418 FTRC FRm, FPUL {:  
  2419     COUNT_INST(I_FTRC);
  2420     check_fpuen();
  2421     if( sh4_x86.double_prec ) {
  2422         push_dr( FRm );
  2423     } else {
  2424         push_fr( FRm );
  2425     }
  2426     MOVP_immptr_rptr( &max_int, REG_ECX );
  2427     FILD_r32disp( REG_ECX, 0 );
  2428     FCOMIP_st(1);
  2429     JNA_label( sat );
  2430     MOVP_immptr_rptr( &min_int, REG_ECX );
  2431     FILD_r32disp( REG_ECX, 0 );
  2432     FCOMIP_st(1);              
  2433     JAE_label( sat2 );            
  2434     MOVP_immptr_rptr( &save_fcw, REG_EAX );
  2435     FNSTCW_r32disp( REG_EAX, 0 );
  2436     MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
  2437     FLDCW_r32disp( REG_EDX, 0 );
  2438     FISTP_rbpdisp(R_FPUL);             
  2439     FLDCW_r32disp( REG_EAX, 0 );
  2440     JMP_label(end);             
  2442     JMP_TARGET(sat);
  2443     JMP_TARGET(sat2);
  2444     MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
  2445     MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
  2446     FPOP_st();
  2447     JMP_TARGET(end);
  2448     sh4_x86.tstate = TSTATE_NONE;
  2449 :}
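       /* Illustrative sketch: FTRC saturates rather than wrapping, and x87
        * rounds to nearest by default, hence the clamp against max_int /
        * min_int and the temporary switch to the truncating control word.
        * The intended result is
        *
        *     if( f >= 2147483647.0 )        fpul = 0x7FFFFFFF;
        *     else if( f <= -2147483648.0 )  fpul = 0x80000000;
        *     else                           fpul = (int32_t)f;  // truncated
        */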
  2450 FLDS FRm, FPUL {:  
  2451     COUNT_INST(I_FLDS);
  2452     check_fpuen();
  2453     load_fr( REG_EAX, FRm );
  2454     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2455 :}
  2456 FSTS FPUL, FRn {:  
  2457     COUNT_INST(I_FSTS);
  2458     check_fpuen();
  2459     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2460     store_fr( REG_EAX, FRn );
  2461 :}
  2462 FCNVDS FRm, FPUL {:  
  2463     COUNT_INST(I_FCNVDS);
  2464     check_fpuen();
  2465     if( sh4_x86.double_prec ) {
  2466         push_dr( FRm );
  2467         pop_fpul();
  2468     }
  2469 :}
  2470 FCNVSD FPUL, FRn {:  
  2471     COUNT_INST(I_FCNVSD);
  2472     check_fpuen();
  2473     if( sh4_x86.double_prec ) {
  2474         push_fpul();
  2475         pop_dr( FRn );
  2476     }
  2477 :}
  2479 /* Floating point instructions */
  2480 FABS FRn {:  
  2481     COUNT_INST(I_FABS);
  2482     check_fpuen();
  2483     if( sh4_x86.double_prec ) {
  2484         push_dr(FRn);
  2485         FABS_st0();
  2486         pop_dr(FRn);
  2487     } else {
  2488         push_fr(FRn);
  2489         FABS_st0();
  2490         pop_fr(FRn);
  2491     }
  2492 :}
  2493 FADD FRm, FRn {:  
  2494     COUNT_INST(I_FADD);
  2495     check_fpuen();
  2496     if( sh4_x86.double_prec ) {
  2497         push_dr(FRm);
  2498         push_dr(FRn);
  2499         FADDP_st(1);
  2500         pop_dr(FRn);
  2501     } else {
  2502         push_fr(FRm);
  2503         push_fr(FRn);
  2504         FADDP_st(1);
  2505         pop_fr(FRn);
  2506     }
  2507 :}
  2508 FDIV FRm, FRn {:  
  2509     COUNT_INST(I_FDIV);
  2510     check_fpuen();
  2511     if( sh4_x86.double_prec ) {
  2512         push_dr(FRn);
  2513         push_dr(FRm);
  2514         FDIVP_st(1);
  2515         pop_dr(FRn);
  2516     } else {
  2517         push_fr(FRn);
  2518         push_fr(FRm);
  2519         FDIVP_st(1);
  2520         pop_fr(FRn);
  2521     }
  2522 :}
  2523 FMAC FR0, FRm, FRn {:  
  2524     COUNT_INST(I_FMAC);
  2525     check_fpuen();
  2526     if( sh4_x86.double_prec ) {
  2527         push_dr( 0 );
  2528         push_dr( FRm );
  2529         FMULP_st(1);
  2530         push_dr( FRn );
  2531         FADDP_st(1);
  2532         pop_dr( FRn );
  2533     } else {
  2534         push_fr( 0 );
  2535         push_fr( FRm );
  2536         FMULP_st(1);
  2537         push_fr( FRn );
  2538         FADDP_st(1);
  2539         pop_fr( FRn );
  2540     }
  2541 :}
  2543 FMUL FRm, FRn {:  
  2544     COUNT_INST(I_FMUL);
  2545     check_fpuen();
  2546     if( sh4_x86.double_prec ) {
  2547         push_dr(FRm);
  2548         push_dr(FRn);
  2549         FMULP_st(1);
  2550         pop_dr(FRn);
  2551     } else {
  2552         push_fr(FRm);
  2553         push_fr(FRn);
  2554         FMULP_st(1);
  2555         pop_fr(FRn);
  2556     }
  2557 :}
  2558 FNEG FRn {:  
  2559     COUNT_INST(I_FNEG);
  2560     check_fpuen();
  2561     if( sh4_x86.double_prec ) {
  2562         push_dr(FRn);
  2563         FCHS_st0();
  2564         pop_dr(FRn);
  2565     } else {
  2566         push_fr(FRn);
  2567         FCHS_st0();
  2568         pop_fr(FRn);
  2569     }
  2570 :}
  2571 FSRRA FRn {:  
  2572     COUNT_INST(I_FSRRA);
  2573     check_fpuen();
  2574     if( sh4_x86.double_prec == 0 ) {
  2575         FLD1_st0();
  2576         push_fr(FRn);
  2577         FSQRT_st0();
  2578         FDIVP_st(1);
  2579         pop_fr(FRn);
  2580     }
  2581 :}
  2582 FSQRT FRn {:  
  2583     COUNT_INST(I_FSQRT);
  2584     check_fpuen();
  2585     if( sh4_x86.double_prec ) {
  2586         push_dr(FRn);
  2587         FSQRT_st0();
  2588         pop_dr(FRn);
  2589     } else {
  2590         push_fr(FRn);
  2591         FSQRT_st0();
  2592         pop_fr(FRn);
  2593     }
  2594 :}
  2595 FSUB FRm, FRn {:  
  2596     COUNT_INST(I_FSUB);
  2597     check_fpuen();
  2598     if( sh4_x86.double_prec ) {
  2599         push_dr(FRn);
  2600         push_dr(FRm);
  2601         FSUBP_st(1);
  2602         pop_dr(FRn);
  2603     } else {
  2604         push_fr(FRn);
  2605         push_fr(FRm);
  2606         FSUBP_st(1);
  2607         pop_fr(FRn);
  2608     }
  2609 :}
  2611 FCMP/EQ FRm, FRn {:  
  2612     COUNT_INST(I_FCMPEQ);
  2613     check_fpuen();
  2614     if( sh4_x86.double_prec ) {
  2615         push_dr(FRm);
  2616         push_dr(FRn);
  2617     } else {
  2618         push_fr(FRm);
  2619         push_fr(FRn);
  2620     }
  2621     FCOMIP_st(1);
  2622     SETE_t();
  2623     FPOP_st();
  2624     sh4_x86.tstate = TSTATE_E;
  2625 :}
  2626 FCMP/GT FRm, FRn {:  
  2627     COUNT_INST(I_FCMPGT);
  2628     check_fpuen();
  2629     if( sh4_x86.double_prec ) {
  2630         push_dr(FRm);
  2631         push_dr(FRn);
  2632     } else {
  2633         push_fr(FRm);
  2634         push_fr(FRn);
  2635     }
  2636     FCOMIP_st(1);
  2637     SETA_t();
  2638     FPOP_st();
  2639     sh4_x86.tstate = TSTATE_A;
  2640 :}
  2642 FSCA FPUL, FRn {:  
  2643     COUNT_INST(I_FSCA);
  2644     check_fpuen();
  2645     if( sh4_x86.double_prec == 0 ) {
  2646         LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
  2647         MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2648         CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
  2649     }
  2650     sh4_x86.tstate = TSTATE_NONE;
  2651 :}
  2652 FIPR FVm, FVn {:  
  2653     COUNT_INST(I_FIPR);
  2654     check_fpuen();
  2655     if( sh4_x86.double_prec == 0 ) {
  2656         if( sh4_x86.sse3_enabled ) {
  2657             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2658             MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2659             HADDPS_xmm_xmm( 4, 4 ); 
  2660             HADDPS_xmm_xmm( 4, 4 );
  2661             MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2662         } else {
  2663             push_fr( FVm<<2 );
  2664             push_fr( FVn<<2 );
  2665             FMULP_st(1);
  2666             push_fr( (FVm<<2)+1);
  2667             push_fr( (FVn<<2)+1);
  2668             FMULP_st(1);
  2669             FADDP_st(1);
  2670             push_fr( (FVm<<2)+2);
  2671             push_fr( (FVn<<2)+2);
  2672             FMULP_st(1);
  2673             FADDP_st(1);
  2674             push_fr( (FVm<<2)+3);
  2675             push_fr( (FVn<<2)+3);
  2676             FMULP_st(1);
  2677             FADDP_st(1);
  2678             pop_fr( (FVn<<2)+3);
  2679         }
  2680     }
  2681 :}
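       /* Illustrative sketch: FIPR is a 4-element single-precision dot
        * product whose result lands in the last element of FVn; with FR(i)
        * as a hypothetical accessor for architectural register FRi:
        *
        *     float sum = 0;
        *     for( int i = 0; i < 4; i++ )
        *         sum += FR(4*FVm+i) * FR(4*FVn+i);
        *     FR(4*FVn+3) = sum;
        *
        * The SSE3 path forms the four products with MULPS and reduces them
        * with two horizontal adds.
        */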
  2682 FTRV XMTRX, FVn {:  
  2683     COUNT_INST(I_FTRV);
  2684     check_fpuen();
  2685     if( sh4_x86.double_prec == 0 ) {
  2686         if( sh4_x86.sse3_enabled ) {
  2687             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2688             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2689             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2690             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2692             MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2693             MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2694             MOV_xmm_xmm( 4, 6 );
  2695             MOV_xmm_xmm( 5, 7 );
  2696             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2697             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2698             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2699             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2700             MULPS_xmm_xmm( 0, 4 );
  2701             MULPS_xmm_xmm( 1, 5 );
  2702             MULPS_xmm_xmm( 2, 6 );
  2703             MULPS_xmm_xmm( 3, 7 );
  2704             ADDPS_xmm_xmm( 5, 4 );
  2705             ADDPS_xmm_xmm( 7, 6 );
  2706             ADDPS_xmm_xmm( 6, 4 );
  2707             MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2708         } else {
  2709             LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
  2710             CALL1_ptr_r32( sh4_ftrv, REG_EAX );
  2711         }
  2712     }
  2713     sh4_x86.tstate = TSTATE_NONE;
  2714 :}
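       /* Illustrative sketch: FTRV multiplies the back-bank 4x4 matrix XMTRX
        * by vector FVn.  With XF(i)/FR(i) as hypothetical accessors for the
        * architectural registers, the scalar equivalent is
        *
        *     float res[4];
        *     for( int i = 0; i < 4; i++ ) {
        *         res[i] = 0;
        *         for( int j = 0; j < 4; j++ )
        *             res[i] += XF(4*j+i) * FR(4*FVn+j);
        *     }
        *     for( int i = 0; i < 4; i++ ) FR(4*FVn+i) = res[i];
        *
        * The shuffles in the SSE3 path reflect the pair-swapped in-memory
        * layout of the FP register banks.
        */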
  2716 FRCHG {:  
  2717     COUNT_INST(I_FRCHG);
  2718     check_fpuen();
  2719     XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
  2720     CALL_ptr( sh4_switch_fr_banks );
  2721     sh4_x86.tstate = TSTATE_NONE;
  2722 :}
  2723 FSCHG {:  
  2724     COUNT_INST(I_FSCHG);
  2725     check_fpuen();
  2726     XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
  2727     XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
  2728     sh4_x86.tstate = TSTATE_NONE;
  2729     sh4_x86.double_size = !sh4_x86.double_size;
  2730     sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
  2731 :}
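       /* Illustrative note: FSCHG toggles FPSCR.SZ, so the code above flips
        * both the architectural FPSCR copy and xlat_sh4_mode (the mode word
        * kept alongside sh4r for translated-block lookup), while double_size
        * tracks the new 32/64-bit transfer size statically for the rest of
        * the block.
        */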
  2733 /* Processor control instructions */
  2734 LDC Rm, SR {:
  2735     COUNT_INST(I_LDCSR);
  2736     if( sh4_x86.in_delay_slot ) {
  2737 	SLOTILLEGAL();
  2738     } else {
  2739 	check_priv();
  2740 	load_reg( REG_EAX, Rm );
  2741 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2742 	sh4_x86.fpuen_checked = FALSE;
  2743 	sh4_x86.tstate = TSTATE_NONE;
  2744 	sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2745 	return 2;
  2746     }
  2747 :}
  2748 LDC Rm, GBR {: 
  2749     COUNT_INST(I_LDC);
  2750     load_reg( REG_EAX, Rm );
  2751     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2752 :}
  2753 LDC Rm, VBR {:  
  2754     COUNT_INST(I_LDC);
  2755     check_priv();
  2756     load_reg( REG_EAX, Rm );
  2757     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2758     sh4_x86.tstate = TSTATE_NONE;
  2759 :}
  2760 LDC Rm, SSR {:  
  2761     COUNT_INST(I_LDC);
  2762     check_priv();
  2763     load_reg( REG_EAX, Rm );
  2764     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2765     sh4_x86.tstate = TSTATE_NONE;
  2766 :}
  2767 LDC Rm, SGR {:  
  2768     COUNT_INST(I_LDC);
  2769     check_priv();
  2770     load_reg( REG_EAX, Rm );
  2771     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2772     sh4_x86.tstate = TSTATE_NONE;
  2773 :}
  2774 LDC Rm, SPC {:  
  2775     COUNT_INST(I_LDC);
  2776     check_priv();
  2777     load_reg( REG_EAX, Rm );
  2778     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2779     sh4_x86.tstate = TSTATE_NONE;
  2780 :}
  2781 LDC Rm, DBR {:  
  2782     COUNT_INST(I_LDC);
  2783     check_priv();
  2784     load_reg( REG_EAX, Rm );
  2785     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2786     sh4_x86.tstate = TSTATE_NONE;
  2787 :}
  2788 LDC Rm, Rn_BANK {:  
  2789     COUNT_INST(I_LDC);
  2790     check_priv();
  2791     load_reg( REG_EAX, Rm );
  2792     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2793     sh4_x86.tstate = TSTATE_NONE;
  2794 :}
  2795 LDC.L @Rm+, GBR {:  
  2796     COUNT_INST(I_LDCM);
  2797     load_reg( REG_EAX, Rm );
  2798     check_ralign32( REG_EAX );
  2799     MEM_READ_LONG( REG_EAX, REG_EAX );
  2800     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2801     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2802     sh4_x86.tstate = TSTATE_NONE;
  2803 :}
  2804 LDC.L @Rm+, SR {:
  2805     COUNT_INST(I_LDCSRM);
  2806     if( sh4_x86.in_delay_slot ) {
  2807 	SLOTILLEGAL();
  2808     } else {
  2809 	check_priv();
  2810 	load_reg( REG_EAX, Rm );
  2811 	check_ralign32( REG_EAX );
  2812 	MEM_READ_LONG( REG_EAX, REG_EAX );
  2813 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2814 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2815 	sh4_x86.fpuen_checked = FALSE;
  2816 	sh4_x86.tstate = TSTATE_NONE;
  2817 	sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2818 	return 2;
  2819     }
  2820 :}
  2821 LDC.L @Rm+, VBR {:  
  2822     COUNT_INST(I_LDCM);
  2823     check_priv();
  2824     load_reg( REG_EAX, Rm );
  2825     check_ralign32( REG_EAX );
  2826     MEM_READ_LONG( REG_EAX, REG_EAX );
  2827     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2828     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2829     sh4_x86.tstate = TSTATE_NONE;
  2830 :}
  2831 LDC.L @Rm+, SSR {:
  2832     COUNT_INST(I_LDCM);
  2833     check_priv();
  2834     load_reg( REG_EAX, Rm );
  2835     check_ralign32( REG_EAX );
  2836     MEM_READ_LONG( REG_EAX, REG_EAX );
  2837     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2838     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2839     sh4_x86.tstate = TSTATE_NONE;
  2840 :}
  2841 LDC.L @Rm+, SGR {:  
  2842     COUNT_INST(I_LDCM);
  2843     check_priv();
  2844     load_reg( REG_EAX, Rm );
  2845     check_ralign32( REG_EAX );
  2846     MEM_READ_LONG( REG_EAX, REG_EAX );
  2847     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2848     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2849     sh4_x86.tstate = TSTATE_NONE;
  2850 :}
  2851 LDC.L @Rm+, SPC {:  
  2852     COUNT_INST(I_LDCM);
  2853     check_priv();
  2854     load_reg( REG_EAX, Rm );
  2855     check_ralign32( REG_EAX );
  2856     MEM_READ_LONG( REG_EAX, REG_EAX );
  2857     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2858     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2859     sh4_x86.tstate = TSTATE_NONE;
  2860 :}
  2861 LDC.L @Rm+, DBR {:  
  2862     COUNT_INST(I_LDCM);
  2863     check_priv();
  2864     load_reg( REG_EAX, Rm );
  2865     check_ralign32( REG_EAX );
  2866     MEM_READ_LONG( REG_EAX, REG_EAX );
  2867     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2868     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2869     sh4_x86.tstate = TSTATE_NONE;
  2870 :}
  2871 LDC.L @Rm+, Rn_BANK {:  
  2872     COUNT_INST(I_LDCM);
  2873     check_priv();
  2874     load_reg( REG_EAX, Rm );
  2875     check_ralign32( REG_EAX );
  2876     MEM_READ_LONG( REG_EAX, REG_EAX );
  2877     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2878     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2879     sh4_x86.tstate = TSTATE_NONE;
  2880 :}
  2881 LDS Rm, FPSCR {:
  2882     COUNT_INST(I_LDSFPSCR);
  2883     check_fpuen();
  2884     load_reg( REG_EAX, Rm );
  2885     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2886     sh4_x86.tstate = TSTATE_NONE;
  2887     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2888     return 2;
  2889 :}
  2890 LDS.L @Rm+, FPSCR {:  
  2891     COUNT_INST(I_LDSFPSCRM);
  2892     check_fpuen();
  2893     load_reg( REG_EAX, Rm );
  2894     check_ralign32( REG_EAX );
  2895     MEM_READ_LONG( REG_EAX, REG_EAX );
  2896     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2897     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2898     sh4_x86.tstate = TSTATE_NONE;
  2899     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2900     return 2;
  2901 :}
  2902 LDS Rm, FPUL {:  
  2903     COUNT_INST(I_LDS);
  2904     check_fpuen();
  2905     load_reg( REG_EAX, Rm );
  2906     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2907 :}
  2908 LDS.L @Rm+, FPUL {:  
  2909     COUNT_INST(I_LDSM);
  2910     check_fpuen();
  2911     load_reg( REG_EAX, Rm );
  2912     check_ralign32( REG_EAX );
  2913     MEM_READ_LONG( REG_EAX, REG_EAX );
  2914     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2915     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2916     sh4_x86.tstate = TSTATE_NONE;
  2917 :}
  2918 LDS Rm, MACH {: 
  2919     COUNT_INST(I_LDS);
  2920     load_reg( REG_EAX, Rm );
  2921     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2922 :}
  2923 LDS.L @Rm+, MACH {:  
  2924     COUNT_INST(I_LDSM);
  2925     load_reg( REG_EAX, Rm );
  2926     check_ralign32( REG_EAX );
  2927     MEM_READ_LONG( REG_EAX, REG_EAX );
  2928     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2929     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2930     sh4_x86.tstate = TSTATE_NONE;
  2931 :}
  2932 LDS Rm, MACL {:  
  2933     COUNT_INST(I_LDS);
  2934     load_reg( REG_EAX, Rm );
  2935     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2936 :}
  2937 LDS.L @Rm+, MACL {:  
  2938     COUNT_INST(I_LDSM);
  2939     load_reg( REG_EAX, Rm );
  2940     check_ralign32( REG_EAX );
  2941     MEM_READ_LONG( REG_EAX, REG_EAX );
  2942     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2943     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2944     sh4_x86.tstate = TSTATE_NONE;
  2945 :}
  2946 LDS Rm, PR {:  
  2947     COUNT_INST(I_LDS);
  2948     load_reg( REG_EAX, Rm );
  2949     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2950 :}
  2951 LDS.L @Rm+, PR {:  
  2952     COUNT_INST(I_LDSM);
  2953     load_reg( REG_EAX, Rm );
  2954     check_ralign32( REG_EAX );
  2955     MEM_READ_LONG( REG_EAX, REG_EAX );
  2956     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2957     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2958     sh4_x86.tstate = TSTATE_NONE;
  2959 :}
  2960 LDTLB {:  
  2961     COUNT_INST(I_LDTLB);
  2962     CALL_ptr( MMU_ldtlb );
  2963     sh4_x86.tstate = TSTATE_NONE;
  2964 :}
  2965 OCBI @Rn {:
  2966     COUNT_INST(I_OCBI);
  2967 :}
  2968 OCBP @Rn {:
  2969     COUNT_INST(I_OCBP);
  2970 :}
  2971 OCBWB @Rn {:
  2972     COUNT_INST(I_OCBWB);
  2973 :}
  2974 PREF @Rn {:
  2975     COUNT_INST(I_PREF);
  2976     load_reg( REG_EAX, Rn );
  2977     MEM_PREFETCH( REG_EAX );
  2978     sh4_x86.tstate = TSTATE_NONE;
  2979 :}
  2980 SLEEP {: 
  2981     COUNT_INST(I_SLEEP);
  2982     check_priv();
  2983     CALL_ptr( sh4_sleep );
  2984     sh4_x86.tstate = TSTATE_NONE;
  2985     sh4_x86.in_delay_slot = DELAY_NONE;
  2986     return 2;
  2987 :}
  2988 STC SR, Rn {:
  2989     COUNT_INST(I_STCSR);
  2990     check_priv();
  2991     CALL_ptr(sh4_read_sr);
  2992     store_reg( REG_EAX, Rn );
  2993     sh4_x86.tstate = TSTATE_NONE;
  2994 :}
  2995 STC GBR, Rn {:  
  2996     COUNT_INST(I_STC);
  2997     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  2998     store_reg( REG_EAX, Rn );
  2999 :}
  3000 STC VBR, Rn {:  
  3001     COUNT_INST(I_STC);
  3002     check_priv();
  3003     MOVL_rbpdisp_r32( R_VBR, REG_EAX );
  3004     store_reg( REG_EAX, Rn );
  3005     sh4_x86.tstate = TSTATE_NONE;
  3006 :}
  3007 STC SSR, Rn {:  
  3008     COUNT_INST(I_STC);
  3009     check_priv();
  3010     MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  3011     store_reg( REG_EAX, Rn );
  3012     sh4_x86.tstate = TSTATE_NONE;
  3013 :}
  3014 STC SPC, Rn {:  
  3015     COUNT_INST(I_STC);
  3016     check_priv();
  3017     MOVL_rbpdisp_r32( R_SPC, REG_EAX );
  3018     store_reg( REG_EAX, Rn );
  3019     sh4_x86.tstate = TSTATE_NONE;
  3020 :}
  3021 STC SGR, Rn {:  
  3022     COUNT_INST(I_STC);
  3023     check_priv();
  3024     MOVL_rbpdisp_r32( R_SGR, REG_EAX );
  3025     store_reg( REG_EAX, Rn );
  3026     sh4_x86.tstate = TSTATE_NONE;
  3027 :}
  3028 STC DBR, Rn {:  
  3029     COUNT_INST(I_STC);
  3030     check_priv();
  3031     MOVL_rbpdisp_r32( R_DBR, REG_EAX );
  3032     store_reg( REG_EAX, Rn );
  3033     sh4_x86.tstate = TSTATE_NONE;
  3034 :}
  3035 STC Rm_BANK, Rn {:
  3036     COUNT_INST(I_STC);
  3037     check_priv();
  3038     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
  3039     store_reg( REG_EAX, Rn );
  3040     sh4_x86.tstate = TSTATE_NONE;
  3041 :}
  3042 STC.L SR, @-Rn {:
  3043     COUNT_INST(I_STCSRM);
  3044     check_priv();
  3045     CALL_ptr( sh4_read_sr );
  3046     MOVL_r32_r32( REG_EAX, REG_EDX );
  3047     load_reg( REG_EAX, Rn );
  3048     check_walign32( REG_EAX );
  3049     LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  3050     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3051     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3052     sh4_x86.tstate = TSTATE_NONE;
  3053 :}
  3054 STC.L VBR, @-Rn {:  
  3055     COUNT_INST(I_STCM);
  3056     check_priv();
  3057     load_reg( REG_EAX, Rn );
  3058     check_walign32( REG_EAX );
  3059     ADDL_imms_r32( -4, REG_EAX );
  3060     MOVL_rbpdisp_r32( R_VBR, REG_EDX );
  3061     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3062     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3063     sh4_x86.tstate = TSTATE_NONE;
  3064 :}
  3065 STC.L SSR, @-Rn {:  
  3066     COUNT_INST(I_STCM);
  3067     check_priv();
  3068     load_reg( REG_EAX, Rn );
  3069     check_walign32( REG_EAX );
  3070     ADDL_imms_r32( -4, REG_EAX );
  3071     MOVL_rbpdisp_r32( R_SSR, REG_EDX );
  3072     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3073     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3074     sh4_x86.tstate = TSTATE_NONE;
  3075 :}
  3076 STC.L SPC, @-Rn {:
  3077     COUNT_INST(I_STCM);
  3078     check_priv();
  3079     load_reg( REG_EAX, Rn );
  3080     check_walign32( REG_EAX );
  3081     ADDL_imms_r32( -4, REG_EAX );
  3082     MOVL_rbpdisp_r32( R_SPC, REG_EDX );
  3083     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3084     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3085     sh4_x86.tstate = TSTATE_NONE;
  3086 :}
  3087 STC.L SGR, @-Rn {:  
  3088     COUNT_INST(I_STCM);
  3089     check_priv();
  3090     load_reg( REG_EAX, Rn );
  3091     check_walign32( REG_EAX );
  3092     ADDL_imms_r32( -4, REG_EAX );
  3093     MOVL_rbpdisp_r32( R_SGR, REG_EDX );
  3094     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3095     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3096     sh4_x86.tstate = TSTATE_NONE;
  3097 :}
  3098 STC.L DBR, @-Rn {:  
  3099     COUNT_INST(I_STCM);
  3100     check_priv();
  3101     load_reg( REG_EAX, Rn );
  3102     check_walign32( REG_EAX );
  3103     ADDL_imms_r32( -4, REG_EAX );
  3104     MOVL_rbpdisp_r32( R_DBR, REG_EDX );
  3105     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3106     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3107     sh4_x86.tstate = TSTATE_NONE;
  3108 :}
  3109 STC.L Rm_BANK, @-Rn {:  
  3110     COUNT_INST(I_STCM);
  3111     check_priv();
  3112     load_reg( REG_EAX, Rn );
  3113     check_walign32( REG_EAX );
  3114     ADDL_imms_r32( -4, REG_EAX );
  3115     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
  3116     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3117     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3118     sh4_x86.tstate = TSTATE_NONE;
  3119 :}
  3120 STC.L GBR, @-Rn {:  
  3121     COUNT_INST(I_STCM);
  3122     load_reg( REG_EAX, Rn );
  3123     check_walign32( REG_EAX );
  3124     ADDL_imms_r32( -4, REG_EAX );
  3125     MOVL_rbpdisp_r32( R_GBR, REG_EDX );
  3126     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3127     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3128     sh4_x86.tstate = TSTATE_NONE;
  3129 :}
  3130 STS FPSCR, Rn {:  
  3131     COUNT_INST(I_STSFPSCR);
  3132     check_fpuen();
  3133     MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
  3134     store_reg( REG_EAX, Rn );
  3135 :}
  3136 STS.L FPSCR, @-Rn {:  
  3137     COUNT_INST(I_STSFPSCRM);
  3138     check_fpuen();
  3139     load_reg( REG_EAX, Rn );
  3140     check_walign32( REG_EAX );
  3141     ADDL_imms_r32( -4, REG_EAX );
  3142     MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
  3143     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3144     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3145     sh4_x86.tstate = TSTATE_NONE;
  3146 :}
  3147 STS FPUL, Rn {:  
  3148     COUNT_INST(I_STS);
  3149     check_fpuen();
  3150     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  3151     store_reg( REG_EAX, Rn );
  3152 :}
  3153 STS.L FPUL, @-Rn {:  
  3154     COUNT_INST(I_STSM);
  3155     check_fpuen();
  3156     load_reg( REG_EAX, Rn );
  3157     check_walign32( REG_EAX );
  3158     ADDL_imms_r32( -4, REG_EAX );
  3159     MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
  3160     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3161     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3162     sh4_x86.tstate = TSTATE_NONE;
  3163 :}
  3164 STS MACH, Rn {:  
  3165     COUNT_INST(I_STS);
  3166     MOVL_rbpdisp_r32( R_MACH, REG_EAX );
  3167     store_reg( REG_EAX, Rn );
  3168 :}
  3169 STS.L MACH, @-Rn {:  
  3170     COUNT_INST(I_STSM);
  3171     load_reg( REG_EAX, Rn );
  3172     check_walign32( REG_EAX );
  3173     ADDL_imms_r32( -4, REG_EAX );
  3174     MOVL_rbpdisp_r32( R_MACH, REG_EDX );
  3175     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3176     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3177     sh4_x86.tstate = TSTATE_NONE;
  3178 :}
  3179 STS MACL, Rn {:  
  3180     COUNT_INST(I_STS);
  3181     MOVL_rbpdisp_r32( R_MACL, REG_EAX );
  3182     store_reg( REG_EAX, Rn );
  3183 :}
  3184 STS.L MACL, @-Rn {:  
  3185     COUNT_INST(I_STSM);
  3186     load_reg( REG_EAX, Rn );
  3187     check_walign32( REG_EAX );
  3188     ADDL_imms_r32( -4, REG_EAX );
  3189     MOVL_rbpdisp_r32( R_MACL, REG_EDX );
  3190     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3191     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3192     sh4_x86.tstate = TSTATE_NONE;
  3193 :}
  3194 STS PR, Rn {:  
  3195     COUNT_INST(I_STS);
  3196     MOVL_rbpdisp_r32( R_PR, REG_EAX );
  3197     store_reg( REG_EAX, Rn );
  3198 :}
  3199 STS.L PR, @-Rn {:  
  3200     COUNT_INST(I_STSM);
  3201     load_reg( REG_EAX, Rn );
  3202     check_walign32( REG_EAX );
  3203     ADDL_imms_r32( -4, REG_EAX );
  3204     MOVL_rbpdisp_r32( R_PR, REG_EDX );
  3205     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3206     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3207     sh4_x86.tstate = TSTATE_NONE;
  3208 :}
  3210 NOP {: 
  3211     COUNT_INST(I_NOP);
  3212     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  3213 :}
  3214 %%
  3215     sh4_x86.in_delay_slot = DELAY_NONE;
  3216     return 0;
  3217 }
  3220 /**
  3221  * The unwind methods only work if we were compiled with DWARF2 frame
  3222  * information (i.e. -fexceptions); otherwise we have to use the direct frame scan.
  3223  */
  3224 #ifdef HAVE_EXCEPTIONS
  3225 #include <unwind.h>
  3227 struct UnwindInfo {
  3228     uintptr_t block_start;
  3229     uintptr_t block_end;
  3230     void *pc;
  3231 };
  3233 static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
  3234 {
  3235     struct UnwindInfo *info = arg;
  3236     void *pc = (void *)_Unwind_GetIP(context);
  3237     if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
  3238         info->pc = pc;
  3239         return _URC_NORMAL_STOP;
  3240     }
  3241     return _URC_NO_REASON;
  3242 }
  3244 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3245 {
  3246     struct _Unwind_Exception exc;
  3247     struct UnwindInfo info;
  3249     info.pc = NULL;
  3250     info.block_start = (uintptr_t)code;
  3251     info.block_end = info.block_start + code_size;
  3252     void *result = NULL;
  3253     _Unwind_Backtrace( xlat_check_frame, &info );
  3254     return info.pc;
  3255 }
  3256 #else
  3257 /* Assume this is an ia32 build - amd64 should always have dwarf information */
  3258 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3259 {
  3260     void *result = NULL;
  3261     __asm__(
  3262         "mov %%ebp, %%eax\n\t"
  3263         "mov $0x8, %%ecx\n\t"
  3264         "mov %1, %%edx\n"
  3265         "frame_loop: test %%eax, %%eax\n\t"
  3266         "je frame_not_found\n\t"
  3267         "cmp (%%eax), %%edx\n\t"
  3268         "je frame_found\n\t"
  3269         "sub $0x1, %%ecx\n\t"
  3270         "je frame_not_found\n\t"
  3271         "movl (%%eax), %%eax\n\t"
  3272         "jmp frame_loop\n"
  3273         "frame_found: movl 0x4(%%eax), %0\n"
  3274         "frame_not_found:"
  3275         : "=r" (result)
  3276         : "r" (((uint8_t *)&sh4r) + 128 )
  3277         : "eax", "ecx", "edx" );
  3278     return result;
  3279 }
  3280 #endif
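       /* Illustrative sketch of the frame scan above: on ia32 each frame
        * starts with the saved %ebp, with the return address one word above;
        * translated blocks run with %ebp == &sh4r + 128, so the first frame
        * whose saved-%ebp slot holds that value was called from translated
        * code, and its return address is the native PC we're after:
        *
        *     void **frame = __builtin_frame_address(0);
        *     for( int i = 0; i < 8 && frame != NULL; i++ ) {
        *         if( frame[0] == (void *)(((uint8_t *)&sh4r) + 128) )
        *             return frame[1];              // return address slot
        *         frame = (void **)frame[0];
        *     }
        *     return NULL;
        */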