Search
lxdream.org :: lxdream/src/sh4/sh4x86.in
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 1198:407659e01ef0
prev: 1197:904fba59a705
next: 1214:49152b3d8b75
author Nathan Keynes <nkeynes@lxdream.org>
date Fri Dec 16 10:08:45 2011 +1000 (8 years ago)
permissions -rw-r--r--
last change Add volatile qualifier to return-address frobbing - works around optimizer
bug in GCC versions after 4.2
view annotate diff log raw
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "lxdream.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4dasm.h"
    31 #include "sh4/sh4trans.h"
    32 #include "sh4/sh4stat.h"
    33 #include "sh4/sh4mmio.h"
    34 #include "sh4/mmu.h"
    35 #include "xlat/xltcache.h"
    36 #include "xlat/x86/x86op.h"
    37 #include "x86dasm/x86dasm.h"
    38 #include "clock.h"
    40 #define DEFAULT_BACKPATCH_SIZE 4096
    42 /* Offset of a reg relative to the sh4r structure */
    43 #define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)
    45 #define R_T      REG_OFFSET(t)
    46 #define R_Q      REG_OFFSET(q)
    47 #define R_S      REG_OFFSET(s)
    48 #define R_M      REG_OFFSET(m)
    49 #define R_SR     REG_OFFSET(sr)
    50 #define R_GBR    REG_OFFSET(gbr)
    51 #define R_SSR    REG_OFFSET(ssr)
    52 #define R_SPC    REG_OFFSET(spc)
    53 #define R_VBR    REG_OFFSET(vbr)
    54 #define R_MACH   REG_OFFSET(mac)+4
    55 #define R_MACL   REG_OFFSET(mac)
    56 #define R_PC     REG_OFFSET(pc)
    57 #define R_NEW_PC REG_OFFSET(new_pc)
    58 #define R_PR     REG_OFFSET(pr)
    59 #define R_SGR    REG_OFFSET(sgr)
    60 #define R_FPUL   REG_OFFSET(fpul)
    61 #define R_FPSCR  REG_OFFSET(fpscr)
    62 #define R_DBR    REG_OFFSET(dbr)
    63 #define R_R(rn)  REG_OFFSET(r[rn])
    64 #define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
    65 #define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
    66 #define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
    67 #define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
    68 #define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
    70 #define DELAY_NONE 0
    71 #define DELAY_PC 1
    72 #define DELAY_PC_PR 2
    74 #define SH4_MODE_UNKNOWN -1
/* One pending fixup in the currently-translating block: where in the output
 * buffer to patch, which SH4 instruction the fixup belongs to, and the
 * exception code (-2 requests a pointer-sized relocation instead of an
 * exception — see call_read_func / sh4_x86_add_backpatch). */
struct backpatch_record {
    uint32_t fixup_offset;  /* byte offset of the fixup within the block's code */
    uint32_t fixup_icount;  /* SH4 instruction count from the block start */
    int32_t exc_code;       /* exception code, or -2 for a pointer reloc */
};
/** 
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;      /* DELAY_NONE / DELAY_PC / DELAY_PC_PR */
    uint8_t *code;          /* start of the output buffer for the current block */
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc; /* SH4 PC of the first instruction in the block */
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    uint32_t sh4_mode;     /* Mirror of sh4r.xlat_sh4_mode */
    int tstate;            /* x86 condition currently mirroring SH4's T flag (TSTATE_*) */

    /* mode settings */
    gboolean tlb_on; /* True if tlb translation is active */
    struct mem_region_fn **priv_address_space; /* memory map used when SR.MD = 1 */
    struct mem_region_fn **user_address_space; /* memory map used when SR.MD = 0 */

    /* Instrumentation */
    xlat_block_begin_callback_t begin_callback; /* invoked at block entry (may be NULL) */
    xlat_block_end_callback_t end_callback;     /* invoked at block exit (may be NULL) */
    gboolean fastmem;        /* fastmem code-gen option — consumed by the translation body, not shown in this chunk */
    gboolean profile_blocks; /* emit code to count block entries (see sh4_translate_begin_block) */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn; /* number of entries currently in the list */
    uint32_t backpatch_size; /* allocated capacity, in entries */
};
/* Singleton translation state — the translator is single-threaded */
static struct sh4_x86_state sh4_x86;

/* FP conversion constants — presumably referenced from generated FPU code;
 * their uses are not in this chunk. */
static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */

/* Forward declaration: runtime helper that patches call sites into direct jumps */
static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc );
/* Symbol table handed to the x86 disassembler so call/jump targets inside
 * translated blocks can be displayed by name. Entries 2 and 3 start out NULL
 * and are filled in by sh4_translate_set_address_space(). */
static struct x86_symbol x86_symbol_table[] = {
    { "sh4r+128", ((char *)&sh4r)+128 },   /* the base register's anchor value */
    { "sh4_cpu_period", &sh4_cpu_period },
    { "sh4_address_space", NULL },         /* [2] patched at runtime */
    { "sh4_user_address_space", NULL },    /* [3] patched at runtime */
    { "sh4_translate_breakpoint_hit", sh4_translate_breakpoint_hit },
    { "sh4_translate_get_code_and_backpatch", sh4_translate_get_code_and_backpatch },
    { "sh4_write_fpscr", sh4_write_fpscr },
    { "sh4_write_sr", sh4_write_sr },
    { "sh4_read_sr", sh4_read_sr },
    { "sh4_raise_exception", sh4_raise_exception },
    { "sh4_sleep", sh4_sleep },
    { "sh4_fsca", sh4_fsca },
    { "sh4_ftrv", sh4_ftrv },
    { "sh4_switch_fr_banks", sh4_switch_fr_banks },
    { "sh4_execute_instruction", sh4_execute_instruction },
    { "signsat48", signsat48 },
    { "xlat_get_code_by_vma", xlat_get_code_by_vma },
    { "xlat_get_code", xlat_get_code }
};
   148 gboolean is_sse3_supported()
   149 {
   150     uint32_t features;
   152     __asm__ __volatile__(
   153         "mov $0x01, %%eax\n\t"
   154         "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
   155     return (features & 1) ? TRUE : FALSE;
   156 }
   158 void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
   159 {
   160     sh4_x86.priv_address_space = priv;
   161     sh4_x86.user_address_space = user;
   162     x86_symbol_table[2].ptr = priv;
   163     x86_symbol_table[3].ptr = user;
   164 }
   166 void sh4_translate_init(void)
   167 {
   168     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   169     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   170     sh4_x86.begin_callback = NULL;
   171     sh4_x86.end_callback = NULL;
   172     sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
   173     sh4_x86.fastmem = TRUE;
   174     sh4_x86.profile_blocks = FALSE;
   175     sh4_x86.sse3_enabled = is_sse3_supported();
   176     x86_disasm_init();
   177     x86_set_symtab( x86_symbol_table, sizeof(x86_symbol_table)/sizeof(struct x86_symbol) );
   178 }
   180 void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
   181 {
   182     sh4_x86.begin_callback = begin;
   183     sh4_x86.end_callback = end;
   184 }
   186 void sh4_translate_set_fastmem( gboolean flag )
   187 {
   188     sh4_x86.fastmem = flag;
   189 }
   191 void sh4_translate_set_profile_blocks( gboolean flag )
   192 {
   193     sh4_x86.profile_blocks = flag;
   194 }
   196 gboolean sh4_translate_get_profile_blocks()
   197 {
   198     return sh4_x86.profile_blocks;
   199 }
/**
 * Disassemble the given translated code block, and its source SH4 code block
 * side-by-side. The current native pc will be marked if non-null.
 */
void sh4_translate_disasm_block( FILE *out, void *code, sh4addr_t source_start, void *native_pc )
{
    char buf[256];
    char op[256];

    uintptr_t target_start = (uintptr_t)code, target_pc;
    uintptr_t target_end = target_start + xlat_get_code_size(code);
    uint32_t source_pc = source_start;
    uint32_t source_end = source_pc;   /* grows as recovery records are crossed */
    /* The recovery table maps native-code offsets back to SH4 instruction counts */
    xlat_recovery_record_t source_recov_table = XLAT_RECOVERY_TABLE(code);
    xlat_recovery_record_t source_recov_end = source_recov_table + XLAT_BLOCK_FOR_CODE(code)->recover_table_size - 1;

    for( target_pc = target_start; target_pc < target_end;  ) {
        uintptr_t pc2 = x86_disasm_instruction( target_pc, buf, sizeof(buf), op );
#if SIZEOF_VOID_P == 8
        fprintf( out, "%c%016lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                      target_pc, op, buf );
#else
        fprintf( out, "%c%08lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                      target_pc, op, buf );
#endif        
        /* Extend the printable SH4 window whenever the native pc crosses the
         * next recovery record's offset */
        if( source_recov_table < source_recov_end && 
            target_pc >= (target_start + source_recov_table->xlat_offset) ) {
            source_recov_table++;
            if( source_end < (source_start + (source_recov_table->sh4_icount)*2) )
                source_end = source_start + (source_recov_table->sh4_icount)*2;
        }

        /* Print at most one SH4 instruction beside each native instruction */
        if( source_pc < source_end ) {
            uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
            fprintf( out, " %08X: %s  %s\n", source_pc, op, buf );
            source_pc = source_pc2;
        } else {
            fprintf( out, "\n" );
        }

        target_pc = pc2;
    }

    /* Flush any SH4 instructions not yet shown (right-hand column only) */
    while( source_pc < source_end ) {
        uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
        fprintf( out, "%*c %08X: %s  %s\n", 72,' ', source_pc, op, buf );
        source_pc = source_pc2;
    }
}
   251 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   252 {
   253     int reloc_size = 4;
   255     if( exc_code == -2 ) {
   256         reloc_size = sizeof(void *);
   257     }
   259     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   260 	sh4_x86.backpatch_size <<= 1;
   261 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   262 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   263 	assert( sh4_x86.backpatch_list != NULL );
   264     }
   265     if( sh4_x86.in_delay_slot ) {
   266 	fixup_pc -= 2;
   267     }
   269     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   270 	(((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
   271     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   272     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   273     sh4_x86.backpatch_posn++;
   274 }
/* Values for sh4_x86.tstate: the x86 condition code that currently mirrors
 * SH4's T flag in the host flags register, or TSTATE_NONE when the flags are
 * stale and R_T must be re-tested (see LOAD_t below). */
#define TSTATE_NONE -1
#define TSTATE_O    X86_COND_O
#define TSTATE_C    X86_COND_C
#define TSTATE_E    X86_COND_E
#define TSTATE_NE   X86_COND_NE
#define TSTATE_G    X86_COND_G
#define TSTATE_GE   X86_COND_GE
#define TSTATE_A    X86_COND_A
#define TSTATE_AE   X86_COND_AE

/* Local-label helpers for 8-bit forward jumps: MARK_JMP8 records the address
 * of the displacement byte just emitted (xlat_output-1); JMP_TARGET patches
 * that byte so the jump lands at the current output position. */
#define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
#define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)

/* Convenience instructions */
#define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
#define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
#define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
#define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
#define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
#define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
#define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
#define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
#define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
#define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
/* Short conditional jumps paired with MARK_JMP8/JMP_TARGET local labels */
#define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
#define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
#define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
#define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
#define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
#define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
#define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
#define JP_label(label)  JCC_cc_rel8(X86_COND_P,-1); MARK_JMP8(label)
#define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
#define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
/* 32-bit conditional jump into the exception trailer; the displacement is
 * filled in later from the backpatch record added here. */
#define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)

/* Ensure the host flags reflect SH4's T value, re-comparing R_T if stale */
#define LOAD_t() if( sh4_x86.tstate == TSTATE_NONE ) { \
	CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; }     

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_label(label) LOAD_t() \
    JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_label(label) LOAD_t() \
    JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
/* Move an SH4 general register to/from an integer x86 register */
#define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
#define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 * Note the ^1 pair-swap — same storage layout as R_FR above.
 */
#define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
#define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register 
 */
#define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
#define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )

/**
 * Store an FR register (single-precision floating point) from an integer x86+
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

/* x87 stack push/pop of SH4 FP state (FPUL, FR/XF singles, DR/XD doubles) */
#define push_fpul()  FLDF_rbpdisp(R_FPUL)
#define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
#define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )

#ifdef ENABLE_SH4STATS
/* Per-opcode statistics hook (clobbers EAX, invalidates cached T state) */
#define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif
/* Exception checks - Note that all exception checks will clobber EAX */

/* Compile-time privilege check: if the block was translated in user mode,
 * emit an unconditional illegal-instruction exit and stop translating this
 * instruction (the 'return 2' — presumably the instruction length in bytes —
 * returns from the enclosing translate function). */
#define check_priv( ) \
    if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2), 4 ); \
        } else { \
            exit_block_exc(EXC_ILLEGAL, pc, 2); \
        } \
        sh4_x86.branch_taken = TRUE; \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }

/* Emit a run-time SR.FD test before the first FP instruction of the block
 * (once only — fpuen_checked latches it for the rest of the block). */
#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
	sh4_x86.fpuen_checked = TRUE;\
	MOVL_rbpdisp_r32( R_SR, REG_EAX );\
	ANDL_imms_r32( SR_FD, REG_EAX );\
	if( sh4_x86.in_delay_slot ) {\
	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
	} else {\
	    JNE_exc(EXC_FPU_DISABLED);\
	}\
	sh4_x86.tstate = TSTATE_NONE; \
    }

/* Emit run-time alignment checks on an address register: low bits must be
 * clear for 16/32/64-bit accesses respectively; otherwise raise an
 * address-error exception via the backpatch trailer. */
#define check_ralign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

/* Address-space table for the mode the block was translated under */
#define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)

#define UNDEF(ir)
/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so 
 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
 */
#ifdef HAVE_FRAME_ADDRESS
/**
 * Emit a call through the memory-region function table to read from memory.
 * @param addr_reg  x86 register holding the SH4 address (clobbered by decode)
 * @param value_reg x86 register to receive the result
 * @param offset    offset of the read function within struct mem_region_fn
 * @param pc        SH4 pc, for the backpatch record
 * When no memory exception is possible (privileged mode, TLB off) a plain
 * call suffices; otherwise an extra argument carries an exception-return
 * pointer, patched in later via the -2 backpatch record. NB: the backpatch
 * must be added immediately after the MOVP that it patches.
 */
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
        CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    } else {
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
        MOVP_immptr_rptr( 0, REG_ARG2 );   /* placeholder exception pointer */
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
    }
    if( value_reg != REG_RESULT1 ) { 
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

/**
 * Emit a call through the memory-region function table to write to memory.
 * Mirror of call_read_func; the exception pointer travels in ARG3 (or on the
 * stack when the ABI passes fewer than three register arguments).
 * Note: value is moved into ARG2 before addr into ARG1, preserving both when
 * they alias the argument registers.
 */
static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
        CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
    } else {
        if( value_reg != REG_ARG2 ) {
            MOVL_r32_r32( value_reg, REG_ARG2 );
        }        
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
#if MAX_REG_ARG > 2        
        MOVP_immptr_rptr( 0, REG_ARG3 );   /* placeholder exception pointer */
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
#else
        MOVL_imm32_rspdisp( 0, 0 );        /* placeholder on the stack */
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
#endif
    }
}
#else
/* Without frame-address support no exception-return pointer can be passed;
 * emit plain calls through the region table. */
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}     

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
}
#endif
/* Offset of a named handler within struct mem_region_fn */
#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
/* Memory access wrappers — all implicitly use the local 'pc' for backpatching */
#define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
#define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc) 
#define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
#define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
#define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)

/* Abort translation of the current instruction with a slot-illegal exit */
#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4); sh4_x86.in_delay_slot = DELAY_NONE; return 2;

/** Offset of xlat_sh4_mode field relative to the code pointer */ 
#define XLAT_SH4_MODE_CODE_OFFSET  (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
#define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
#define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
/**
 * Begin translation of a new block starting at pc: reset the per-block
 * translation state, snapshot the current CPU/FPU mode flags, and emit the
 * block prologue plus optional instrumentation.
 * Note sh4_x86.code must be captured before emit_prologue() advances
 * xlat_output.
 */
void sh4_translate_begin_block( sh4addr_t pc ) 
{
    sh4_x86.code = xlat_output;
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_TLB_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
    sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
    emit_prologue();
    if( sh4_x86.begin_callback ) {
        CALL_ptr( sh4_x86.begin_callback );
    }
    if( sh4_x86.profile_blocks ) {
        /* Emit an increment of this block's 'active' counter on every entry */
    	MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
    	ADDL_imms_r32disp( 1, REG_EAX, 0 );
    }  
}
   526 uint32_t sh4_translate_end_block_size()
   527 {
   528 	uint32_t epilogue_size = EPILOGUE_SIZE;
   529 	if( sh4_x86.end_callback ) {
   530 	    epilogue_size += (CALL1_PTR_MIN_SIZE - 1);
   531 	}
   532     if( sh4_x86.backpatch_posn <= 3 ) {
   533         epilogue_size += (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
   534     } else {
   535         epilogue_size += (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
   536     }
   537     return epilogue_size;
   538 }
/**
 * Embed a breakpoint into the generated code
 * Emits a call to sh4_translate_breakpoint_hit(pc); clobbers EAX, so the
 * cached T-flag condition is invalidated.
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    MOVL_imm32_r32( pc, REG_EAX );
    CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
}
/* An address is translatable only if it falls inside the ICACHE window */
#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)

/**
 * Test if the loaded target code pointer in %eax is valid, and if so jump
 * directly into it, bypassing the normal exit.
 * Emitted logic: NULL check; SH4-mode check against the candidate block
 * (walking the mode chain on mismatch); then a tail-jump past the target's
 * prologue, optionally via the end callback.
 */
static void jump_next_block()
{
    uint8_t *ptr = xlat_output;   /* loop head for the chain walk below */
    TESTP_rptr_rptr(REG_EAX, REG_EAX);
    JE_label(nocode);
    if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
        /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
        MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
        CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    } else {
        CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    }
    JNE_label(wrongmode);
    /* Skip the target's prologue — we arrive with state already set up */
    LEAP_rptrdisp_rptr(REG_EAX, PROLOGUE_SIZE,REG_EAX);
    if( sh4_x86.end_callback ) {
        /* Note this does leave the stack out of alignment, but doesn't matter
         * for what we're currently using it for.
         */
        PUSH_r32(REG_EAX);
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        JMP_rptr(REG_EAX);
    }
    JMP_TARGET(wrongmode);
    /* Try the next block in the mode chain and loop back to the checks */
    MOVP_rptrdisp_rptr( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
    int rel = ptr - xlat_output;
    JMP_prerel(rel);
    JMP_TARGET(nocode); 
}
/**
 * Runtime helper invoked from translated code (via
 * emit_translate_and_backpatch) when a statically-known branch target has
 * not yet been translated for the current SH4 mode. Finds or translates the
 * target, rewrites the calling instruction into a direct 5-byte JMP, links
 * the call site onto the target's use-list, and finally redirects our own
 * return address so we "return" straight into the freshly patched jump.
 */
static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc )
{
    uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
    /* Walk the chain for a variant translated under the current mode */
    while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
        target = XLAT_BLOCK_CHAIN(target);
    }
    if( target == NULL ) {
        target = sh4_translate_basic_block( pc );
    }
    /* Overwrite the call site with JMP rel32 (opcode 0xE9) into the target,
     * skipping the target's prologue */
    uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
    *backpatch = 0xE9;
    *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)+PROLOGUE_SIZE-5;
    /* The bytes after the jmp hold the use-list link for later unlinking */
    *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
    XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch; 

    /* Frob the on-stack return address so this call returns into the new
     * jmp. The volatile qualifier works around an optimizer bug in GCC
     * versions after 4.2 (per the commit that introduced it). */
    uint8_t * volatile *retptr = ((uint8_t * volatile *)__builtin_frame_address(0))+1;
    assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
    *retptr = backpatch;
}
/**
 * Emit the call sequence that sh4_translate_get_code_and_backpatch will
 * later overwrite with a direct jump, padded so there is room for the
 * 5-byte jmp plus a pointer-sized use-list link.
 */
static void emit_translate_and_backpatch()
{
    /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
    CALL1_ptr_r32(sh4_translate_get_code_and_backpatch, REG_ARG1);

    /* When patched, the jmp instruction will be 5 bytes (either platform) -
     * we need to reserve sizeof(void*) bytes for the use-list
     * pointer
     */ 
    if( sizeof(void*) == 8 ) {
        NOP();
    } else {
        NOP2();
    }
}
/**
 * If we're jumping to a fixed address (or at least fixed relative to the
 * current PC, then we can do a direct branch. REG_ARG1 should contain
 * the PC at this point.
 * Fast path: in-cache target with a statically-known mode and no end
 * callback gets the self-patching call (emit_translate_and_backpatch);
 * otherwise fall back to a lookup plus jump_next_block().
 */
static void jump_next_block_fixed_pc( sh4addr_t pc )
{
    if( IS_IN_ICACHE(pc) ) {
        if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN && sh4_x86.end_callback == NULL ) {
            /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
             * fetch-and-backpatch routine, which will replace the call with a branch */
           emit_translate_and_backpatch();	         
           return;
        } else {
            /* Load the cached code pointer directly from the lookup table;
             * low bits are flags, masked off with -4 */
            MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
            ANDP_imms_rptr( -4, REG_EAX );
        }
    } else if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code, REG_ARG1);
    }
    jump_next_block();
}
/**
 * Undo every patched-in direct jump on a block's use-list by re-emitting
 * the original translate-and-backpatch call at each recorded site.
 * Temporarily repoints xlat_output at each site; the use-list link lives
 * 5 bytes past the site (after the jmp), matching the layout written by
 * sh4_translate_get_code_and_backpatch.
 */
void sh4_translate_unlink_block( void *use_list )
{
    uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */
    void *next = use_list;
    while( next != NULL ) {
        xlat_output = (uint8_t *)next;
        next = *(void **)(xlat_output+5);   /* read link before overwriting */
        emit_translate_and_backpatch();
    }
    xlat_output = tmp;
}
/**
 * Emit the common block exit: the epilogue followed by either a tail-jump
 * to the end callback or a plain return to the dispatcher.
 */
static void exit_block()
{
    emit_epilogue();
    if( sh4_x86.end_callback ) {
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        RET();
    }
}
/**
 * Exit the block with sh4r.pc already written
 * Emits: charge elapsed cycles (instructions executed * sh4_cpu_period) to
 * slice_cycle; if no event is pending, look up the code for sh4r.pc and
 * chain into it, otherwise take the normal exit.
 */
void exit_block_pcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);   /* event pending — fall through to the exit */
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block with sh4r.new_pc written with the target pc
 * Same as exit_block_pcset, but first copies new_pc into pc (delayed-branch
 * semantics) before the lookup/chain.
 */
void exit_block_newpcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    /* Commit the delayed branch: PC <- NEW_PC */
    MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block to an absolute PC
 * @param pc    branch target
 * @param endpc address just past the last instruction (for cycle accounting)
 */
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    MOVL_imm32_r32( pc, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    /* Known target — may be patched into a direct branch */
    jump_next_block_fixed_pc(pc);    
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block to a relative PC
 * @param pc    branch target (relative to the stored PC)
 * @param endpc address just past the last instruction (for cycle accounting)
 */
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
        /* Special case for tight loops - the PC doesn't change, and
         * we already know the target address. Just check events pending before
         * looping.
         */
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        /* Branch backwards to just after this block's own prologue */
        uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output)) + PROLOGUE_SIZE;
        JCC_cc_prerel(X86_COND_A, backdisp);
    } else {
        MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
        ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
        MOVL_r32_rbpdisp( REG_ARG1, R_PC );
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        JBE_label(exitloop2);

        jump_next_block_fixed_pc(pc);
        JMP_TARGET(exitloop2);
    }
    exit_block();
}
   774 /**
   775  * Exit unconditionally with a general exception
   776  */
   777 void exit_block_exc( int code, sh4addr_t pc, int inst_adjust )
   778 {
           /* Advance sh4r.pc to the faulting instruction's address (relative to the
            * block start, added to the PC stored at block entry). */
   779     MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
   780     ADDL_r32_rbpdisp( REG_ECX, R_PC );
           /* Charge cycles up to (and, via inst_adjust, possibly including) the
            * faulting instruction. */
   781     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc + inst_adjust)>>1)*sh4_cpu_period, REG_ECX );
   782     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
           /* Raise the requested general exception, then leave translated code. */
   783     MOVL_imm32_r32( code, REG_ARG1 );
   784     CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
   785     exit_block();
   786 }    
   788 /**
   789  * Embed a call to sh4_execute_instruction for situations that we
   790  * can't translate (just page-crossing delay slots at the moment).
   791  * Caller is responsible for setting new_pc before calling this function.
   792  *
   793  * Performs:
   794  *   Set PC = endpc
   795  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   796  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   797  *   Call sh4_execute_instruction
   798  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   799  */
   800 void exit_block_emu( sh4vma_t endpc )
   801 {
           /* PC += (endpc - block_start_pc) — caller contract: new_pc already set.
            * Trailing numeric comments are the emitted x86 encoding sizes in bytes. */
   802     MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
   803     ADDL_r32_rbpdisp( REG_ECX, R_PC );
           /* +1 instruction: single-step emulation doesn't update slice_cycle itself */
   805     MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
   806     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
           /* Propagate the translator's delay-slot flag into sh4r.in_delay_slot */
   807     MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
   808     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );
           /* Execute the one untranslatable instruction in the interpreter, then exit */
   810     CALL_ptr( sh4_execute_instruction );
   811     exit_block();
   812 } 
   814 /**
   815  * Write the block trailer (exception handling block)
   816  */
   817 void sh4_translate_end_block( sh4addr_t pc ) {
   818     if( sh4_x86.branch_taken == FALSE ) {
   819         // Didn't exit unconditionally already, so write the termination here
   820         exit_block_rel( pc, pc );
   821     }
   822     if( sh4_x86.backpatch_posn != 0 ) {
   823         unsigned int i;
   824         // Exception raised - cleanup and exit
              // Shared epilogue: on entry EDX holds the icount of the aborting
              // instruction (loaded by each backpatch stub below).
   825         uint8_t *end_ptr = xlat_output;
              // SPC += icount*2 (instructions are 2 bytes each)
   826         MOVL_r32_r32( REG_EDX, REG_ECX );
   827         ADDL_r32_r32( REG_EDX, REG_ECX );
   828         ADDL_r32_rbpdisp( REG_ECX, R_SPC );
              // slice_cycle += (icount+1) * sh4_cpu_period
   829         MOVL_moffptr_eax( &sh4_cpu_period );
   830         INC_r32( REG_EDX );  /* Add 1 for the aborting instruction itself */ 
   831         MULL_r32( REG_EDX );
   832         ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
   833         exit_block();
              // Emit one stub per recorded backpatch point and fix up the original
              // jump/pointer to land on it.
   835         for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
   836             uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
   837             if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
                      // Negative code: exception already raised by callee; -2 means the
                      // fixup is an absolute pointer, otherwise a 32-bit pc-relative disp.
   838                 if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
   839                     *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output; 
   840                 } else {
   841                     *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
   842                 }
                      // Load icount for the epilogue and jump back to it
   843                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
   844                 int rel = end_ptr - xlat_output;
   845                 JMP_prerel(rel);
   846             } else {
                      // Positive code: raise the exception here, then run the epilogue
   847                 *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
   848                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
   849                 CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
   850                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
   851                 int rel = end_ptr - xlat_output;
   852                 JMP_prerel(rel);
   853             }
   854         }
   855     }
   856 }
   858 /**
   859  * Translate a single instruction. Delayed branches are handled specially
   860  * by translating both branch and delayed instruction as a single unit (as
   861  * 
   862  * The instruction MUST be in the icache (assert check)
   863  *
   864  * @return true if the instruction marks the end of a basic block
   865  * (eg a branch or 
   866  */
   867 uint32_t sh4_translate_instruction( sh4vma_t pc )
   868 {
   869     uint32_t ir;
   870     /* Read instruction from icache */
   871     assert( IS_IN_ICACHE(pc) );
   872     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
           /* Record a recovery point for every non-delay-slot instruction so a fault
            * inside the block can be mapped back to an SH4 PC. */
   874     if( !sh4_x86.in_delay_slot ) {
   875 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   876     }
   878     /* check for breakpoints at this pc */
   879     for( int i=0; i<sh4_breakpoint_count; i++ ) {
   880         if( sh4_breakpoints[i].address == pc ) {
   881             sh4_translate_emit_breakpoint(pc);
   882             break;
   883         }
   884     }
           /* NOTE(review): the remainder of this function (the per-opcode decode
            * switch and return value) is produced by the template generator from the
            * "%%" section that follows; this fragment is not the complete body. */
   885 %%
   886 /* ALU operations */
           /* Generator templates: each "MNEMONIC operands {: ... :}" entry emits x86
            * code for one SH4 instruction. load_reg/store_reg move sh4r.r[n] through
            * host registers; sh4_x86.tstate caches which x86 condition the SH4 T bit
            * currently mirrors (TSTATE_NONE = no cached mapping). */
   887 ADD Rm, Rn {:
   888     COUNT_INST(I_ADD);
   889     load_reg( REG_EAX, Rm );
   890     load_reg( REG_ECX, Rn );
   891     ADDL_r32_r32( REG_EAX, REG_ECX );
   892     store_reg( REG_ECX, Rn );
   893     sh4_x86.tstate = TSTATE_NONE;
   894 :}
   895 ADD #imm, Rn {:  
   896     COUNT_INST(I_ADDI);
   897     ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
   898     sh4_x86.tstate = TSTATE_NONE;
   899 :}
   900 ADDC Rm, Rn {:
   901     COUNT_INST(I_ADDC);
       /* Reload the x86 carry flag from sh4r.t unless it already mirrors T */
   902     if( sh4_x86.tstate != TSTATE_C ) {
   903         LDC_t();
   904     }
   905     load_reg( REG_EAX, Rm );
   906     load_reg( REG_ECX, Rn );
   907     ADCL_r32_r32( REG_EAX, REG_ECX );
   908     store_reg( REG_ECX, Rn );
   909     SETC_t();
   910     sh4_x86.tstate = TSTATE_C;
   911 :}
   912 ADDV Rm, Rn {:
   913     COUNT_INST(I_ADDV);
   914     load_reg( REG_EAX, Rm );
   915     load_reg( REG_ECX, Rn );
   916     ADDL_r32_r32( REG_EAX, REG_ECX );
   917     store_reg( REG_ECX, Rn );
       /* T = signed overflow of the add */
   918     SETO_t();
   919     sh4_x86.tstate = TSTATE_O;
   920 :}
   921 AND Rm, Rn {:
   922     COUNT_INST(I_AND);
   923     load_reg( REG_EAX, Rm );
   924     load_reg( REG_ECX, Rn );
   925     ANDL_r32_r32( REG_EAX, REG_ECX );
   926     store_reg( REG_ECX, Rn );
   927     sh4_x86.tstate = TSTATE_NONE;
   928 :}
   929 AND #imm, R0 {:  
   930     COUNT_INST(I_ANDI);
   931     load_reg( REG_EAX, 0 );
   932     ANDL_imms_r32(imm, REG_EAX); 
   933     store_reg( REG_EAX, 0 );
   934     sh4_x86.tstate = TSTATE_NONE;
   935 :}
   936 AND.B #imm, @(R0, GBR) {: 
   937     COUNT_INST(I_ANDB);
       /* Read-modify-write of the byte at R0+GBR; the address is parked in a stack
        * slot across the memory-read call, which may clobber registers. */
   938     load_reg( REG_EAX, 0 );
   939     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
   940     MOVL_r32_rspdisp(REG_EAX, 0);
   941     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
   942     MOVL_rspdisp_r32(0, REG_EAX);
   943     ANDL_imms_r32(imm, REG_EDX );
   944     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
   945     sh4_x86.tstate = TSTATE_NONE;
   946 :}
           /* Compare templates: each sets T from the corresponding x86 condition and
            * records the mapping in sh4_x86.tstate so a following conditional branch
            * can reuse the live flags. */
   947 CMP/EQ Rm, Rn {:  
   948     COUNT_INST(I_CMPEQ);
   949     load_reg( REG_EAX, Rm );
   950     load_reg( REG_ECX, Rn );
   951     CMPL_r32_r32( REG_EAX, REG_ECX );
   952     SETE_t();
   953     sh4_x86.tstate = TSTATE_E;
   954 :}
   955 CMP/EQ #imm, R0 {:  
   956     COUNT_INST(I_CMPEQI);
   957     load_reg( REG_EAX, 0 );
   958     CMPL_imms_r32(imm, REG_EAX);
   959     SETE_t();
   960     sh4_x86.tstate = TSTATE_E;
   961 :}
   962 CMP/GE Rm, Rn {:  
   963     COUNT_INST(I_CMPGE);
   964     load_reg( REG_EAX, Rm );
   965     load_reg( REG_ECX, Rn );
   966     CMPL_r32_r32( REG_EAX, REG_ECX );
   967     SETGE_t();
   968     sh4_x86.tstate = TSTATE_GE;
   969 :}
   970 CMP/GT Rm, Rn {: 
   971     COUNT_INST(I_CMPGT);
   972     load_reg( REG_EAX, Rm );
   973     load_reg( REG_ECX, Rn );
   974     CMPL_r32_r32( REG_EAX, REG_ECX );
   975     SETG_t();
   976     sh4_x86.tstate = TSTATE_G;
   977 :}
   978 CMP/HI Rm, Rn {:  
   979     COUNT_INST(I_CMPHI);
   980     load_reg( REG_EAX, Rm );
   981     load_reg( REG_ECX, Rn );
   982     CMPL_r32_r32( REG_EAX, REG_ECX );
       /* unsigned > */
   983     SETA_t();
   984     sh4_x86.tstate = TSTATE_A;
   985 :}
   986 CMP/HS Rm, Rn {: 
   987     COUNT_INST(I_CMPHS);
   988     load_reg( REG_EAX, Rm );
   989     load_reg( REG_ECX, Rn );
   990     CMPL_r32_r32( REG_EAX, REG_ECX );
       /* unsigned >= */
   991     SETAE_t();
   992     sh4_x86.tstate = TSTATE_AE;
   993  :}
   994 CMP/PL Rn {: 
   995     COUNT_INST(I_CMPPL);
   996     load_reg( REG_EAX, Rn );
   997     CMPL_imms_r32( 0, REG_EAX );
   998     SETG_t();
   999     sh4_x86.tstate = TSTATE_G;
  1000 :}
  1001 CMP/PZ Rn {:  
  1002     COUNT_INST(I_CMPPZ);
  1003     load_reg( REG_EAX, Rn );
  1004     CMPL_imms_r32( 0, REG_EAX );
  1005     SETGE_t();
  1006     sh4_x86.tstate = TSTATE_GE;
  1007 :}
  1008 CMP/STR Rm, Rn {:  
  1009     COUNT_INST(I_CMPSTR);
       /* T=1 iff any byte of Rm equals the corresponding byte of Rn: XOR the
        * registers, then test each byte of the result for zero. The three early
        * exits fall through to SETE with ZF already set. */
  1010     load_reg( REG_EAX, Rm );
  1011     load_reg( REG_ECX, Rn );
  1012     XORL_r32_r32( REG_ECX, REG_EAX );
  1013     TESTB_r8_r8( REG_AL, REG_AL );
  1014     JE_label(target1);
  1015     TESTB_r8_r8( REG_AH, REG_AH );
  1016     JE_label(target2);
  1017     SHRL_imm_r32( 16, REG_EAX );
  1018     TESTB_r8_r8( REG_AL, REG_AL );
  1019     JE_label(target3);
  1020     TESTB_r8_r8( REG_AH, REG_AH );
  1021     JMP_TARGET(target1);
  1022     JMP_TARGET(target2);
  1023     JMP_TARGET(target3);
  1024     SETE_t();
  1025     sh4_x86.tstate = TSTATE_E;
  1026 :}
  1027 DIV0S Rm, Rn {:
  1028     COUNT_INST(I_DIV0S);
       /* M = sign(Rm), Q = sign(Rn), T = (M != Q) — seeds the DIV1 step sequence */
  1029     load_reg( REG_EAX, Rm );
  1030     load_reg( REG_ECX, Rn );
  1031     SHRL_imm_r32( 31, REG_EAX );
  1032     SHRL_imm_r32( 31, REG_ECX );
  1033     MOVL_r32_rbpdisp( REG_EAX, R_M );
  1034     MOVL_r32_rbpdisp( REG_ECX, R_Q );
  1035     CMPL_r32_r32( REG_EAX, REG_ECX );
  1036     SETNE_t();
  1037     sh4_x86.tstate = TSTATE_NE;
  1038 :}
  1039 DIV0U {:  
  1040     COUNT_INST(I_DIV0U);
       /* Clear M, Q and T for an unsigned division sequence */
  1041     XORL_r32_r32( REG_EAX, REG_EAX );
  1042     MOVL_r32_rbpdisp( REG_EAX, R_Q );
  1043     MOVL_r32_rbpdisp( REG_EAX, R_M );
  1044     MOVL_r32_rbpdisp( REG_EAX, R_T );
  1045     sh4_x86.tstate = TSTATE_C; // works for DIV1
  1046 :}
  1047 DIV1 Rm, Rn {:
  1048     COUNT_INST(I_DIV1);
       /* One step of the SH4 restoring-division algorithm. Flag ordering is
        * critical: RCLL consumes CF loaded by LDC_t and produces the shifted-out
        * bit consumed by SETC_r8. */
  1049     MOVL_rbpdisp_r32( R_M, REG_ECX );
  1050     load_reg( REG_EAX, Rn );
  1051     if( sh4_x86.tstate != TSTATE_C ) {
  1052 	LDC_t();
       /* NOTE(review): original line 1053 is missing from this capture — it appears
        * to be the closing brace of the if above; verify against upstream. */
  1054     RCLL_imm_r32( 1, REG_EAX );
  1055     SETC_r8( REG_DL ); // Q'
  1056     CMPL_rbpdisp_r32( R_Q, REG_ECX );
  1057     JE_label(mqequal);
  1058     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1059     JMP_label(end);
  1060     JMP_TARGET(mqequal);
  1061     SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1062     JMP_TARGET(end);
  1063     store_reg( REG_EAX, Rn ); // Done with Rn now
  1064     SETC_r8(REG_AL); // tmp1
  1065     XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
  1066     XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
  1067     MOVL_r32_rbpdisp( REG_ECX, R_Q );
  1068     XORL_imms_r32( 1, REG_AL );   // T = !Q'
  1069     MOVZXL_r8_r32( REG_AL, REG_EAX );
  1070     MOVL_r32_rbpdisp( REG_EAX, R_T );
  1071     sh4_x86.tstate = TSTATE_NONE;
  1072 :}
  1073 DMULS.L Rm, Rn {:  
  1074     COUNT_INST(I_DMULS);
       /* Signed 32x32 -> 64 multiply into MACH:MACL */
  1075     load_reg( REG_EAX, Rm );
  1076     load_reg( REG_ECX, Rn );
  1077     IMULL_r32(REG_ECX);
  1078     MOVL_r32_rbpdisp( REG_EDX, R_MACH );
  1079     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1080     sh4_x86.tstate = TSTATE_NONE;
  1081 :}
  1082 DMULU.L Rm, Rn {:  
  1083     COUNT_INST(I_DMULU);
       /* Unsigned 32x32 -> 64 multiply into MACH:MACL */
  1084     load_reg( REG_EAX, Rm );
  1085     load_reg( REG_ECX, Rn );
  1086     MULL_r32(REG_ECX);
  1087     MOVL_r32_rbpdisp( REG_EDX, R_MACH );
  1088     MOVL_r32_rbpdisp( REG_EAX, R_MACL );    
  1089     sh4_x86.tstate = TSTATE_NONE;
  1090 :}
  1091 DT Rn {:  
  1092     COUNT_INST(I_DT);
       /* Decrement-and-test: Rn -= 1, T = (Rn == 0) */
  1093     load_reg( REG_EAX, Rn );
  1094     ADDL_imms_r32( -1, REG_EAX );
  1095     store_reg( REG_EAX, Rn );
  1096     SETE_t();
  1097     sh4_x86.tstate = TSTATE_E;
  1098 :}
  1099 EXTS.B Rm, Rn {:  
  1100     COUNT_INST(I_EXTSB);
  1101     load_reg( REG_EAX, Rm );
  1102     MOVSXL_r8_r32( REG_EAX, REG_EAX );
  1103     store_reg( REG_EAX, Rn );
  1104 :}
  1105 EXTS.W Rm, Rn {:  
  1106     COUNT_INST(I_EXTSW);
  1107     load_reg( REG_EAX, Rm );
  1108     MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1109     store_reg( REG_EAX, Rn );
  1110 :}
  1111 EXTU.B Rm, Rn {:  
  1112     COUNT_INST(I_EXTUB);
  1113     load_reg( REG_EAX, Rm );
  1114     MOVZXL_r8_r32( REG_EAX, REG_EAX );
  1115     store_reg( REG_EAX, Rn );
  1116 :}
  1117 EXTU.W Rm, Rn {:  
  1118     COUNT_INST(I_EXTUW);
  1119     load_reg( REG_EAX, Rm );
  1120     MOVZXL_r16_r32( REG_EAX, REG_EAX );
  1121     store_reg( REG_EAX, Rn );
  1122 :}
  1123 MAC.L @Rm+, @Rn+ {:
  1124     COUNT_INST(I_MACL);
       /* Multiply-accumulate: reads two longs, multiplies, adds the 64-bit product
        * into MACH:MACL. Rm==Rn needs special sequencing since both post-increments
        * hit the same register. */
  1125     if( Rm == Rn ) {
  1126 	load_reg( REG_EAX, Rm );
  1127 	check_ralign32( REG_EAX );
  1128 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1129 	MOVL_r32_rspdisp(REG_EAX, 0);
  1130 	load_reg( REG_EAX, Rm );
  1131 	LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  1132 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1133         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
  1134     } else {
  1135 	load_reg( REG_EAX, Rm );
  1136 	check_ralign32( REG_EAX );
  1137 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1138 	MOVL_r32_rspdisp( REG_EAX, 0 );
  1139 	load_reg( REG_EAX, Rn );
  1140 	check_ralign32( REG_EAX );
  1141 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1142 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
  1143 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
       /* NOTE(review): original lines 1144-1145 are missing from this capture —
        * presumably the closing brace of the else above; verify upstream. */
  1146     IMULL_rspdisp( 0 );
  1147     ADDL_r32_rbpdisp( REG_EAX, R_MACL );
  1148     ADCL_r32_rbpdisp( REG_EDX, R_MACH );
       /* If the S flag is set, saturate the accumulator to 48 bits */
  1150     MOVL_rbpdisp_r32( R_S, REG_ECX );
  1151     TESTL_r32_r32(REG_ECX, REG_ECX);
  1152     JE_label( nosat );
  1153     CALL_ptr( signsat48 );
  1154     JMP_TARGET( nosat );
  1155     sh4_x86.tstate = TSTATE_NONE;
  1156 :}
  1157 MAC.W @Rm+, @Rn+ {:  
  1158     COUNT_INST(I_MACW);
       /* Word multiply-accumulate. With S set, saturates MACL to 32 bits and
        * records overflow in MACH (the JNO/JS paths below). */
  1159     if( Rm == Rn ) {
  1160 	load_reg( REG_EAX, Rm );
  1161 	check_ralign16( REG_EAX );
  1162 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1163         MOVL_r32_rspdisp( REG_EAX, 0 );
  1164 	load_reg( REG_EAX, Rm );
  1165 	LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
  1166 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1167 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
  1168 	// Note translate twice in case of page boundaries. Maybe worth
  1169 	// adding a page-boundary check to skip the second translation
  1170     } else {
  1171 	load_reg( REG_EAX, Rn );
  1172 	check_ralign16( REG_EAX );
  1173 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1174         MOVL_r32_rspdisp( REG_EAX, 0 );
  1175 	load_reg( REG_EAX, Rm );
  1176 	check_ralign16( REG_EAX );
  1177 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1178 	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
  1179 	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
       /* NOTE(review): original line 1180 is missing from this capture — presumably
        * the closing brace of the else above; verify upstream. */
  1181     IMULL_rspdisp( 0 );
  1182     MOVL_rbpdisp_r32( R_S, REG_ECX );
  1183     TESTL_r32_r32( REG_ECX, REG_ECX );
  1184     JE_label( nosat );
       /* Saturating path (S=1): trailing numbers are emitted encoding sizes */
  1186     ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
  1187     JNO_label( end );            // 2
  1188     MOVL_imm32_r32( 1, REG_EDX );         // 5
  1189     MOVL_r32_rbpdisp( REG_EDX, R_MACH );   // 6
  1190     JS_label( positive );        // 2
  1191     MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
  1192     MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
  1193     JMP_label(end2);           // 2
  1195     JMP_TARGET(positive);
  1196     MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
  1197     MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
  1198     JMP_label(end3);            // 2
       /* Non-saturating path (S=0): full 64-bit accumulate into MACH:MACL */
  1200     JMP_TARGET(nosat);
  1201     ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
  1202     ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
  1203     JMP_TARGET(end);
  1204     JMP_TARGET(end2);
  1205     JMP_TARGET(end3);
  1206     sh4_x86.tstate = TSTATE_NONE;
  1207 :}
  1208 MOVT Rn {:  
  1209     COUNT_INST(I_MOVT);
  1210     MOVL_rbpdisp_r32( R_T, REG_EAX );
  1211     store_reg( REG_EAX, Rn );
  1212 :}
  1213 MUL.L Rm, Rn {:  
  1214     COUNT_INST(I_MULL);
       /* 32-bit multiply, low half only -> MACL */
  1215     load_reg( REG_EAX, Rm );
  1216     load_reg( REG_ECX, Rn );
  1217     MULL_r32( REG_ECX );
  1218     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1219     sh4_x86.tstate = TSTATE_NONE;
  1220 :}
  1221 MULS.W Rm, Rn {:
  1222     COUNT_INST(I_MULSW);
       /* Signed 16x16 -> 32 into MACL (operands sign-extended from memory) */
  1223     MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
  1224     MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
  1225     MULL_r32( REG_ECX );
  1226     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1227     sh4_x86.tstate = TSTATE_NONE;
  1228 :}
  1229 MULU.W Rm, Rn {:  
  1230     COUNT_INST(I_MULUW);
       /* Unsigned 16x16 -> 32 into MACL (operands zero-extended) */
  1231     MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
  1232     MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
  1233     MULL_r32( REG_ECX );
  1234     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1235     sh4_x86.tstate = TSTATE_NONE;
  1236 :}
  1237 NEG Rm, Rn {:
  1238     COUNT_INST(I_NEG);
  1239     load_reg( REG_EAX, Rm );
  1240     NEGL_r32( REG_EAX );
  1241     store_reg( REG_EAX, Rn );
  1242     sh4_x86.tstate = TSTATE_NONE;
  1243 :}
  1244 NEGC Rm, Rn {:  
  1245     COUNT_INST(I_NEGC);
       /* Rn = 0 - Rm - T, T = borrow: implemented as SBB from a zeroed register */
  1246     load_reg( REG_EAX, Rm );
  1247     XORL_r32_r32( REG_ECX, REG_ECX );
  1248     LDC_t();
  1249     SBBL_r32_r32( REG_EAX, REG_ECX );
  1250     store_reg( REG_ECX, Rn );
  1251     SETC_t();
  1252     sh4_x86.tstate = TSTATE_C;
  1253 :}
  1254 NOT Rm, Rn {:  
  1255     COUNT_INST(I_NOT);
  1256     load_reg( REG_EAX, Rm );
  1257     NOTL_r32( REG_EAX );
  1258     store_reg( REG_EAX, Rn );
  1259     sh4_x86.tstate = TSTATE_NONE;
  1260 :}
  1261 OR Rm, Rn {:  
  1262     COUNT_INST(I_OR);
  1263     load_reg( REG_EAX, Rm );
  1264     load_reg( REG_ECX, Rn );
  1265     ORL_r32_r32( REG_EAX, REG_ECX );
  1266     store_reg( REG_ECX, Rn );
  1267     sh4_x86.tstate = TSTATE_NONE;
  1268 :}
  1269 OR #imm, R0 {:
  1270     COUNT_INST(I_ORI);
  1271     load_reg( REG_EAX, 0 );
  1272     ORL_imms_r32(imm, REG_EAX);
  1273     store_reg( REG_EAX, 0 );
  1274     sh4_x86.tstate = TSTATE_NONE;
  1275 :}
  1276 OR.B #imm, @(R0, GBR) {:  
  1277     COUNT_INST(I_ORB);
       /* Read-modify-write byte at R0+GBR, same pattern as AND.B above */
  1278     load_reg( REG_EAX, 0 );
  1279     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
  1280     MOVL_r32_rspdisp( REG_EAX, 0 );
  1281     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
  1282     MOVL_rspdisp_r32( 0, REG_EAX );
  1283     ORL_imms_r32(imm, REG_EDX );
  1284     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1285     sh4_x86.tstate = TSTATE_NONE;
  1286 :}
  1287 ROTCL Rn {:
  1288     COUNT_INST(I_ROTCL);
       /* Rotate-through-carry left: CF must hold T before RCLL executes */
  1289     load_reg( REG_EAX, Rn );
  1290     if( sh4_x86.tstate != TSTATE_C ) {
  1291 	LDC_t();
       /* NOTE(review): original line 1292 (closing brace of the if) is missing from
        * this capture; verify upstream. */
  1293     RCLL_imm_r32( 1, REG_EAX );
  1294     store_reg( REG_EAX, Rn );
  1295     SETC_t();
  1296     sh4_x86.tstate = TSTATE_C;
  1297 :}
  1298 ROTCR Rn {:  
  1299     COUNT_INST(I_ROTCR);
  1300     load_reg( REG_EAX, Rn );
  1301     if( sh4_x86.tstate != TSTATE_C ) {
  1302 	LDC_t();
       /* NOTE(review): original line 1303 (closing brace of the if) is missing from
        * this capture; verify upstream. */
  1304     RCRL_imm_r32( 1, REG_EAX );
  1305     store_reg( REG_EAX, Rn );
  1306     SETC_t();
  1307     sh4_x86.tstate = TSTATE_C;
  1308 :}
  1309 ROTL Rn {:  
  1310     COUNT_INST(I_ROTL);
  1311     load_reg( REG_EAX, Rn );
  1312     ROLL_imm_r32( 1, REG_EAX );
  1313     store_reg( REG_EAX, Rn );
  1314     SETC_t();
  1315     sh4_x86.tstate = TSTATE_C;
  1316 :}
  1317 ROTR Rn {:  
  1318     COUNT_INST(I_ROTR);
  1319     load_reg( REG_EAX, Rn );
  1320     RORL_imm_r32( 1, REG_EAX );
  1321     store_reg( REG_EAX, Rn );
  1322     SETC_t();
  1323     sh4_x86.tstate = TSTATE_C;
  1324 :}
  1325 SHAD Rm, Rn {:
  1326     COUNT_INST(I_SHAD);
  1327     /* Annoyingly enough, not directly convertible */
       /* Dynamic arithmetic shift: Rm >= 0 shifts left by Rm&0x1F; Rm < 0 shifts
        * right by (-Rm)&0x1F, with the all-zero count special-cased to a full
        * 31-bit arithmetic shift (SH4 semantics differ from x86's masked count). */
  1328     load_reg( REG_EAX, Rn );
  1329     load_reg( REG_ECX, Rm );
  1330     CMPL_imms_r32( 0, REG_ECX );
  1331     JGE_label(doshl);
  1333     NEGL_r32( REG_ECX );      // 2
  1334     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1335     JE_label(emptysar);     // 2
  1336     SARL_cl_r32( REG_EAX );       // 2
  1337     JMP_label(end);          // 2
  1339     JMP_TARGET(emptysar);
  1340     SARL_imm_r32(31, REG_EAX );  // 3
  1341     JMP_label(end2);
  1343     JMP_TARGET(doshl);
  1344     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1345     SHLL_cl_r32( REG_EAX );       // 2
  1346     JMP_TARGET(end);
  1347     JMP_TARGET(end2);
  1348     store_reg( REG_EAX, Rn );
  1349     sh4_x86.tstate = TSTATE_NONE;
  1350 :}
  1351 SHLD Rm, Rn {:  
  1352     COUNT_INST(I_SHLD);
       /* Dynamic logical shift: like SHAD but the zero-count right shift clears Rn */
  1353     load_reg( REG_EAX, Rn );
  1354     load_reg( REG_ECX, Rm );
  1355     CMPL_imms_r32( 0, REG_ECX );
  1356     JGE_label(doshl);
  1358     NEGL_r32( REG_ECX );      // 2
  1359     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1360     JE_label(emptyshr );
  1361     SHRL_cl_r32( REG_EAX );       // 2
  1362     JMP_label(end);          // 2
  1364     JMP_TARGET(emptyshr);
  1365     XORL_r32_r32( REG_EAX, REG_EAX );
  1366     JMP_label(end2);
  1368     JMP_TARGET(doshl);
  1369     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1370     SHLL_cl_r32( REG_EAX );       // 2
  1371     JMP_TARGET(end);
  1372     JMP_TARGET(end2);
  1373     store_reg( REG_EAX, Rn );
  1374     sh4_x86.tstate = TSTATE_NONE;
  1375 :}
  1376 SHAL Rn {: 
  1377     COUNT_INST(I_SHAL);
       /* SHAL is the same operation as SHLL (left shift, T = shifted-out bit) */
  1378     load_reg( REG_EAX, Rn );
  1379     SHLL_imm_r32( 1, REG_EAX );
  1380     SETC_t();
  1381     store_reg( REG_EAX, Rn );
  1382     sh4_x86.tstate = TSTATE_C;
  1383 :}
  1384 SHAR Rn {:  
  1385     COUNT_INST(I_SHAR);
  1386     load_reg( REG_EAX, Rn );
  1387     SARL_imm_r32( 1, REG_EAX );
  1388     SETC_t();
  1389     store_reg( REG_EAX, Rn );
  1390     sh4_x86.tstate = TSTATE_C;
  1391 :}
  1392 SHLL Rn {:  
  1393     COUNT_INST(I_SHLL);
  1394     load_reg( REG_EAX, Rn );
  1395     SHLL_imm_r32( 1, REG_EAX );
  1396     SETC_t();
  1397     store_reg( REG_EAX, Rn );
  1398     sh4_x86.tstate = TSTATE_C;
  1399 :}
  1400 SHLL2 Rn {:
  1401     COUNT_INST(I_SHLL);
  1402     load_reg( REG_EAX, Rn );
  1403     SHLL_imm_r32( 2, REG_EAX );
  1404     store_reg( REG_EAX, Rn );
  1405     sh4_x86.tstate = TSTATE_NONE;
  1406 :}
  1407 SHLL8 Rn {:  
  1408     COUNT_INST(I_SHLL);
  1409     load_reg( REG_EAX, Rn );
  1410     SHLL_imm_r32( 8, REG_EAX );
  1411     store_reg( REG_EAX, Rn );
  1412     sh4_x86.tstate = TSTATE_NONE;
  1413 :}
  1414 SHLL16 Rn {:  
  1415     COUNT_INST(I_SHLL);
  1416     load_reg( REG_EAX, Rn );
  1417     SHLL_imm_r32( 16, REG_EAX );
  1418     store_reg( REG_EAX, Rn );
  1419     sh4_x86.tstate = TSTATE_NONE;
  1420 :}
  1421 SHLR Rn {:  
  1422     COUNT_INST(I_SHLR);
  1423     load_reg( REG_EAX, Rn );
  1424     SHRL_imm_r32( 1, REG_EAX );
  1425     SETC_t();
  1426     store_reg( REG_EAX, Rn );
  1427     sh4_x86.tstate = TSTATE_C;
  1428 :}
  1429 SHLR2 Rn {:  
  1430     COUNT_INST(I_SHLR);
  1431     load_reg( REG_EAX, Rn );
  1432     SHRL_imm_r32( 2, REG_EAX );
  1433     store_reg( REG_EAX, Rn );
  1434     sh4_x86.tstate = TSTATE_NONE;
  1435 :}
  1436 SHLR8 Rn {:  
  1437     COUNT_INST(I_SHLR);
  1438     load_reg( REG_EAX, Rn );
  1439     SHRL_imm_r32( 8, REG_EAX );
  1440     store_reg( REG_EAX, Rn );
  1441     sh4_x86.tstate = TSTATE_NONE;
  1442 :}
  1443 SHLR16 Rn {:  
  1444     COUNT_INST(I_SHLR);
  1445     load_reg( REG_EAX, Rn );
  1446     SHRL_imm_r32( 16, REG_EAX );
  1447     store_reg( REG_EAX, Rn );
  1448     sh4_x86.tstate = TSTATE_NONE;
  1449 :}
  1450 SUB Rm, Rn {:  
  1451     COUNT_INST(I_SUB);
  1452     load_reg( REG_EAX, Rm );
  1453     load_reg( REG_ECX, Rn );
  1454     SUBL_r32_r32( REG_EAX, REG_ECX );
  1455     store_reg( REG_ECX, Rn );
  1456     sh4_x86.tstate = TSTATE_NONE;
  1457 :}
  1458 SUBC Rm, Rn {:  
  1459     COUNT_INST(I_SUBC);
       /* Subtract with borrow: CF must hold T before SBBL executes */
  1460     load_reg( REG_EAX, Rm );
  1461     load_reg( REG_ECX, Rn );
  1462     if( sh4_x86.tstate != TSTATE_C ) {
  1463 	LDC_t();
       /* NOTE(review): original line 1464 (closing brace of the if) is missing from
        * this capture; verify upstream. */
  1465     SBBL_r32_r32( REG_EAX, REG_ECX );
  1466     store_reg( REG_ECX, Rn );
  1467     SETC_t();
  1468     sh4_x86.tstate = TSTATE_C;
  1469 :}
  1470 SUBV Rm, Rn {:  
  1471     COUNT_INST(I_SUBV);
       /* T = signed overflow of the subtraction */
  1472     load_reg( REG_EAX, Rm );
  1473     load_reg( REG_ECX, Rn );
  1474     SUBL_r32_r32( REG_EAX, REG_ECX );
  1475     store_reg( REG_ECX, Rn );
  1476     SETO_t();
  1477     sh4_x86.tstate = TSTATE_O;
  1478 :}
  1479 SWAP.B Rm, Rn {:  
  1480     COUNT_INST(I_SWAPB);
       /* Swap the low two bytes of Rm into Rn; tstate deliberately left alone */
  1481     load_reg( REG_EAX, Rm );
  1482     XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
  1483     store_reg( REG_EAX, Rn );
  1484 :}
  1485 SWAP.W Rm, Rn {:  
  1486     COUNT_INST(I_SWAPB);
       /* Swap the 16-bit halves of Rm into Rn */
  1487     load_reg( REG_EAX, Rm );
  1488     MOVL_r32_r32( REG_EAX, REG_ECX );
  1489     SHLL_imm_r32( 16, REG_ECX );
  1490     SHRL_imm_r32( 16, REG_EAX );
  1491     ORL_r32_r32( REG_EAX, REG_ECX );
  1492     store_reg( REG_ECX, Rn );
  1493     sh4_x86.tstate = TSTATE_NONE;
  1494 :}
  1495 TAS.B @Rn {:  
  1496     COUNT_INST(I_TASB);
       /* Test-and-set: T = (byte == 0), then write byte back with bit 7 set.
        * Address parked in a stack slot across the read call. */
  1497     load_reg( REG_EAX, Rn );
  1498     MOVL_r32_rspdisp( REG_EAX, 0 );
  1499     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
  1500     TESTB_r8_r8( REG_DL, REG_DL );
  1501     SETE_t();
  1502     ORB_imms_r8( 0x80, REG_DL );
  1503     MOVL_rspdisp_r32( 0, REG_EAX );
  1504     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1505     sh4_x86.tstate = TSTATE_NONE;
  1506 :}
  1507 TST Rm, Rn {:  
  1508     COUNT_INST(I_TST);
  1509     load_reg( REG_EAX, Rm );
  1510     load_reg( REG_ECX, Rn );
  1511     TESTL_r32_r32( REG_EAX, REG_ECX );
  1512     SETE_t();
  1513     sh4_x86.tstate = TSTATE_E;
  1514 :}
  1515 TST #imm, R0 {:  
  1516     COUNT_INST(I_TSTI);
  1517     load_reg( REG_EAX, 0 );
  1518     TESTL_imms_r32( imm, REG_EAX );
  1519     SETE_t();
  1520     sh4_x86.tstate = TSTATE_E;
  1521 :}
  1522 TST.B #imm, @(R0, GBR) {:  
  1523     COUNT_INST(I_TSTB);
       /* Read-only byte test — no write-back, unlike AND.B/OR.B/XOR.B */
  1524     load_reg( REG_EAX, 0);
  1525     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
  1526     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1527     TESTB_imms_r8( imm, REG_AL );
  1528     SETE_t();
  1529     sh4_x86.tstate = TSTATE_E;
  1530 :}
  1531 XOR Rm, Rn {:  
  1532     COUNT_INST(I_XOR);
  1533     load_reg( REG_EAX, Rm );
  1534     load_reg( REG_ECX, Rn );
  1535     XORL_r32_r32( REG_EAX, REG_ECX );
  1536     store_reg( REG_ECX, Rn );
  1537     sh4_x86.tstate = TSTATE_NONE;
  1538 :}
  1539 XOR #imm, R0 {:  
  1540     COUNT_INST(I_XORI);
  1541     load_reg( REG_EAX, 0 );
  1542     XORL_imms_r32( imm, REG_EAX );
  1543     store_reg( REG_EAX, 0 );
  1544     sh4_x86.tstate = TSTATE_NONE;
  1545 :}
  1546 XOR.B #imm, @(R0, GBR) {:  
  1547     COUNT_INST(I_XORB);
       /* Read-modify-write byte at R0+GBR, same pattern as AND.B/OR.B */
  1548     load_reg( REG_EAX, 0 );
  1549     ADDL_rbpdisp_r32( R_GBR, REG_EAX ); 
  1550     MOVL_r32_rspdisp( REG_EAX, 0 );
  1551     MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
  1552     MOVL_rspdisp_r32( 0, REG_EAX );
  1553     XORL_imms_r32( imm, REG_EDX );
  1554     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1555     sh4_x86.tstate = TSTATE_NONE;
  1556 :}
  1557 XTRCT Rm, Rn {:
  1558     COUNT_INST(I_XTRCT);
       /* Rn = (Rm low 16 bits << 16) | (Rn >> 16) — middle 32 bits of Rm:Rn */
  1559     load_reg( REG_EAX, Rm );
  1560     load_reg( REG_ECX, Rn );
  1561     SHLL_imm_r32( 16, REG_EAX );
  1562     SHRL_imm_r32( 16, REG_ECX );
  1563     ORL_r32_r32( REG_EAX, REG_ECX );
  1564     store_reg( REG_ECX, Rn );
  1565     sh4_x86.tstate = TSTATE_NONE;
  1566 :}
  1568 /* Data move instructions */
  1569 MOV Rm, Rn {:  
  1570     COUNT_INST(I_MOV);
  1571     load_reg( REG_EAX, Rm );
  1572     store_reg( REG_EAX, Rn );
  1573 :}
  1574 MOV #imm, Rn {:  
  1575     COUNT_INST(I_MOVI);
  1576     MOVL_imm32_r32( imm, REG_EAX );
  1577     store_reg( REG_EAX, Rn );
  1578 :}
       /* Byte store/load templates: address in EAX, data in EDX for writes;
        * MEM_* may fault, so tstate is invalidated after every memory access. */
  1579 MOV.B Rm, @Rn {:  
  1580     COUNT_INST(I_MOVB);
  1581     load_reg( REG_EAX, Rn );
  1582     load_reg( REG_EDX, Rm );
  1583     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1584     sh4_x86.tstate = TSTATE_NONE;
  1585 :}
  1586 MOV.B Rm, @-Rn {:  
  1587     COUNT_INST(I_MOVB);
       /* Pre-decrement: write to Rn-1, commit the decrement only after the write
        * succeeds (so a fault leaves Rn unchanged). */
  1588     load_reg( REG_EAX, Rn );
  1589     LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
  1590     load_reg( REG_EDX, Rm );
  1591     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1592     ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
  1593     sh4_x86.tstate = TSTATE_NONE;
  1594 :}
  1595 MOV.B Rm, @(R0, Rn) {:  
  1596     COUNT_INST(I_MOVB);
  1597     load_reg( REG_EAX, 0 );
  1598     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1599     load_reg( REG_EDX, Rm );
  1600     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1601     sh4_x86.tstate = TSTATE_NONE;
  1602 :}
  1603 MOV.B R0, @(disp, GBR) {:  
  1604     COUNT_INST(I_MOVB);
  1605     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1606     ADDL_imms_r32( disp, REG_EAX );
  1607     load_reg( REG_EDX, 0 );
  1608     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1609     sh4_x86.tstate = TSTATE_NONE;
  1610 :}
  1611 MOV.B R0, @(disp, Rn) {:  
  1612     COUNT_INST(I_MOVB);
  1613     load_reg( REG_EAX, Rn );
  1614     ADDL_imms_r32( disp, REG_EAX );
  1615     load_reg( REG_EDX, 0 );
  1616     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1617     sh4_x86.tstate = TSTATE_NONE;
  1618 :}
  1619 MOV.B @Rm, Rn {:  
  1620     COUNT_INST(I_MOVB);
  1621     load_reg( REG_EAX, Rm );
  1622     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1623     store_reg( REG_EAX, Rn );
  1624     sh4_x86.tstate = TSTATE_NONE;
  1625 :}
  1626 MOV.B @Rm+, Rn {:  
  1627     COUNT_INST(I_MOVB);
       /* Post-increment skipped when Rm==Rn: the loaded value overwrites Rm */
  1628     load_reg( REG_EAX, Rm );
  1629     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1630     if( Rm != Rn ) {
  1631     	ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
       /* NOTE(review): original line 1632 (closing brace of the if) is missing from
        * this capture; verify upstream. */
  1633     store_reg( REG_EAX, Rn );
  1634     sh4_x86.tstate = TSTATE_NONE;
  1635 :}
  1636 MOV.B @(R0, Rm), Rn {:  
  1637     COUNT_INST(I_MOVB);
  1638     load_reg( REG_EAX, 0 );
  1639     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1640     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1641     store_reg( REG_EAX, Rn );
  1642     sh4_x86.tstate = TSTATE_NONE;
  1643 :}
  1644 MOV.B @(disp, GBR), R0 {:  
  1645     COUNT_INST(I_MOVB);
  1646     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1647     ADDL_imms_r32( disp, REG_EAX );
  1648     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1649     store_reg( REG_EAX, 0 );
  1650     sh4_x86.tstate = TSTATE_NONE;
  1651 :}
  1652 MOV.B @(disp, Rm), R0 {:  
  1653     COUNT_INST(I_MOVB);
  1654     load_reg( REG_EAX, Rm );
  1655     ADDL_imms_r32( disp, REG_EAX );
  1656     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1657     store_reg( REG_EAX, 0 );
  1658     sh4_x86.tstate = TSTATE_NONE;
  1659 :}
  1660 MOV.L Rm, @Rn {:
  1661     COUNT_INST(I_MOVL);
  1662     load_reg( REG_EAX, Rn );
  1663     check_walign32(REG_EAX);
  1664     MOVL_r32_r32( REG_EAX, REG_ECX );
  1665     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1666     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1667     JNE_label( notsq );
  1668     ANDL_imms_r32( 0x3C, REG_EAX );
  1669     load_reg( REG_EDX, Rm );
  1670     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1671     JMP_label(end);
  1672     JMP_TARGET(notsq);
  1673     load_reg( REG_EDX, Rm );
  1674     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1675     JMP_TARGET(end);
  1676     sh4_x86.tstate = TSTATE_NONE;
  1677 :}
  1678 MOV.L Rm, @-Rn {:  
  1679     COUNT_INST(I_MOVL);
  1680     load_reg( REG_EAX, Rn );
  1681     ADDL_imms_r32( -4, REG_EAX );
  1682     check_walign32( REG_EAX );
  1683     load_reg( REG_EDX, Rm );
  1684     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1685     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  1686     sh4_x86.tstate = TSTATE_NONE;
  1687 :}
  1688 MOV.L Rm, @(R0, Rn) {:  
  1689     COUNT_INST(I_MOVL);
  1690     load_reg( REG_EAX, 0 );
  1691     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1692     check_walign32( REG_EAX );
  1693     load_reg( REG_EDX, Rm );
  1694     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1695     sh4_x86.tstate = TSTATE_NONE;
  1696 :}
  1697 MOV.L R0, @(disp, GBR) {:  
           /* GBR-relative store: write R0 to @(GBR+disp). */
  1698     COUNT_INST(I_MOVL);
  1699     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1700     ADDL_imms_r32( disp, REG_EAX );
  1701     check_walign32( REG_EAX );
  1702     load_reg( REG_EDX, 0 );
  1703     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1704     sh4_x86.tstate = TSTATE_NONE;
  1705 :}
  1706 MOV.L Rm, @(disp, Rn) {:  
           /* Displacement store with the same store-queue fast path as
            * MOV.L Rm, @Rn: effective addresses in 0xE0000000-0xE3FFFFFF
            * write directly into sh4r.store_queue. */
  1707     COUNT_INST(I_MOVL);
  1708     load_reg( REG_EAX, Rn );
  1709     ADDL_imms_r32( disp, REG_EAX );
  1710     check_walign32( REG_EAX );
  1711     MOVL_r32_r32( REG_EAX, REG_ECX );
  1712     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1713     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1714     JNE_label( notsq );
  1715     ANDL_imms_r32( 0x3C, REG_EAX );
  1716     load_reg( REG_EDX, Rm );
  1717     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1718     JMP_label(end);
  1719     JMP_TARGET(notsq);
  1720     load_reg( REG_EDX, Rm );
  1721     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1722     JMP_TARGET(end);
  1723     sh4_x86.tstate = TSTATE_NONE;
  1724 :}
  1725 MOV.L @Rm, Rn {:  
           /* Load a long from @Rm into Rn. */
  1726     COUNT_INST(I_MOVL);
  1727     load_reg( REG_EAX, Rm );
  1728     check_ralign32( REG_EAX );
  1729     MEM_READ_LONG( REG_EAX, REG_EAX );
  1730     store_reg( REG_EAX, Rn );
  1731     sh4_x86.tstate = TSTATE_NONE;
  1732 :}
  1733 MOV.L @Rm+, Rn {:  
           /* Post-increment load: Rn = @Rm, then Rm += 4. When Rm == Rn the
            * loaded value wins, so the increment is skipped.
            * Fix: restored the closing brace of the if-statement (original
            * line 1740) that was lost in extraction. */
  1734     COUNT_INST(I_MOVL);
  1735     load_reg( REG_EAX, Rm );
  1736     check_ralign32( REG_EAX );
  1737     MEM_READ_LONG( REG_EAX, REG_EAX );
  1738     if( Rm != Rn ) {
  1739     	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  1740     }
  1741     store_reg( REG_EAX, Rn );
  1742     sh4_x86.tstate = TSTATE_NONE;
  1743 :}
  1744 MOV.L @(R0, Rm), Rn {:  
           /* Indexed load: Rn = @(R0+Rm). */
  1745     COUNT_INST(I_MOVL);
  1746     load_reg( REG_EAX, 0 );
  1747     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1748     check_ralign32( REG_EAX );
  1749     MEM_READ_LONG( REG_EAX, REG_EAX );
  1750     store_reg( REG_EAX, Rn );
  1751     sh4_x86.tstate = TSTATE_NONE;
  1752 :}
  1753 MOV.L @(disp, GBR), R0 {:
           /* GBR-relative load: R0 = @(GBR+disp). */
  1754     COUNT_INST(I_MOVL);
  1755     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1756     ADDL_imms_r32( disp, REG_EAX );
  1757     check_ralign32( REG_EAX );
  1758     MEM_READ_LONG( REG_EAX, REG_EAX );
  1759     store_reg( REG_EAX, 0 );
  1760     sh4_x86.tstate = TSTATE_NONE;
  1761 :}
  1762 MOV.L @(disp, PC), Rn {:  
           /* PC-relative long load (constant pool access). If the target is
            * inside the current icache page, read the value at translation
            * time; otherwise emit a runtime read via sh4r.pc.
            * Fix: restored the two closing braces (original lines 1788 and
            * 1790) that were lost in extraction. */
  1763     COUNT_INST(I_MOVLPC);
  1764     if( sh4_x86.in_delay_slot ) {
  1765 	SLOTILLEGAL();
  1766     } else {
  1767 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1768 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1769 	    // If the target address is in the same page as the code, it's
  1770 	    // pretty safe to just ref it directly and circumvent the whole
  1771 	    // memory subsystem. (this is a big performance win)
  1773 	    // FIXME: There's a corner-case that's not handled here when
  1774 	    // the current code-page is in the ITLB but not in the UTLB.
  1775 	    // (should generate a TLB miss although need to test SH4 
  1776 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1777 	    // behaviour though.
  1778 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1779 	    MOVL_moffptr_eax( ptr );
  1780 	} else {
  1781 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1782 	    // different virtual address than the translation was done with,
  1783 	    // but we can safely assume that the low bits are the same.
  1784 	    MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
  1785 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1786 	    MEM_READ_LONG( REG_EAX, REG_EAX );
  1787 	    sh4_x86.tstate = TSTATE_NONE;
  1788 	}
  1789 	store_reg( REG_EAX, Rn );
  1790     }
  1791 :}
  1792 MOV.L @(disp, Rm), Rn {:  
           /* Displacement load: Rn = @(Rm+disp). */
  1793     COUNT_INST(I_MOVL);
  1794     load_reg( REG_EAX, Rm );
  1795     ADDL_imms_r32( disp, REG_EAX );
  1796     check_ralign32( REG_EAX );
  1797     MEM_READ_LONG( REG_EAX, REG_EAX );
  1798     store_reg( REG_EAX, Rn );
  1799     sh4_x86.tstate = TSTATE_NONE;
  1800 :}
  1801 MOV.W Rm, @Rn {:  
           /* Store the low word of Rm to @Rn. */
  1802     COUNT_INST(I_MOVW);
  1803     load_reg( REG_EAX, Rn );
  1804     check_walign16( REG_EAX );
  1805     load_reg( REG_EDX, Rm );
  1806     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1807     sh4_x86.tstate = TSTATE_NONE;
  1808 :}
  1809 MOV.W Rm, @-Rn {:  
           /* Pre-decrement word store; Rn is decremented only after the
            * write succeeds, so an exception leaves Rn unchanged. */
  1810     COUNT_INST(I_MOVW);
  1811     load_reg( REG_EAX, Rn );
  1812     check_walign16( REG_EAX );
  1813     LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
  1814     load_reg( REG_EDX, Rm );
  1815     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1816     ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
  1817     sh4_x86.tstate = TSTATE_NONE;
  1818 :}
  1819 MOV.W Rm, @(R0, Rn) {:  
           /* Indexed word store: @(R0+Rn) = low word of Rm. */
  1820     COUNT_INST(I_MOVW);
  1821     load_reg( REG_EAX, 0 );
  1822     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1823     check_walign16( REG_EAX );
  1824     load_reg( REG_EDX, Rm );
  1825     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1826     sh4_x86.tstate = TSTATE_NONE;
  1827 :}
  1828 MOV.W R0, @(disp, GBR) {:  
           /* GBR-relative word store: @(GBR+disp) = low word of R0. */
  1829     COUNT_INST(I_MOVW);
  1830     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1831     ADDL_imms_r32( disp, REG_EAX );
  1832     check_walign16( REG_EAX );
  1833     load_reg( REG_EDX, 0 );
  1834     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1835     sh4_x86.tstate = TSTATE_NONE;
  1836 :}
  1837 MOV.W R0, @(disp, Rn) {:  
           /* Displacement word store: @(Rn+disp) = low word of R0. */
  1838     COUNT_INST(I_MOVW);
  1839     load_reg( REG_EAX, Rn );
  1840     ADDL_imms_r32( disp, REG_EAX );
  1841     check_walign16( REG_EAX );
  1842     load_reg( REG_EDX, 0 );
  1843     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1844     sh4_x86.tstate = TSTATE_NONE;
  1845 :}
  1846 MOV.W @Rm, Rn {:  
           /* Load a word from @Rm into Rn (sign extension presumably done
            * inside MEM_READ_WORD -- confirm). */
  1847     COUNT_INST(I_MOVW);
  1848     load_reg( REG_EAX, Rm );
  1849     check_ralign16( REG_EAX );
  1850     MEM_READ_WORD( REG_EAX, REG_EAX );
  1851     store_reg( REG_EAX, Rn );
  1852     sh4_x86.tstate = TSTATE_NONE;
  1853 :}
  1854 MOV.W @Rm+, Rn {:  
           /* Post-increment word load: Rn = @Rm, then Rm += 2; increment is
            * skipped when Rm == Rn so the loaded value wins.
            * Fix: restored the closing brace of the if-statement (original
            * line 1861) that was lost in extraction. */
  1855     COUNT_INST(I_MOVW);
  1856     load_reg( REG_EAX, Rm );
  1857     check_ralign16( REG_EAX );
  1858     MEM_READ_WORD( REG_EAX, REG_EAX );
  1859     if( Rm != Rn ) {
  1860         ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
  1861     }
  1862     store_reg( REG_EAX, Rn );
  1863     sh4_x86.tstate = TSTATE_NONE;
  1864 :}
  1865 MOV.W @(R0, Rm), Rn {:  
           /* Indexed word load: Rn = @(R0+Rm). */
  1866     COUNT_INST(I_MOVW);
  1867     load_reg( REG_EAX, 0 );
  1868     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1869     check_ralign16( REG_EAX );
  1870     MEM_READ_WORD( REG_EAX, REG_EAX );
  1871     store_reg( REG_EAX, Rn );
  1872     sh4_x86.tstate = TSTATE_NONE;
  1873 :}
  1874 MOV.W @(disp, GBR), R0 {:  
           /* GBR-relative word load: R0 = @(GBR+disp). */
  1875     COUNT_INST(I_MOVW);
  1876     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1877     ADDL_imms_r32( disp, REG_EAX );
  1878     check_ralign16( REG_EAX );
  1879     MEM_READ_WORD( REG_EAX, REG_EAX );
  1880     store_reg( REG_EAX, 0 );
  1881     sh4_x86.tstate = TSTATE_NONE;
  1882 :}
  1883 MOV.W @(disp, PC), Rn {:  
           /* PC-relative word load; same icache fast path as the long form,
            * but the icache value must be explicitly sign-extended since it
            * is read as a raw 32-bit moff load.
            * Fix: restored the two closing braces (original lines 1899 and
            * 1901) that were lost in extraction. */
  1884     COUNT_INST(I_MOVW);
  1885     if( sh4_x86.in_delay_slot ) {
  1886 	SLOTILLEGAL();
  1887     } else {
  1888 	// See comments for MOV.L @(disp, PC), Rn
  1889 	uint32_t target = pc + disp + 4;
  1890 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1891 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1892 	    MOVL_moffptr_eax( ptr );
  1893 	    MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1894 	} else {
  1895 	    MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
  1896 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1897 	    MEM_READ_WORD( REG_EAX, REG_EAX );
  1898 	    sh4_x86.tstate = TSTATE_NONE;
  1899 	}
  1900 	store_reg( REG_EAX, Rn );
  1901     }
  1902 :}
  1903 MOV.W @(disp, Rm), R0 {:  
           /* Displacement word load: R0 = @(Rm+disp). */
  1904     COUNT_INST(I_MOVW);
  1905     load_reg( REG_EAX, Rm );
  1906     ADDL_imms_r32( disp, REG_EAX );
  1907     check_ralign16( REG_EAX );
  1908     MEM_READ_WORD( REG_EAX, REG_EAX );
  1909     store_reg( REG_EAX, 0 );
  1910     sh4_x86.tstate = TSTATE_NONE;
  1911 :}
  1912 MOVA @(disp, PC), R0 {:  
           /* Effective-address calculation: R0 = (PC & ~3) + disp + 4,
            * computed at runtime from sh4r.pc so the block is position
            * independent. Illegal in a delay slot.
            * Fix: restored the closing brace of the else branch (original
            * line 1921) that was lost in extraction. */
  1913     COUNT_INST(I_MOVA);
  1914     if( sh4_x86.in_delay_slot ) {
  1915 	SLOTILLEGAL();
  1916     } else {
  1917 	MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
  1918 	ADDL_rbpdisp_r32( R_PC, REG_ECX );
  1919 	store_reg( REG_ECX, 0 );
  1920 	sh4_x86.tstate = TSTATE_NONE;
  1921     }
  1922 :}
  1923 MOVCA.L R0, @Rn {:  
           /* MOVCA.L is implemented as a plain long store (the cache
            * allocation hint has no effect in this emulator). */
  1924     COUNT_INST(I_MOVCA);
  1925     load_reg( REG_EAX, Rn );
  1926     check_walign32( REG_EAX );
  1927     load_reg( REG_EDX, 0 );
  1928     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1929     sh4_x86.tstate = TSTATE_NONE;
  1930 :}
  1932 /* Control transfer instructions */
  1933 BF disp {:
           /* Branch if T == 0 (no delay slot). Emits a conditional skip over
            * the taken-branch exit; fall-through continues translation.
            * Fix: restored the closing brace of the else branch (original
            * line 1943) that was lost in extraction. */
  1934     COUNT_INST(I_BF);
  1935     if( sh4_x86.in_delay_slot ) {
  1936 	SLOTILLEGAL();
  1937     } else {
  1938 	sh4vma_t target = disp + pc + 4;
  1939 	JT_label( nottaken );
  1940 	exit_block_rel(target, pc+2 );
  1941 	JMP_TARGET(nottaken);
  1942 	return 2;
  1943     }
  1944 :}
  1945 BF/S disp {:
           /* Branch if T == 0, with delay slot. When the delay-slot
            * instruction is untranslatable, fall back to computing new_pc at
            * runtime and exiting to the emulator; otherwise translate the
            * delay slot on both the taken and not-taken paths, patching the
            * conditional jump displacement afterwards.
            * Fix: restored the two closing braces (original lines 1976 and
            * 1977) that were lost in extraction. */
  1946     COUNT_INST(I_BFS);
  1947     if( sh4_x86.in_delay_slot ) {
  1948 	SLOTILLEGAL();
  1949     } else {
  1950 	sh4_x86.in_delay_slot = DELAY_PC;
  1951 	if( UNTRANSLATABLE(pc+2) ) {
  1952 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1953 	    JT_label(nottaken);
  1954 	    ADDL_imms_r32( disp, REG_EAX );
  1955 	    JMP_TARGET(nottaken);
  1956 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1957 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1958 	    exit_block_emu(pc+2);
  1959 	    sh4_x86.branch_taken = TRUE;
  1960 	    return 2;
  1961 	} else {
  1962 	    LOAD_t();
  1963 	    sh4vma_t target = disp + pc + 4;
  1964 	    JCC_cc_rel32(sh4_x86.tstate,0);
  1965 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  1966 	    int save_tstate = sh4_x86.tstate;
  1967 	    sh4_translate_instruction(pc+2);
  1968             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  1969 	    exit_block_rel( target, pc+4 );
  1971 	    // not taken
  1972 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1973 	    sh4_x86.tstate = save_tstate;
  1974 	    sh4_translate_instruction(pc+2);
  1975 	    return 4;
  1976 	}
  1977     }
  1978 :}
  1979 BRA disp {:  
           /* Unconditional PC-relative branch with delay slot.
            * Fix: restored the two closing braces (original lines 1996 and
            * 1997) that were lost in extraction. */
  1980     COUNT_INST(I_BRA);
  1981     if( sh4_x86.in_delay_slot ) {
  1982 	SLOTILLEGAL();
  1983     } else {
  1984 	sh4_x86.in_delay_slot = DELAY_PC;
  1985 	sh4_x86.branch_taken = TRUE;
  1986 	if( UNTRANSLATABLE(pc+2) ) {
  1987 	    MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1988 	    ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
  1989 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1990 	    exit_block_emu(pc+2);
  1991 	    return 2;
  1992 	} else {
  1993 	    sh4_translate_instruction( pc + 2 );
  1994 	    exit_block_rel( disp + pc + 4, pc+4 );
  1995 	    return 4;
  1996 	}
  1997     }
  1998 :}
  1999 BRAF Rn {:  
           /* Register-relative branch: new_pc = PC + 4 + Rn, with delay
            * slot. Target is only known at runtime, so new_pc is stored and
            * the block exits via the newpcset path.
            * Fix: restored the two closing braces (original lines 2018 and
            * 2019) that were lost in extraction. */
  2000     COUNT_INST(I_BRAF);
  2001     if( sh4_x86.in_delay_slot ) {
  2002 	SLOTILLEGAL();
  2003     } else {
  2004 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2005 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2006 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  2007 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2008 	sh4_x86.in_delay_slot = DELAY_PC;
  2009 	sh4_x86.tstate = TSTATE_NONE;
  2010 	sh4_x86.branch_taken = TRUE;
  2011 	if( UNTRANSLATABLE(pc+2) ) {
  2012 	    exit_block_emu(pc+2);
  2013 	    return 2;
  2014 	} else {
  2015 	    sh4_translate_instruction( pc + 2 );
  2016 	    exit_block_newpcset(pc+4);
  2017 	    return 4;
  2018 	}
  2019     }
  2020 :}
  2021 BSR disp {:  
           /* Branch to subroutine: PR = PC + 4, then PC-relative branch with
            * delay slot.
            * Fix: restored the two closing braces (original lines 2041 and
            * 2042) that were lost in extraction. */
  2022     COUNT_INST(I_BSR);
  2023     if( sh4_x86.in_delay_slot ) {
  2024 	SLOTILLEGAL();
  2025     } else {
  2026 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2027 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2028 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2029 	sh4_x86.in_delay_slot = DELAY_PC;
  2030 	sh4_x86.branch_taken = TRUE;
  2031 	sh4_x86.tstate = TSTATE_NONE;
  2032 	if( UNTRANSLATABLE(pc+2) ) {
  2033 	    ADDL_imms_r32( disp, REG_EAX );
  2034 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2035 	    exit_block_emu(pc+2);
  2036 	    return 2;
  2037 	} else {
  2038 	    sh4_translate_instruction( pc + 2 );
  2039 	    exit_block_rel( disp + pc + 4, pc+4 );
  2040 	    return 4;
  2041 	}
  2042     }
  2043 :}
  2044 BSRF Rn {:  
           /* Branch to subroutine, register-relative: PR = PC + 4,
            * new_pc = PR + Rn, with delay slot.
            * Fix: restored the two closing braces (original lines 2065 and
            * 2066) that were lost in extraction. */
  2045     COUNT_INST(I_BSRF);
  2046     if( sh4_x86.in_delay_slot ) {
  2047 	SLOTILLEGAL();
  2048     } else {
  2049 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2050 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2051 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2052 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  2053 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2055 	sh4_x86.in_delay_slot = DELAY_PC;
  2056 	sh4_x86.tstate = TSTATE_NONE;
  2057 	sh4_x86.branch_taken = TRUE;
  2058 	if( UNTRANSLATABLE(pc+2) ) {
  2059 	    exit_block_emu(pc+2);
  2060 	    return 2;
  2061 	} else {
  2062 	    sh4_translate_instruction( pc + 2 );
  2063 	    exit_block_newpcset(pc+4);
  2064 	    return 4;
  2065 	}
  2066     }
  2067 :}
  2068 BT disp {:
           /* Branch if T == 1 (no delay slot); mirror image of BF.
            * Fix: restored the closing brace of the else branch (original
            * line 2078) that was lost in extraction. */
  2069     COUNT_INST(I_BT);
  2070     if( sh4_x86.in_delay_slot ) {
  2071 	SLOTILLEGAL();
  2072     } else {
  2073 	sh4vma_t target = disp + pc + 4;
  2074 	JF_label( nottaken );
  2075 	exit_block_rel(target, pc+2 );
  2076 	JMP_TARGET(nottaken);
  2077 	return 2;
  2078     }
  2079 :}
  2080 BT/S disp {:
           /* Branch if T == 1, with delay slot; mirror image of BF/S (the
            * condition is inverted via tstate^1 so the patched jump skips
            * the taken path when T == 0).
            * Fix: restored the two closing braces (original lines 2110 and
            * 2111) lost in extraction; normalized LOAD_t() indentation. */
  2081     COUNT_INST(I_BTS);
  2082     if( sh4_x86.in_delay_slot ) {
  2083 	SLOTILLEGAL();
  2084     } else {
  2085 	sh4_x86.in_delay_slot = DELAY_PC;
  2086 	if( UNTRANSLATABLE(pc+2) ) {
  2087 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2088 	    JF_label(nottaken);
  2089 	    ADDL_imms_r32( disp, REG_EAX );
  2090 	    JMP_TARGET(nottaken);
  2091 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  2092 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2093 	    exit_block_emu(pc+2);
  2094 	    sh4_x86.branch_taken = TRUE;
  2095 	    return 2;
  2096 	} else {
  2097 	    LOAD_t();
  2098 	    JCC_cc_rel32(sh4_x86.tstate^1,0);
  2099 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  2101 	    int save_tstate = sh4_x86.tstate;
  2102 	    sh4_translate_instruction(pc+2);
  2103             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  2104 	    exit_block_rel( disp + pc + 4, pc+4 );
  2105 	    // not taken
  2106 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  2107 	    sh4_x86.tstate = save_tstate;
  2108 	    sh4_translate_instruction(pc+2);
  2109 	    return 4;
  2110 	}
  2111     }
  2112 :}
  2113 JMP @Rn {:  
           /* Indirect jump: new_pc = Rn, with delay slot.
            * Fix: restored the two closing braces (original lines 2129 and
            * 2130) that were lost in extraction. */
  2114     COUNT_INST(I_JMP);
  2115     if( sh4_x86.in_delay_slot ) {
  2116 	SLOTILLEGAL();
  2117     } else {
  2118 	load_reg( REG_ECX, Rn );
  2119 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2120 	sh4_x86.in_delay_slot = DELAY_PC;
  2121 	sh4_x86.branch_taken = TRUE;
  2122 	if( UNTRANSLATABLE(pc+2) ) {
  2123 	    exit_block_emu(pc+2);
  2124 	    return 2;
  2125 	} else {
  2126 	    sh4_translate_instruction(pc+2);
  2127 	    exit_block_newpcset(pc+4);
  2128 	    return 4;
  2129 	}
  2130     }
  2131 :}
  2132 JSR @Rn {:  
           /* Jump to subroutine: PR = PC + 4, new_pc = Rn, with delay slot.
            * Fix: restored the two closing braces (original lines 2152 and
            * 2153) that were lost in extraction. */
  2133     COUNT_INST(I_JSR);
  2134     if( sh4_x86.in_delay_slot ) {
  2135 	SLOTILLEGAL();
  2136     } else {
  2137 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2138 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2139 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2140 	load_reg( REG_ECX, Rn );
  2141 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2142 	sh4_x86.in_delay_slot = DELAY_PC;
  2143 	sh4_x86.branch_taken = TRUE;
  2144 	sh4_x86.tstate = TSTATE_NONE;
  2145 	if( UNTRANSLATABLE(pc+2) ) {
  2146 	    exit_block_emu(pc+2);
  2147 	    return 2;
  2148 	} else {
  2149 	    sh4_translate_instruction(pc+2);
  2150 	    exit_block_newpcset(pc+4);
  2151 	    return 4;
  2152 	}
  2153     }
  2154 :}
  2155 RTE {:  
           /* Return from exception (privileged): new_pc = SPC, SR = SSR via
            * sh4_write_sr. Restoring SR can change the processor mode and
            * FPU-enable state, so those cached flags are invalidated.
            * Fix: restored the two closing braces (original lines 2177 and
            * 2178) that were lost in extraction. */
  2156     COUNT_INST(I_RTE);
  2157     if( sh4_x86.in_delay_slot ) {
  2158 	SLOTILLEGAL();
  2159     } else {
  2160 	check_priv();
  2161 	MOVL_rbpdisp_r32( R_SPC, REG_ECX );
  2162 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2163 	MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2164 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2165 	sh4_x86.in_delay_slot = DELAY_PC;
  2166 	sh4_x86.fpuen_checked = FALSE;
  2167 	sh4_x86.tstate = TSTATE_NONE;
  2168 	sh4_x86.branch_taken = TRUE;
  2169     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2170 	if( UNTRANSLATABLE(pc+2) ) {
  2171 	    exit_block_emu(pc+2);
  2172 	    return 2;
  2173 	} else {
  2174 	    sh4_translate_instruction(pc+2);
  2175 	    exit_block_newpcset(pc+4);
  2176 	    return 4;
  2177 	}
  2178     }
  2179 :}
  2180 RTS {:  
           /* Return from subroutine: new_pc = PR, with delay slot.
            * Fix: restored the two closing braces (original lines 2196 and
            * 2197) that were lost in extraction. */
  2181     COUNT_INST(I_RTS);
  2182     if( sh4_x86.in_delay_slot ) {
  2183 	SLOTILLEGAL();
  2184     } else {
  2185 	MOVL_rbpdisp_r32( R_PR, REG_ECX );
  2186 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2187 	sh4_x86.in_delay_slot = DELAY_PC;
  2188 	sh4_x86.branch_taken = TRUE;
  2189 	if( UNTRANSLATABLE(pc+2) ) {
  2190 	    exit_block_emu(pc+2);
  2191 	    return 2;
  2192 	} else {
  2193 	    sh4_translate_instruction(pc+2);
  2194 	    exit_block_newpcset(pc+4);
  2195 	    return 4;
  2196 	}
  2197     }
  2198 :}
  2199 TRAPA #imm {:  
           /* Software trap: advance PC past this instruction, then call
            * sh4_raise_trap(imm) and exit the block.
            * Fix: restored the closing brace of the else branch (original
            * line 2212) that was lost in extraction. */
  2200     COUNT_INST(I_TRAPA);
  2201     if( sh4_x86.in_delay_slot ) {
  2202 	SLOTILLEGAL();
  2203     } else {
  2204 	MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX );   // 5
  2205 	ADDL_r32_rbpdisp( REG_ECX, R_PC );
  2206 	MOVL_imm32_r32( imm, REG_EAX );
  2207 	CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
  2208 	sh4_x86.tstate = TSTATE_NONE;
  2209 	exit_block_pcset(pc+2);
  2210 	sh4_x86.branch_taken = TRUE;
  2211 	return 2;
  2212     }
  2213 :}
  2214 UNDEF {:  
           /* Undefined opcode: raise slot-illegal if in a delay slot (pc-2
            * points back at the branch), otherwise general illegal.
            * Fix: restored the closing brace of the else branch (original
            * line 2221) that was lost in extraction. */
  2215     COUNT_INST(I_UNDEF);
  2216     if( sh4_x86.in_delay_slot ) {
  2217 	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4);    
  2218     } else {
  2219 	exit_block_exc(EXC_ILLEGAL, pc, 2);    
  2220 	return 2;
  2221     }
  2222 :}
  2224 CLRMAC {:  
           /* Clear both halves of the MAC register. */
  2225     COUNT_INST(I_CLRMAC);
  2226     XORL_r32_r32(REG_EAX, REG_EAX);
  2227     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2228     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2229     sh4_x86.tstate = TSTATE_NONE;
  2230 :}
  2231 CLRS {:
           /* Clear the S flag: force host carry to 0 and store it into S. */
  2232     COUNT_INST(I_CLRS);
  2233     CLC();
  2234     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2235     sh4_x86.tstate = TSTATE_NONE;
  2236 :}
  2237 CLRT {:  
           /* Clear T: force host carry to 0, store into T; the host carry
            * flag now mirrors T so tstate stays cached as TSTATE_C. */
  2238     COUNT_INST(I_CLRT);
  2239     CLC();
  2240     SETC_t();
  2241     sh4_x86.tstate = TSTATE_C;
  2242 :}
  2243 SETS {:  
           /* Set the S flag: force host carry to 1 and store it into S. */
  2244     COUNT_INST(I_SETS);
  2245     STC();
  2246     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2247     sh4_x86.tstate = TSTATE_NONE;
  2248 :}
  2249 SETT {:  
           /* Set T: force host carry to 1, store into T; host carry now
            * mirrors T so tstate stays cached as TSTATE_C. */
  2250     COUNT_INST(I_SETT);
  2251     STC();
  2252     SETC_t();
  2253     sh4_x86.tstate = TSTATE_C;
  2254 :}
  2256 /* Floating point moves */
  2257 FMOV FRm, FRn {:  
           /* FP register move. With SZ=1 (double_size) a DR pair is moved
            * as two 32-bit halves; with SZ=0 a single FR is moved.
            * Fix: restored the closing brace of the else branch (original
            * line 2268) that was lost in extraction. */
  2258     COUNT_INST(I_FMOV1);
  2259     check_fpuen();
  2260     if( sh4_x86.double_size ) {
  2261         load_dr0( REG_EAX, FRm );
  2262         load_dr1( REG_ECX, FRm );
  2263         store_dr0( REG_EAX, FRn );
  2264         store_dr1( REG_ECX, FRn );
  2265     } else {
  2266         load_fr( REG_EAX, FRm ); // SZ=0 branch
  2267         store_fr( REG_EAX, FRn );
  2268     }
  2269 :}
  2270 FMOV FRm, @Rn {: 
           /* FP store to @Rn: one long (SZ=0) or two longs for a DR pair
            * (SZ=1); Rn is reloaded for the second write because the memory
            * call may clobber EAX.
            * Fix: restored the closing brace of the else branch (original
            * line 2286) that was lost in extraction. */
  2271     COUNT_INST(I_FMOV2);
  2272     check_fpuen();
  2273     load_reg( REG_EAX, Rn );
  2274     if( sh4_x86.double_size ) {
  2275         check_walign64( REG_EAX );
  2276         load_dr0( REG_EDX, FRm );
  2277         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2278         load_reg( REG_EAX, Rn );
  2279         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2280         load_dr1( REG_EDX, FRm );
  2281         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2282     } else {
  2283         check_walign32( REG_EAX );
  2284         load_fr( REG_EDX, FRm );
  2285         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2286     }
  2287     sh4_x86.tstate = TSTATE_NONE;
  2288 :}
  2289 FMOV @Rm, FRn {:  
           /* FP load from @Rm: one long (SZ=0) or two longs into a DR pair
            * (SZ=1).
            * Fix: restored the closing brace of the else branch (original
            * line 2305) that was lost in extraction. */
  2290     COUNT_INST(I_FMOV5);
  2291     check_fpuen();
  2292     load_reg( REG_EAX, Rm );
  2293     if( sh4_x86.double_size ) {
  2294         check_ralign64( REG_EAX );
  2295         MEM_READ_LONG( REG_EAX, REG_EAX );
  2296         store_dr0( REG_EAX, FRn );
  2297         load_reg( REG_EAX, Rm );
  2298         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2299         MEM_READ_LONG( REG_EAX, REG_EAX );
  2300         store_dr1( REG_EAX, FRn );
  2301     } else {
  2302         check_ralign32( REG_EAX );
  2303         MEM_READ_LONG( REG_EAX, REG_EAX );
  2304         store_fr( REG_EAX, FRn );
  2305     }
  2306     sh4_x86.tstate = TSTATE_NONE;
  2307 :}
  2308 FMOV FRm, @-Rn {:  
           /* Pre-decrement FP store; Rn is decremented (by 8 or 4) only
            * after the writes succeed.
            * Fix: restored the closing brace of the else branch (original
            * line 2328) that was lost in extraction. */
  2309     COUNT_INST(I_FMOV3);
  2310     check_fpuen();
  2311     load_reg( REG_EAX, Rn );
  2312     if( sh4_x86.double_size ) {
  2313         check_walign64( REG_EAX );
  2314         LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
  2315         load_dr0( REG_EDX, FRm );
  2316         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2317         load_reg( REG_EAX, Rn );
  2318         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2319         load_dr1( REG_EDX, FRm );
  2320         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2321         ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
  2322     } else {
  2323         check_walign32( REG_EAX );
  2324         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2325         load_fr( REG_EDX, FRm );
  2326         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2327         ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
  2328     }
  2329     sh4_x86.tstate = TSTATE_NONE;
  2330 :}
  2331 FMOV @Rm+, FRn {:
           /* Post-increment FP load; Rm is incremented (by 8 or 4) after
            * the reads succeed.
            * Fix: restored the closing brace of the else branch (original
            * line 2349) that was lost in extraction. */
  2332     COUNT_INST(I_FMOV6);
  2333     check_fpuen();
  2334     load_reg( REG_EAX, Rm );
  2335     if( sh4_x86.double_size ) {
  2336         check_ralign64( REG_EAX );
  2337         MEM_READ_LONG( REG_EAX, REG_EAX );
  2338         store_dr0( REG_EAX, FRn );
  2339         load_reg( REG_EAX, Rm );
  2340         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2341         MEM_READ_LONG( REG_EAX, REG_EAX );
  2342         store_dr1( REG_EAX, FRn );
  2343         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
  2344     } else {
  2345         check_ralign32( REG_EAX );
  2346         MEM_READ_LONG( REG_EAX, REG_EAX );
  2347         store_fr( REG_EAX, FRn );
  2348         ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2349     }
  2350     sh4_x86.tstate = TSTATE_NONE;
  2351 :}
  2352 FMOV FRm, @(R0, Rn) {:  
           /* Indexed FP store to @(R0+Rn); the effective address is
            * recomputed for the second long of a DR pair.
            * Fix: restored the closing brace of the else branch (original
            * line 2370) that was lost in extraction. */
  2353     COUNT_INST(I_FMOV4);
  2354     check_fpuen();
  2355     load_reg( REG_EAX, Rn );
  2356     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2357     if( sh4_x86.double_size ) {
  2358         check_walign64( REG_EAX );
  2359         load_dr0( REG_EDX, FRm );
  2360         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2361         load_reg( REG_EAX, Rn );
  2362         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2363         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2364         load_dr1( REG_EDX, FRm );
  2365         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2366     } else {
  2367         check_walign32( REG_EAX );
  2368         load_fr( REG_EDX, FRm );
  2369         MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
  2370     }
  2371     sh4_x86.tstate = TSTATE_NONE;
  2372 :}
  2373 FMOV @(R0, Rm), FRn {:  
           /* Indexed FP load from @(R0+Rm); the effective address is
            * recomputed for the second long of a DR pair.
            * Fix: restored the closing brace of the else branch (original
            * line 2391) that was lost in extraction. */
  2374     COUNT_INST(I_FMOV7);
  2375     check_fpuen();
  2376     load_reg( REG_EAX, Rm );
  2377     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2378     if( sh4_x86.double_size ) {
  2379         check_ralign64( REG_EAX );
  2380         MEM_READ_LONG( REG_EAX, REG_EAX );
  2381         store_dr0( REG_EAX, FRn );
  2382         load_reg( REG_EAX, Rm );
  2383         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2384         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2385         MEM_READ_LONG( REG_EAX, REG_EAX );
  2386         store_dr1( REG_EAX, FRn );
  2387     } else {
  2388         check_ralign32( REG_EAX );
  2389         MEM_READ_LONG( REG_EAX, REG_EAX );
  2390         store_fr( REG_EAX, FRn );
  2391     }
  2392     sh4_x86.tstate = TSTATE_NONE;
  2393 :}
  2394 FLDI0 FRn {:  /* IFF PR=0 */
           /* Load 0.0f into FRn; only defined for single precision (PR=0),
            * so the double-precision case emits nothing.
            * Fix: restored the closing brace of the if-statement (original
            * line 2400) that was lost in extraction. */
  2395     COUNT_INST(I_FLDI0);
  2396     check_fpuen();
  2397     if( sh4_x86.double_prec == 0 ) {
  2398         XORL_r32_r32( REG_EAX, REG_EAX );
  2399         store_fr( REG_EAX, FRn );
  2400     }
  2401     sh4_x86.tstate = TSTATE_NONE;
  2402 :}
  2403 FLDI1 FRn {:  /* IFF PR=0 */
           /* Load 1.0f (IEEE754 bit pattern 0x3F800000) into FRn; only
            * defined for single precision (PR=0).
            * Fix: restored the closing brace of the if-statement (original
            * line 2409) that was lost in extraction. */
  2404     COUNT_INST(I_FLDI1);
  2405     check_fpuen();
  2406     if( sh4_x86.double_prec == 0 ) {
  2407         MOVL_imm32_r32( 0x3F800000, REG_EAX );
  2408         store_fr( REG_EAX, FRn );
  2409     }
  2410 :}
  2412 FLOAT FPUL, FRn {:  
           /* Convert the integer in FPUL to float/double via the x87 stack.
            * Fix: restored the closing brace of the else branch (original
            * line 2420) that was lost in extraction. */
  2413     COUNT_INST(I_FLOAT);
  2414     check_fpuen();
  2415     FILD_rbpdisp(R_FPUL);
  2416     if( sh4_x86.double_prec ) {
  2417         pop_dr( FRn );
  2418     } else {
  2419         pop_fr( FRn );
  2420     }
  2421 :}
  2422 FTRC FRm, FPUL {:  
           /* Truncate FRm/DRm to a 32-bit integer in FPUL. Values at or
            * beyond INT_MIN/INT_MAX (or NaN, via the parity jump) saturate
            * to the respective limit; otherwise the x87 control word is
            * temporarily switched to truncation mode for FISTP.
            * Fix: restored the closing brace of the else branch (original
            * line 2429) that was lost in extraction. */
  2423     COUNT_INST(I_FTRC);
  2424     check_fpuen();
  2425     if( sh4_x86.double_prec ) {
  2426         push_dr( FRm );
  2427     } else {
  2428         push_fr( FRm );
  2429     }
  2430     MOVP_immptr_rptr( &min_int, REG_ECX );
  2431     FILD_r32disp( REG_ECX, 0 );
  2432     FCOMIP_st(1);              
  2433     JAE_label( sat );     
  2434     JP_label( sat2 );       
  2435     MOVP_immptr_rptr( &max_int, REG_ECX );
  2436     FILD_r32disp( REG_ECX, 0 );
  2437     FCOMIP_st(1);
  2438     JNA_label( sat3 );
  2439     MOVP_immptr_rptr( &save_fcw, REG_EAX );
  2440     FNSTCW_r32disp( REG_EAX, 0 );
  2441     MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
  2442     FLDCW_r32disp( REG_EDX, 0 );
  2443     FISTP_rbpdisp(R_FPUL);             
  2444     FLDCW_r32disp( REG_EAX, 0 );
  2445     JMP_label(end);             
  2447     JMP_TARGET(sat);
  2448     JMP_TARGET(sat2);
  2449     JMP_TARGET(sat3);
  2450     MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
  2451     MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
  2452     FPOP_st();
  2453     JMP_TARGET(end);
  2454     sh4_x86.tstate = TSTATE_NONE;
  2455 :}
  2456 FLDS FRm, FPUL {:  
           /* Raw 32-bit move FRn -> FPUL (no conversion). */
  2457     COUNT_INST(I_FLDS);
  2458     check_fpuen();
  2459     load_fr( REG_EAX, FRm );
  2460     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2461 :}
  2462 FSTS FPUL, FRn {:  
           /* Raw 32-bit move FPUL -> FRn (no conversion). */
  2463     COUNT_INST(I_FSTS);
  2464     check_fpuen();
  2465     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2466     store_fr( REG_EAX, FRn );
  2467 :}
  2468 FCNVDS FRm, FPUL {:  
           /* Convert DRm to single precision into FPUL; only defined when
            * PR=1, so nothing is emitted otherwise.
            * Fix: restored the closing brace of the if-statement (original
            * line 2474) that was lost in extraction. */
  2469     COUNT_INST(I_FCNVDS);
  2470     check_fpuen();
  2471     if( sh4_x86.double_prec ) {
  2472         push_dr( FRm );
  2473         pop_fpul();
  2474     }
  2475 :}
  2476 FCNVSD FPUL, FRn {:  
           /* Convert the single in FPUL to double precision in DRn; only
            * defined when PR=1.
            * Fix: restored the closing brace of the if-statement (original
            * line 2482) that was lost in extraction. */
  2477     COUNT_INST(I_FCNVSD);
  2478     check_fpuen();
  2479     if( sh4_x86.double_prec ) {
  2480         push_fpul();
  2481         pop_dr( FRn );
  2482     }
  2483 :}
  2485 /* Floating point instructions */
  2486 FABS FRn {:  
           /* Absolute value on the x87 stack (double or single per PR).
            * Fix: restored the closing brace of the else branch (original
            * line 2497) that was lost in extraction. */
  2487     COUNT_INST(I_FABS);
  2488     check_fpuen();
  2489     if( sh4_x86.double_prec ) {
  2490         push_dr(FRn);
  2491         FABS_st0();
  2492         pop_dr(FRn);
  2493     } else {
  2494         push_fr(FRn);
  2495         FABS_st0();
  2496         pop_fr(FRn);
  2497     }
  2498 :}
  2499 FADD FRm, FRn {:  
           /* FRn += FRm via the x87 stack (double or single per PR).
            * Fix: restored the closing brace of the else branch (original
            * line 2512) that was lost in extraction. */
  2500     COUNT_INST(I_FADD);
  2501     check_fpuen();
  2502     if( sh4_x86.double_prec ) {
  2503         push_dr(FRm);
  2504         push_dr(FRn);
  2505         FADDP_st(1);
  2506         pop_dr(FRn);
  2507     } else {
  2508         push_fr(FRm);
  2509         push_fr(FRn);
  2510         FADDP_st(1);
  2511         pop_fr(FRn);
  2512     }
  2513 :}
  2514 FDIV FRm, FRn {:  
           /* FRn /= FRm; note FRn is pushed first so FDIVP computes
            * st(1)/st(0) = FRn/FRm.
            * Fix: restored the closing brace of the else branch (original
            * line 2527) that was lost in extraction. */
  2515     COUNT_INST(I_FDIV);
  2516     check_fpuen();
  2517     if( sh4_x86.double_prec ) {
  2518         push_dr(FRn);
  2519         push_dr(FRm);
  2520         FDIVP_st(1);
  2521         pop_dr(FRn);
  2522     } else {
  2523         push_fr(FRn);
  2524         push_fr(FRm);
  2525         FDIVP_st(1);
  2526         pop_fr(FRn);
  2527     }
  2528 :}
  2529 FMAC FR0, FRm, FRn {:  
           /* FRn += FR0 * FRm, computed on the x87 stack.
            * Fix: restored the closing brace of the else branch (original
            * line 2546) that was lost in extraction. */
  2530     COUNT_INST(I_FMAC);
  2531     check_fpuen();
  2532     if( sh4_x86.double_prec ) {
  2533         push_dr( 0 );
  2534         push_dr( FRm );
  2535         FMULP_st(1);
  2536         push_dr( FRn );
  2537         FADDP_st(1);
  2538         pop_dr( FRn );
  2539     } else {
  2540         push_fr( 0 );
  2541         push_fr( FRm );
  2542         FMULP_st(1);
  2543         push_fr( FRn );
  2544         FADDP_st(1);
  2545         pop_fr( FRn );
  2546     }
  2547 :}
  2549 FMUL FRm, FRn {:  
           /* FRn *= FRm via the x87 stack (double or single per PR).
            * Fix: restored the closing brace of the else branch (original
            * line 2562) that was lost in extraction. */
  2550     COUNT_INST(I_FMUL);
  2551     check_fpuen();
  2552     if( sh4_x86.double_prec ) {
  2553         push_dr(FRm);
  2554         push_dr(FRn);
  2555         FMULP_st(1);
  2556         pop_dr(FRn);
  2557     } else {
  2558         push_fr(FRm);
  2559         push_fr(FRn);
  2560         FMULP_st(1);
  2561         pop_fr(FRn);
  2562     }
  2563 :}
  2564 FNEG FRn {:  
           /* Negate FRn/DRn on the x87 stack.
            * Fix: restored the closing brace of the else branch (original
            * line 2575) that was lost in extraction. */
  2565     COUNT_INST(I_FNEG);
  2566     check_fpuen();
  2567     if( sh4_x86.double_prec ) {
  2568         push_dr(FRn);
  2569         FCHS_st0();
  2570         pop_dr(FRn);
  2571     } else {
  2572         push_fr(FRn);
  2573         FCHS_st0();
  2574         pop_fr(FRn);
  2575     }
  2576 :}
  2577 FSRRA FRn {:  
           /* Approximate reciprocal square root: FRn = 1/sqrt(FRn),
            * computed as FLD1 / FSQRT(FRn). Single precision only (PR=0).
            * Fix: restored the closing brace of the if-statement (original
            * line 2586) that was lost in extraction. */
  2578     COUNT_INST(I_FSRRA);
  2579     check_fpuen();
  2580     if( sh4_x86.double_prec == 0 ) {
  2581         FLD1_st0();
  2582         push_fr(FRn);
  2583         FSQRT_st0();
  2584         FDIVP_st(1);
  2585         pop_fr(FRn);
  2586     }
  2587 :}
  2588 FSQRT FRn {:  
           /* FRn = sqrt(FRn) on the x87 stack (double or single per PR).
            * Fix: restored the closing brace of the else branch (original
            * line 2599) that was lost in extraction. */
  2589     COUNT_INST(I_FSQRT);
  2590     check_fpuen();
  2591     if( sh4_x86.double_prec ) {
  2592         push_dr(FRn);
  2593         FSQRT_st0();
  2594         pop_dr(FRn);
  2595     } else {
  2596         push_fr(FRn);
  2597         FSQRT_st0();
  2598         pop_fr(FRn);
  2599     }
  2600 :}
  2601 FSUB FRm, FRn {:  
           /* FRn -= FRm; FRn is pushed first so FSUBP computes
            * st(1)-st(0) = FRn-FRm.
            * Fix: restored the closing brace of the else branch (original
            * line 2614) that was lost in extraction. */
  2602     COUNT_INST(I_FSUB);
  2603     check_fpuen();
  2604     if( sh4_x86.double_prec ) {
  2605         push_dr(FRn);
  2606         push_dr(FRm);
  2607         FSUBP_st(1);
  2608         pop_dr(FRn);
  2609     } else {
  2610         push_fr(FRn);
  2611         push_fr(FRm);
  2612         FSUBP_st(1);
  2613         pop_fr(FRn);
  2614     }
  2615 :}
  2617 FCMP/EQ FRm, FRn {:  
           /* T = (FRn == FRm). FCOMIP sets ZF on equal but also PF on
            * unordered (NaN); the SETCC/CMOV pair yields 1 only when
            * ZF=1 and PF=0, so NaN compares unequal.
            * Fix: restored the closing brace of the else branch (original
            * line 2626) that was lost in extraction. */
  2618     COUNT_INST(I_FCMPEQ);
  2619     check_fpuen();
  2620     if( sh4_x86.double_prec ) {
  2621         push_dr(FRm);
  2622         push_dr(FRn);
  2623     } else {
  2624         push_fr(FRm);
  2625         push_fr(FRn);
  2626     }
  2627     XORL_r32_r32(REG_EAX, REG_EAX);
  2628     XORL_r32_r32(REG_EDX, REG_EDX);
  2629     FCOMIP_st(1);
  2630     SETCCB_cc_r8(X86_COND_NP, REG_DL);
  2631     CMOVCCL_cc_r32_r32(X86_COND_E, REG_EDX, REG_EAX);
  2632     MOVL_r32_rbpdisp(REG_EAX, R_T);
  2633     FPOP_st();
  2634     sh4_x86.tstate = TSTATE_NONE;
  2635 :}
  2636 FCMP/GT FRm, FRn {:  
           /* T = (FRn > FRm); FRn ends on top of the stack so FCOMIP's
            * "above" condition gives the result directly.
            * Fix: restored the closing brace of the else branch (original
            * line 2645) that was lost in extraction. */
  2637     COUNT_INST(I_FCMPGT);
  2638     check_fpuen();
  2639     if( sh4_x86.double_prec ) {
  2640         push_dr(FRm);
  2641         push_dr(FRn);
  2642     } else {
  2643         push_fr(FRm);
  2644         push_fr(FRn);
  2645     }
  2646     FCOMIP_st(1);
  2647     SETA_t();
  2648     FPOP_st();
  2649     sh4_x86.tstate = TSTATE_A;
  2650 :}
  2652 FSCA FPUL, FRn {:  
           /* Sine/cosine approximation: calls the helper sh4_fsca(FPUL,
            * &fr[0][FVn]) which writes sin/cos into the FR pair. Only
            * defined for PR=0.
            * Fix: restored the closing brace of the if-statement (original
            * line 2659) that was lost in extraction. */
  2653     COUNT_INST(I_FSCA);
  2654     check_fpuen();
  2655     if( sh4_x86.double_prec == 0 ) {
  2656         LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
  2657         MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2658         CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
  2659     }
  2660     sh4_x86.tstate = TSTATE_NONE;
  2661 :}
  2662 FIPR FVm, FVn {:  
           /* 4-element vector inner product, result into the last element
            * of FVn. Uses SSE3 HADDPS when available, otherwise a chain of
            * x87 multiply/adds. Only defined for PR=0.
            * Fix: restored the two closing braces (original lines 2689 and
            * 2690) that were lost in extraction. */
  2663     COUNT_INST(I_FIPR);
  2664     check_fpuen();
  2665     if( sh4_x86.double_prec == 0 ) {
  2666         if( sh4_x86.sse3_enabled ) {
  2667             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2668             MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2669             HADDPS_xmm_xmm( 4, 4 ); 
  2670             HADDPS_xmm_xmm( 4, 4 );
  2671             MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2672         } else {
  2673             push_fr( FVm<<2 );
  2674             push_fr( FVn<<2 );
  2675             FMULP_st(1);
  2676             push_fr( (FVm<<2)+1);
  2677             push_fr( (FVn<<2)+1);
  2678             FMULP_st(1);
  2679             FADDP_st(1);
  2680             push_fr( (FVm<<2)+2);
  2681             push_fr( (FVn<<2)+2);
  2682             FMULP_st(1);
  2683             FADDP_st(1);
  2684             push_fr( (FVm<<2)+3);
  2685             push_fr( (FVn<<2)+3);
  2686             FMULP_st(1);
  2687             FADDP_st(1);
  2688             pop_fr( (FVn<<2)+3);
  2689         }
  2690     }
  2691 :}
  2692 FTRV XMTRX, FVn {:  
           /* 4x4 matrix (XMTRX, bank 1) * vector (FVn) transform. Inlined
            * with SSE3 when available and not shadowing; otherwise calls
            * the sh4_ftrv helper. Only defined for PR=0.
            * Fix: restored the two closing braces (original lines 2725 and
            * 2726) that were lost in extraction. */
  2693     COUNT_INST(I_FTRV);
  2694     check_fpuen();
  2695     if( sh4_x86.double_prec == 0 ) {
  2696         if( sh4_x86.sse3_enabled && sh4_x86.begin_callback == NULL ) {
  2697         	/* FIXME: For now, disable this inlining when we're running in shadow mode -
  2698         	 * it gives slightly different results from the emu core. Need to
  2699         	 * fix the precision so both give the right results.
  2700         	 */
  2701             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2702             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2703             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2704             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2706             MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2707             MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2708             MOV_xmm_xmm( 4, 6 );
  2709             MOV_xmm_xmm( 5, 7 );
  2710             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2711             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2712             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2713             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2714             MULPS_xmm_xmm( 0, 4 );
  2715             MULPS_xmm_xmm( 1, 5 );
  2716             MULPS_xmm_xmm( 2, 6 );
  2717             MULPS_xmm_xmm( 3, 7 );
  2718             ADDPS_xmm_xmm( 5, 4 );
  2719             ADDPS_xmm_xmm( 7, 6 );
  2720             ADDPS_xmm_xmm( 6, 4 );
  2721             MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2722         } else {
  2723             LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
  2724             CALL1_ptr_r32( sh4_ftrv, REG_EAX );
  2725         }
  2726     }
  2727     sh4_x86.tstate = TSTATE_NONE;
  2728 :}
FRCHG {:  
    COUNT_INST(I_FRCHG);
    check_fpuen();
    /* Toggle FPSCR.FR and physically swap the two FP register banks in sh4r */
    XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
    CALL_ptr( sh4_switch_fr_banks );
    sh4_x86.tstate = TSTATE_NONE;
:}
FSCHG {:  
    COUNT_INST(I_FSCHG);
    check_fpuen();
    /* Toggle FPSCR.SZ (FMOV transfer size). Both the in-memory cached mode
     * word and the translator's own decode state are flipped so the rest of
     * this translation block decodes FMOVs with the new size. */
    XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
    XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_size = !sh4_x86.double_size;
    sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
:}
  2747 /* Processor control instructions */
  2748 LDC Rm, SR {:
  2749     COUNT_INST(I_LDCSR);
  2750     if( sh4_x86.in_delay_slot ) {
  2751 	SLOTILLEGAL();
  2752     } else {
  2753 	check_priv();
  2754 	load_reg( REG_EAX, Rm );
  2755 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2756 	sh4_x86.fpuen_checked = FALSE;
  2757 	sh4_x86.tstate = TSTATE_NONE;
  2758     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2759 	return 2;
  2761 :}
/* LDC Rm, <creg>: copy Rm into a control register. All targets except GBR
 * are privileged. tstate is invalidated wherever check_priv() may emit a
 * flag-clobbering test; the GBR case emits no such code so tstate is kept. */
LDC Rm, GBR {: 
    COUNT_INST(I_LDC);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_GBR );
:}
LDC Rm, VBR {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SSR {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SGR {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SPC {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, DBR {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, Rn_BANK {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* LDC.L @Rm+, GBR: post-increment load into GBR (not privileged). Rm is
 * only incremented after MEM_READ_LONG so a read fault leaves it intact. */
LDC.L @Rm+, GBR {:  
    COUNT_INST(I_LDCM);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_GBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
  2818 LDC.L @Rm+, SR {:
  2819     COUNT_INST(I_LDCSRM);
  2820     if( sh4_x86.in_delay_slot ) {
  2821 	SLOTILLEGAL();
  2822     } else {
  2823 	check_priv();
  2824 	load_reg( REG_EAX, Rm );
  2825 	check_ralign32( REG_EAX );
  2826 	MEM_READ_LONG( REG_EAX, REG_EAX );
  2827 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2828 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2829 	sh4_x86.fpuen_checked = FALSE;
  2830 	sh4_x86.tstate = TSTATE_NONE;
  2831     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2832 	return 2;
  2834 :}
/* LDC.L @Rm+, <creg>: privileged post-increment loads into the remaining
 * control registers. Pattern: check alignment, read, then bump Rm by 4 so
 * a faulting read leaves Rm unmodified for the exception handler. */
LDC.L @Rm+, VBR {:  
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SSR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SGR {:  
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SPC {:  
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, DBR {:  
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, Rn_BANK {:  
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* LDS / LDS.L: loads into the system registers FPSCR, FPUL, MACH, MACL and
 * PR. FPSCR writes go through sh4_write_fpscr and end the translation block
 * (return 2) because they can flip the FP mode the translator depends on. */
LDS Rm, FPSCR {:
    COUNT_INST(I_LDSFPSCR);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
    return 2;
:}
LDS.L @Rm+, FPSCR {:  
    COUNT_INST(I_LDSFPSCRM);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    /* increment Rm only after a successful read */
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
    return 2;
:}
LDS Rm, FPUL {:  
    COUNT_INST(I_LDS);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
:}
LDS.L @Rm+, FPUL {:  
    COUNT_INST(I_LDSM);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACH {: 
    COUNT_INST(I_LDS);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_MACH );
:}
LDS.L @Rm+, MACH {:  
    COUNT_INST(I_LDSM);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACL {:  
    COUNT_INST(I_LDS);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
:}
LDS.L @Rm+, MACL {:  
    COUNT_INST(I_LDSM);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, PR {:  
    COUNT_INST(I_LDS);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_PR );
:}
LDS.L @Rm+, PR {:  
    COUNT_INST(I_LDSM);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_PR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDTLB {:  
    COUNT_INST(I_LDTLB);
    /* Delegate the UTLB entry load entirely to the MMU module */
    CALL_ptr( MMU_ldtlb );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Cache-maintenance ops emit no code beyond instruction counting -
 * presumably the operand cache is not modelled here; confirm in sh4core. */
OCBI @Rn {:
    COUNT_INST(I_OCBI);
:}
OCBP @Rn {:
    COUNT_INST(I_OCBP);
:}
OCBWB @Rn {:
    COUNT_INST(I_OCBWB);
:}
PREF @Rn {:
    COUNT_INST(I_PREF);
    load_reg( REG_EAX, Rn );
    MEM_PREFETCH( REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
SLEEP {: 
    COUNT_INST(I_SLEEP);
    check_priv();
    CALL_ptr( sh4_sleep );
    sh4_x86.tstate = TSTATE_NONE;
    /* SLEEP ends the translation block (return 2) and cannot be mid-slot */
    sh4_x86.in_delay_slot = DELAY_NONE;
    return 2;
:}
/* STC <creg>, Rn: store a control register into Rn. All privileged except
 * GBR. SR is assembled by the sh4_read_sr helper since it is kept unpacked. */
STC SR, Rn {:
    COUNT_INST(I_STCSR);
    check_priv();
    CALL_ptr(sh4_read_sr);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC GBR, Rn {:  
    COUNT_INST(I_STC);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STC VBR, Rn {:  
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_VBR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SSR, Rn {:  
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_SSR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SPC, Rn {:  
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_SPC, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SGR, Rn {:  
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_SGR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC DBR, Rn {:  
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_DBR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC Rm_BANK, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* STC.L <creg>, @-Rn: pre-decrement store of a control register. The store
 * address is computed in EAX (Rn-4) and the value in EDX; Rn itself is only
 * decremented after MEM_WRITE_LONG so a write fault leaves it unchanged. */
STC.L SR, @-Rn {:
    COUNT_INST(I_STCSRM);
    check_priv();
    CALL_ptr( sh4_read_sr );
    MOVL_r32_r32( REG_EAX, REG_EDX );
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L VBR, @-Rn {:  
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_VBR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SSR, @-Rn {:  
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_SSR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SPC, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_SPC, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SGR, @-Rn {:  
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_SGR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L DBR, @-Rn {:  
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_DBR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L Rm_BANK, @-Rn {:  
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* GBR variant is not privileged, hence no check_priv() */
STC.L GBR, @-Rn {:  
    COUNT_INST(I_STCM);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_GBR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* STS / STS.L: store system registers (FPSCR, FPUL, MACH, MACL, PR) either
 * to Rn or with pre-decrement to @-Rn. FP registers require check_fpuen();
 * the MAC/PR forms are unprivileged. Same fault-safe ordering as STC.L:
 * Rn is decremented only after the write succeeds. */
STS FPSCR, Rn {:  
    COUNT_INST(I_STSFPSCR);
    check_fpuen();
    MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L FPSCR, @-Rn {:  
    COUNT_INST(I_STSFPSCRM);
    check_fpuen();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPUL, Rn {:  
    COUNT_INST(I_STS);
    check_fpuen();
    MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L FPUL, @-Rn {:  
    COUNT_INST(I_STSM);
    check_fpuen();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACH, Rn {:  
    COUNT_INST(I_STS);
    MOVL_rbpdisp_r32( R_MACH, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L MACH, @-Rn {:  
    COUNT_INST(I_STSM);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_MACH, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACL, Rn {:  
    COUNT_INST(I_STS);
    MOVL_rbpdisp_r32( R_MACL, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L MACL, @-Rn {:  
    COUNT_INST(I_STSM);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_MACL, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS PR, Rn {:  
    COUNT_INST(I_STS);
    MOVL_rbpdisp_r32( R_PR, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L PR, @-Rn {:  
    COUNT_INST(I_STSM);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_PR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
NOP {: 
    COUNT_INST(I_NOP);
    /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
:}
  3228 %%
  3229     sh4_x86.in_delay_slot = DELAY_NONE;
  3230     return 0;
  3234 /**
  3235  * The unwind methods only work if we compiled with DWARF2 frame information
  3236  * (ie -fexceptions), otherwise we have to use the direct frame scan.
  3237  */
  3238 #ifdef HAVE_EXCEPTIONS
  3239 #include <unwind.h>
/* Search criteria + result slot for xlat_check_frame(): the address range
 * of the translated code block being looked for in the unwind trace. */
struct UnwindInfo {
    uintptr_t block_start;  /* first byte of the translated block */
    uintptr_t block_end;    /* one past the last byte of the block */
    void *pc;               /* PC found inside [block_start,block_end), or NULL */
};
  3247 static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
  3249     struct UnwindInfo *info = arg;
  3250     void *pc = (void *)_Unwind_GetIP(context);
  3251     if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
  3252         info->pc = pc;
  3253         return _URC_NORMAL_STOP;
  3255     return _URC_NO_REASON;
  3258 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3260     struct _Unwind_Exception exc;
  3261     struct UnwindInfo info;
  3263     info.pc = NULL;
  3264     info.block_start = (uintptr_t)code;
  3265     info.block_end = info.block_start + code_size;
  3266     void *result = NULL;
  3267     _Unwind_Backtrace( xlat_check_frame, &info );
  3268     return info.pc;
  3270 #else
  3271 /* Assume this is an ia32 build - amd64 should always have dwarf information */
  3272 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3274     void *result = NULL;
  3275     __asm__(
  3276         "mov %%ebp, %%eax\n\t"
  3277         "mov $0x8, %%ecx\n\t"
  3278         "mov %1, %%edx\n"
  3279         "frame_loop: test %%eax, %%eax\n\t"
  3280         "je frame_not_found\n\t"
  3281         "cmp (%%eax), %%edx\n\t"
  3282         "je frame_found\n\t"
  3283         "sub $0x1, %%ecx\n\t"
  3284         "je frame_not_found\n\t"
  3285         "movl (%%eax), %%eax\n\t"
  3286         "jmp frame_loop\n"
  3287         "frame_found: movl 0x4(%%eax), %0\n"
  3288         "frame_not_found:"
  3289         : "=r" (result)
  3290         : "r" (((uint8_t *)&sh4r) + 128 )
  3291         : "eax", "ecx", "edx" );
  3292     return result;
  3294 #endif
.