lxdream.org :: lxdream/src/sh4/sh4x86.in
filename     src/sh4/sh4x86.in
changeset    1292:799fdd4f704a
prev         1263:b3de98d19faf
next         1298:d0eb2307b847
author       nkeynes
date         Fri Aug 24 08:53:50 2012 +1000
permissions  -rw-r--r--
last change  Move the generated prologue/epilogue code out into a common entry stub
             (reduces space requirements) and pre-save all saved registers. Change
             FASTCALL to use 3 regs instead of 2 since we can now keep everything in
             regs.
/**
 * $Id$
 *
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif
#include "lxdream.h"
#include "sh4/sh4core.h"
#include "sh4/sh4dasm.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4stat.h"
#include "sh4/sh4mmio.h"
#include "sh4/mmu.h"
#include "xlat/xltcache.h"
#include "xlat/x86/x86op.h"
#include "xlat/xlatdasm.h"
#include "clock.h"

#define DEFAULT_BACKPATCH_SIZE 4096
/* Offset of a register relative to the sh4r structure, biased by -128.
 * (The entry stub loads EBP with &sh4r + 128, so the bias lets the first
 * 256 bytes of sh4r be reached with signed 8-bit displacements.) */
#define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)
#define R_T      REG_OFFSET(t)
#define R_Q      REG_OFFSET(q)
#define R_S      REG_OFFSET(s)
#define R_M      REG_OFFSET(m)
#define R_SR     REG_OFFSET(sr)
#define R_GBR    REG_OFFSET(gbr)
#define R_SSR    REG_OFFSET(ssr)
#define R_SPC    REG_OFFSET(spc)
#define R_VBR    REG_OFFSET(vbr)
#define R_MACH   REG_OFFSET(mac)+4
#define R_MACL   REG_OFFSET(mac)
#define R_PC     REG_OFFSET(pc)
#define R_NEW_PC REG_OFFSET(new_pc)
#define R_PR     REG_OFFSET(pr)
#define R_SGR    REG_OFFSET(sgr)
#define R_FPUL   REG_OFFSET(fpul)
#define R_FPSCR  REG_OFFSET(fpscr)
#define R_DBR    REG_OFFSET(dbr)
#define R_R(rn)  REG_OFFSET(r[rn])
#define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
#define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
#define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
#define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
#define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
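
/* A worked sketch of the biasing (illustrative only): with EBP loaded as
 * &sh4r + 128 by the entry stub, an access such as
 *     MOVL_rbpdisp_r32( R_PC, REG_EAX );
 * assembles to a plain "mov disp(%ebp), %eax". Since every R_* value above
 * carries the -128 bias, any field in the first 256 bytes of sh4r fits a
 * signed 8-bit displacement, keeping the hot register accesses short. */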
#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2

#define SH4_MODE_UNKNOWN -1
struct backpatch_record {
    uint32_t fixup_offset;   /* Offset of the relocation within the block's code */
    uint32_t fixup_icount;   /* Instruction count from the start of the block */
    int32_t exc_code;        /* Exception to raise, or -1 (recovery only) / -2 (pointer fixup) */
};
/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;
    uint8_t *code;
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc;
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    uint32_t sh4_mode;     /* Mirror of sh4r.xlat_sh4_mode */
    int tstate;

    /* mode settings */
    gboolean tlb_on; /* True if tlb translation is active */
    struct mem_region_fn **priv_address_space;
    struct mem_region_fn **user_address_space;

    /* Instrumentation */
    xlat_block_begin_callback_t begin_callback;
    xlat_block_end_callback_t end_callback;
    gboolean fastmem;

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};
static struct sh4_x86_state sh4_x86;

static uint8_t sh4_entry_stub[128];
void FASTCALL (*sh4_translate_enter)(void *code);

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */

static void sh4_x86_translate_unlink_block( void *use_list );

static struct xlat_target_fns x86_target_fns = {
    sh4_x86_translate_unlink_block
};
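
/**
 * Test for SSE3 support via CPUID. Leaf 1 (EAX=1) returns feature flags in
 * ECX, where bit 0 indicates SSE3; EAX/EBX/EDX are also written by CPUID,
 * hence the clobber list on the asm below.
 */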
gboolean is_sse3_supported()
{
    uint32_t features;

    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE;
}
void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
{
    sh4_x86.priv_address_space = priv;
    sh4_x86.user_address_space = user;
}
void sh4_translate_write_entry_stub(void)
{
    mem_unprotect(sh4_entry_stub, sizeof(sh4_entry_stub));
    xlat_output = sh4_entry_stub;
    PUSH_r32(REG_EBP);
    MOVP_immptr_rptr( ((uint8_t *)&sh4r) + 128, REG_EBP );
    PUSH_r32(REG_EBX);
    PUSH_r32(REG_SAVE1);
    PUSH_r32(REG_SAVE2);
#if SIZEOF_VOID_P == 8
    PUSH_r32(REG_SAVE3);
    PUSH_r32(REG_SAVE4);
    CALL_r32( REG_ARG1 );
    POP_r32(REG_SAVE4);
    POP_r32(REG_SAVE3);
#else
    SUBL_imms_r32( 8, REG_ESP );
    CALL_r32( REG_ARG1 );
    ADDL_imms_r32( 8, REG_ESP );
#endif
    POP_r32(REG_SAVE2);
    POP_r32(REG_SAVE1);
    POP_r32(REG_EBX);
    POP_r32(REG_EBP);
    RET();
    sh4_translate_enter = sh4_entry_stub;
}
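
/* Shape of the generated stub on i386 (an illustrative sketch - exact
 * encodings come from the x86op macros, and the REG_SAVE1/REG_SAVE2 and
 * argument register assignments are assumptions here):
 *
 *     push %ebp
 *     mov  $sh4r+128, %ebp     ; biased context pointer (see REG_OFFSET)
 *     push %ebx; push %esi; push %edi
 *     sub  $8, %esp            ; round the frame out for stack alignment
 *     call *%eax               ; enter the translated block (REG_ARG1)
 *     add  $8, %esp
 *     pop  %edi; pop %esi; pop %ebx; pop %ebp
 *     ret
 *
 * Pre-saving every callee-save register here, once, is what lets the
 * per-block prologue/epilogue disappear (see the changeset description). */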
void sh4_translate_init(void)
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
    sh4_x86.begin_callback = NULL;
    sh4_x86.end_callback = NULL;
    sh4_x86.fastmem = TRUE;
    sh4_x86.sse3_enabled = is_sse3_supported();
    xlat_set_target_fns(&x86_target_fns);
    sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
    sh4_translate_write_entry_stub();
}
void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
{
    sh4_x86.begin_callback = begin;
    sh4_x86.end_callback = end;
}

void sh4_translate_set_fastmem( gboolean flag )
{
    sh4_x86.fastmem = flag;
}
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    int reloc_size = 4;

    if( exc_code == -2 ) {
        reloc_size = sizeof(void *);
    }

    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }

    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
            (((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}
#define TSTATE_NONE -1
#define TSTATE_O    X86_COND_O
#define TSTATE_C    X86_COND_C
#define TSTATE_E    X86_COND_E
#define TSTATE_NE   X86_COND_NE
#define TSTATE_G    X86_COND_G
#define TSTATE_GE   X86_COND_GE
#define TSTATE_A    X86_COND_A
#define TSTATE_AE   X86_COND_AE

#define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
#define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
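
/* Usage sketch for the label macros (illustrative): each J*_label below
 * emits a jump with a placeholder rel8 of -1 and records the address of
 * that byte; JMP_TARGET then adds the distance from the placeholder to the
 * current output position, converting it into the real offset:
 *
 *     CMPL_imms_r32( 0, REG_EAX );
 *     JE_label(skip);        // forward jump, target not yet known
 *     ... code for the non-zero case ...
 *     JMP_TARGET(skip);      // patches the rel8 to land here
 *
 * The labels are ordinary C identifiers, so a name can only be bound once
 * per enclosing scope. */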
/* Convenience instructions */
#define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
#define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
#define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
#define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
#define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
#define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
#define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
#define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
#define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
#define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
#define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
#define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
#define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
#define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
#define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
#define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
#define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
#define JP_label(label)  JCC_cc_rel8(X86_COND_P,-1); MARK_JMP8(label)
#define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
#define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
#define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)

#define LOAD_t() if( sh4_x86.tstate == TSTATE_NONE ) { \
    CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; }

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_label(label) LOAD_t() \
    JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_label(label) LOAD_t() \
    JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
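
/* How the tstate cache pays off (illustrative): after CMP/EQ Rm, Rn the
 * translator sets sh4_x86.tstate = TSTATE_E, recording that the host flags
 * still hold the comparison. A following BT emits a bare conditional jump
 * via JT_label() with no reload of sh4r.t; only when tstate is TSTATE_NONE
 * does LOAD_t() re-materialize T with a CMP against memory. JF_label() can
 * invert the condition with tstate^1 because the x86 condition codes come
 * in complementary pairs that differ only in bit 0. */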
#define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
#define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
#define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
#define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register
 */
#define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
#define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

#define push_fpul()  FLDF_rbpdisp(R_FPUL)
#define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
#define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif
/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2), 4 ); \
        } else { \
            exit_block_exc(EXC_ILLEGAL, pc, 2); \
        } \
        sh4_x86.branch_taken = TRUE; \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        MOVL_rbpdisp_r32( R_SR, REG_EAX );\
        ANDL_imms_r32( SR_FD, REG_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
        sh4_x86.tstate = TSTATE_NONE; \
    }

#define check_ralign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)
#define UNDEF(ir)
/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
 */
#ifdef HAVE_FRAME_ADDRESS
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg, REG_CALLPTR);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
        CALL1_r32disp_r32(REG_CALLPTR, offset, addr_reg);
    } else {
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
        MOVP_immptr_rptr( 0, REG_ARG2 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL2_r32disp_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2);
    }
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg, REG_CALLPTR);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
        CALL2_r32disp_r32_r32(REG_CALLPTR, offset, addr_reg, value_reg);
    } else {
        if( value_reg != REG_ARG2 ) {
            MOVL_r32_r32( value_reg, REG_ARG2 );
        }
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
#if MAX_REG_ARG > 2
        MOVP_immptr_rptr( 0, REG_ARG3 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2, REG_ARG3);
#else
        MOVL_imm32_rspdisp( 0, 0 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2, 0);
#endif
    }
}
#else
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg, REG_CALLPTR);
    CALL1_r32disp_r32(REG_CALLPTR, offset, addr_reg);
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg, REG_CALLPTR);
    CALL2_r32disp_r32_r32(REG_CALLPTR, offset, addr_reg, value_reg);
}
#endif
#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
#define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
#define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc)
#define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
#define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
#define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)
#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4); sh4_x86.in_delay_slot = DELAY_NONE; return 2;

/** Offset of xlat_sh4_mode field relative to the code pointer */
#define XLAT_SH4_MODE_CODE_OFFSET  (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
#define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
#define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
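
/* Illustrative equivalents of the generated accesses (these helpers are an
 * assumption for exposition and are not used by the translator): given only
 * a block's code pointer, its header fields can be reached directly.
 */
static inline uint32_t xlat_code_get_sh4_mode( void *code )
{
    /* what CMPL_imms_r32disp(..., XLAT_SH4_MODE_CODE_OFFSET) compares against */
    return *(uint32_t *)((char *)code + XLAT_SH4_MODE_CODE_OFFSET);
}
static inline void *xlat_code_get_chain( void *code )
{
    /* what MOVP_rptrdisp_rptr(..., XLAT_CHAIN_CODE_OFFSET, ...) loads */
    return *(void **)((char *)code + XLAT_CHAIN_CODE_OFFSET);
}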
void sh4_translate_begin_block( sh4addr_t pc )
{
    sh4_x86.code = xlat_output;
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_TLB_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
    sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
    if( sh4_x86.begin_callback ) {
        CALL_ptr( sh4_x86.begin_callback );
    }
    if( sh4_profile_blocks ) {
        MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
        ADDL_imms_r32disp( 1, REG_EAX, 0 );
    }
}
uint32_t sh4_translate_end_block_size()
{
    uint32_t epilogue_size = EPILOGUE_SIZE;
    if( sh4_x86.end_callback ) {
        epilogue_size += (CALL1_PTR_MIN_SIZE - 1);
    }
    if( sh4_x86.backpatch_posn <= 3 ) {
        epilogue_size += (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
    } else {
        epilogue_size += (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
    }
    return epilogue_size;
}
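
/* Worked example of the estimate above (illustrative): with an end callback
 * and five backpatch records, the reserved trailer comes to
 *
 *     EPILOGUE_SIZE + (CALL1_PTR_MIN_SIZE-1)
 *         + 3*(12+CALL1_PTR_MIN_SIZE)    // first three records
 *         + 2*(15+CALL1_PTR_MIN_SIZE)    // remaining records
 *
 * budgeting for the two stub shapes (recovery-only vs raise-exception)
 * emitted in sh4_translate_end_block() below. */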
/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    MOVL_imm32_r32( pc, REG_EAX );
    CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
}

#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
/**
 * Test if the loaded target code pointer in %eax is valid, and if so jump
 * directly into it, bypassing the normal exit.
 */
static void jump_next_block()
{
    uint8_t *ptr = xlat_output;
    TESTP_rptr_rptr(REG_EAX, REG_EAX);
    JE_label(nocode);
    if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
        /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
        MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
        CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    } else {
        CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    }
    JNE_label(wrongmode);
    if( sh4_x86.end_callback ) {
        /* Note this does leave the stack out of alignment, but doesn't matter
         * for what we're currently using it for.
         */
        PUSH_r32(REG_EAX);
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        JMP_rptr(REG_EAX);
    }
    JMP_TARGET(wrongmode);
    MOVP_rptrdisp_rptr( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
    int rel = ptr - xlat_output;
    JMP_prerel(rel);
    JMP_TARGET(nocode);
}
/**
 * Runtime helper called from translated code: look up (translating if
 * necessary) the target block for pc, patch the caller's call site into a
 * direct jump to it, and thread the call site onto the target block's
 * use list so the link can be undone later.
 */
void FASTCALL sh4_translate_link_block( uint32_t pc )
{
    uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
    while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
        target = XLAT_BLOCK_CHAIN(target);
    }
    if( target == NULL ) {
        target = sh4_translate_basic_block( pc );
    }
    uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
    *backpatch = 0xE9;
    *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)-5;
    *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
    XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch;

    uint8_t * volatile *retptr = ((uint8_t * volatile *)__builtin_frame_address(0))+1;
    assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
    *retptr = backpatch;
}
static void emit_translate_and_backpatch()
{
    /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
    CALL1_ptr_r32(sh4_translate_link_block, REG_ARG1);

    /* When patched, the jmp instruction will be 5 bytes (either platform) -
     * we need to reserve sizeof(void*) bytes for the use-list
     * pointer
     */
    if( sizeof(void*) == 8 ) {
        NOP();
    } else {
        NOP2();
    }
}
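
/* Call-site layout sketch (illustrative). As first emitted by
 * emit_translate_and_backpatch(), a linkable exit reads:
 *
 *     call sh4_translate_link_block    ; 7 bytes (i386) / 12 bytes (x86-64)
 *     nop2 / nop                       ; pad: 5-byte jmp + pointer must fit
 *
 * After sh4_translate_link_block() runs once, the same bytes read:
 *
 *     E9 <rel32>                       ; direct jmp to the target block
 *     <use-list pointer>               ; threads this site onto the target
 *                                      ; block's use_list
 *
 * sh4_x86_translate_unlink_block() walks that list and re-emits the
 * original call+nop sequence over each site to undo the link. */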
/**
 * If we're jumping to a fixed address (or at least one fixed relative to
 * the current PC), we can do a direct branch. REG_ARG1 should contain
 * the PC at this point.
 */
static void jump_next_block_fixed_pc( sh4addr_t pc )
{
    if( IS_IN_ICACHE(pc) ) {
        if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN && sh4_x86.end_callback == NULL ) {
            /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
             * fetch-and-backpatch routine, which will replace the call with a branch */
            emit_translate_and_backpatch();
            return;
        } else {
            MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
            ANDP_imms_rptr( -4, REG_EAX );
        }
    } else if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code, REG_ARG1);
    }
    jump_next_block();
}
static void sh4_x86_translate_unlink_block( void *use_list )
{
    uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */
    void *next = use_list;
    while( next != NULL ) {
        xlat_output = (uint8_t *)next;
        next = *(void **)(xlat_output+5);
        emit_translate_and_backpatch();
    }
    xlat_output = tmp;
}
static void exit_block()
{
    if( sh4_x86.end_callback ) {
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        RET();
    }
}
/**
 * Exit the block with sh4r.pc already written
 */
void exit_block_pcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block with sh4r.new_pc written with the target pc
 */
void exit_block_newpcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block to an absolute PC
 */
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    MOVL_imm32_r32( pc, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    jump_next_block_fixed_pc(pc);
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block to a relative PC
 */
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
        /* Special case for tight loops - the PC doesn't change, and
         * we already know the target address. Just check events pending before
         * looping.
         */
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output));
        JCC_cc_prerel(X86_COND_A, backdisp);
    } else {
        MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
        ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
        MOVL_r32_rbpdisp( REG_ARG1, R_PC );
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        JBE_label(exitloop2);

        jump_next_block_fixed_pc(pc);
        JMP_TARGET(exitloop2);
    }
    exit_block();
}
/**
 * Exit unconditionally with a general exception
 */
void exit_block_exc( int code, sh4addr_t pc, int inst_adjust )
{
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc + inst_adjust)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_imm32_r32( code, REG_ARG1 );
    CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
    exit_block();
}
/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
    ADDL_r32_rbpdisp( REG_ECX, R_PC );

    MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
    MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );

    CALL_ptr( sh4_execute_instruction );
    exit_block();
}
/**
 * Write the block trailer (exception handling block)
 */
void sh4_translate_end_block( sh4addr_t pc ) {
    if( sh4_x86.branch_taken == FALSE ) {
        // Didn't exit unconditionally already, so write the termination here
        exit_block_rel( pc, pc );
    }
    if( sh4_x86.backpatch_posn != 0 ) {
        unsigned int i;
        // Exception raised - cleanup and exit
        uint8_t *end_ptr = xlat_output;
        MOVL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_rbpdisp( REG_ECX, R_SPC );
        MOVL_moffptr_eax( &sh4_cpu_period );
        INC_r32( REG_EDX );  /* Add 1 for the aborting instruction itself */
        MULL_r32( REG_EDX );
        ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
        exit_block();

        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
                if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
                    *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output;
                } else {
                    *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                }
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            } else {
                *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
                CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            }
        }
    }
}
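
/* Backpatch record semantics as consumed above (summarizing the code):
 *   exc_code >= 0  - stub raises exc_code, loads EDX with the instruction
 *                    count, and rejoins the shared cleanup code;
 *   exc_code == -1 - recovery only: just load EDX and rejoin the cleanup;
 *   exc_code == -2 - the fixup is a full pointer (the exception-return
 *                    argument passed to the MMU-aware memory calls), so it
 *                    is patched to point at the stub directly. */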
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both branch and delayed instruction as a single unit.
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch).
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    if( !sh4_x86.in_delay_slot ) {
        sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
    }

    /* check for breakpoints at this pc */
    for( int i=0; i<sh4_breakpoint_count; i++ ) {
        if( sh4_breakpoints[i].address == pc ) {
            sh4_translate_emit_breakpoint(pc);
            break;
        }
    }
%%
/* ALU operations */
ADD Rm, Rn {:
    COUNT_INST(I_ADD);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
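/* A note on the syntax from here on: this file is input to the gendec
 * tool, not plain C. Each "MNEMONIC operands {: ... :}" body is spliced
 * into the instruction decoder that gendec generates, with the operand
 * fields (Rm, Rn, imm, disp) decoded from ir. Roughly, and only as an
 * illustrative assumption about the generated shape:
 *
 *     case ...:  // ADD Rm, Rn
 *         { int Rm = (ir>>4)&0xF; int Rn = (ir>>8)&0xF;
 *           ... template body ... }
 *         break;
 */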
ADD #imm, Rn {:
    COUNT_INST(I_ADDI);
    ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    COUNT_INST(I_ADDC);
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADCL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ADDV Rm, Rn {:
    COUNT_INST(I_ADDV);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    COUNT_INST(I_AND);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ANDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:
    COUNT_INST(I_ANDI);
    load_reg( REG_EAX, 0 );
    ANDL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ANDB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_r32(REG_EAX, REG_SAVE1);
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_r32_r32(REG_SAVE1, REG_EAX);
    ANDL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
CMP/EQ Rm, Rn {:
    COUNT_INST(I_CMPEQ);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:
    COUNT_INST(I_CMPEQI);
    load_reg( REG_EAX, 0 );
    CMPL_imms_r32(imm, REG_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:
    COUNT_INST(I_CMPGE);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {:
    COUNT_INST(I_CMPGT);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:
    COUNT_INST(I_CMPHI);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {:
    COUNT_INST(I_CMPHS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
:}
CMP/PL Rn {:
    COUNT_INST(I_CMPPL);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:
    COUNT_INST(I_CMPPZ);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:
    COUNT_INST(I_CMPSTR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_ECX, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target1);
    TESTB_r8_r8( REG_AH, REG_AH );
    JE_label(target2);
    SHRL_imm_r32( 16, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target3);
    TESTB_r8_r8( REG_AH, REG_AH );
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
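/* CMP/STR sets T if any of the four byte positions of Rm and Rn are equal.
 * The XOR above turns equal bytes into zero bytes, so the TESTB/JE chain
 * short-circuits to SETE as soon as a zero byte is seen. Worked example:
 * Rm=0x11223344, Rn=0x55223377 -> XOR=0x44000033; the AH test finds the
 * zero byte, so T=1. */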
DIV0S Rm, Rn {:
    COUNT_INST(I_DIV0S);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SHRL_imm_r32( 31, REG_EAX );
    SHRL_imm_r32( 31, REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:
    COUNT_INST(I_DIV0U);
    XORL_r32_r32( REG_EAX, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_Q );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
DIV1 Rm, Rn {:
    COUNT_INST(I_DIV1);
    MOVL_rbpdisp_r32( R_M, REG_ECX );
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    SETC_r8( REG_DL ); // Q'
    CMPL_rbpdisp_r32( R_Q, REG_ECX );
    JE_label(mqequal);
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_label(end);
    JMP_TARGET(mqequal);
    SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_TARGET(end);
    store_reg( REG_EAX, Rn ); // Done with Rn now
    SETC_r8(REG_AL); // tmp1
    XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
    XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    XORL_imms_r32( 1, REG_AL );   // T = !Q'
    MOVZXL_r8_r32( REG_AL, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
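/* DIV1 is one step of the SH4's non-restoring division: the RCL shifts the
 * next dividend bit (from T) into Rn while capturing the old top bit as Q',
 * then Rm is added or subtracted depending on whether Q == M, and the carry
 * is folded back into Q and T. Repeated 32 times (with DIV0S/DIV0U setup
 * and ROTCL feeding quotient bits out), this produces a 32-bit quotient. */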
DMULS.L Rm, Rn {:
    COUNT_INST(I_DMULS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    IMULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:
    COUNT_INST(I_DMULU);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:
    COUNT_INST(I_DT);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( -1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:
    COUNT_INST(I_EXTSB);
    load_reg( REG_EAX, Rm );
    MOVSXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTS.W Rm, Rn {:
    COUNT_INST(I_EXTSW);
    load_reg( REG_EAX, Rm );
    MOVSXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTU.B Rm, Rn {:
    COUNT_INST(I_EXTUB);
    load_reg( REG_EAX, Rm );
    MOVZXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTU.W Rm, Rn {:
    COUNT_INST(I_EXTUW);
    load_reg( REG_EAX, Rm );
    MOVZXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MAC.L @Rm+, @Rn+ {:
    COUNT_INST(I_MACL);
    if( Rm == Rn ) {
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_r32(REG_EAX, REG_SAVE1);
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
    } else {
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_r32(REG_EAX, REG_SAVE1);
        load_reg( REG_EAX, Rn );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    }

    IMULL_r32( REG_SAVE1 );
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );

    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32(REG_ECX, REG_ECX);
    JE_label( nosat );
    CALL_ptr( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
MAC.W @Rm+, @Rn+ {:
    COUNT_INST(I_MACW);
    if( Rm == Rn ) {
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_r32( REG_EAX, REG_SAVE1 );
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( REG_EAX, Rn );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_r32( REG_EAX, REG_SAVE1 );
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
    }

    IMULL_r32( REG_SAVE1 );
    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32( REG_ECX, REG_ECX );
    JE_label( nosat );

    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    JNO_label( end );                     // 2
    MOVL_imm32_r32( 1, REG_EDX );         // 5
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
    JS_label( positive );                 // 2
    MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    JMP_label(end2);                      // 2

    JMP_TARGET(positive);
    MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    JMP_label(end3);                      // 2

    JMP_TARGET(nosat);
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVT Rn {:
    COUNT_INST(I_MOVT);
    MOVL_rbpdisp_r32( R_T, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MUL.L Rm, Rn {:
    COUNT_INST(I_MULL);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    COUNT_INST(I_MULSW);
    MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:
    COUNT_INST(I_MULUW);
    MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    COUNT_INST(I_NEG);
    load_reg( REG_EAX, Rm );
    NEGL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:
    COUNT_INST(I_NEGC);
    load_reg( REG_EAX, Rm );
    XORL_r32_r32( REG_ECX, REG_ECX );
    LDC_t();
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:
    COUNT_INST(I_NOT);
    load_reg( REG_EAX, Rm );
    NOTL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:
    COUNT_INST(I_OR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    COUNT_INST(I_ORI);
    load_reg( REG_EAX, 0 );
    ORL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ORB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_r32( REG_EAX, REG_SAVE1 );
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_r32_r32( REG_SAVE1, REG_EAX );
    ORL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    COUNT_INST(I_ROTCL);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:
    COUNT_INST(I_ROTCR);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCRL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:
    COUNT_INST(I_ROTL);
    load_reg( REG_EAX, Rn );
    ROLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:
    COUNT_INST(I_ROTR);
    load_reg( REG_EAX, Rn );
    RORL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    COUNT_INST(I_SHAD);
    /* Annoyingly enough, not directly convertible */
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    NEGL_r32( REG_ECX );          // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptysar);           // 2
    SARL_cl_r32( REG_EAX );       // 2
    JMP_label(end);               // 2

    JMP_TARGET(emptysar);
    SARL_imm_r32(31, REG_EAX );   // 3
    JMP_label(end2);

    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
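/* Why SHAD needs all the branching above: SH4 treats a negative Rm as an
 * arithmetic right shift by |Rm|, and a right shift whose masked count is
 * zero must fill with sign bits (the "emptysar" case) rather than leave
 * the value untouched, whereas x86 masks the CL count to 5 bits and would
 * make a 32-bit shift a no-op. */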
SHLD Rm, Rn {:
    COUNT_INST(I_SHLD);
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    NEGL_r32( REG_ECX );          // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptyshr);
    SHRL_cl_r32( REG_EAX );       // 2
    JMP_label(end);               // 2

    JMP_TARGET(emptyshr);
    XORL_r32_r32( REG_EAX, REG_EAX );
    JMP_label(end2);

    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHAL Rn {:
    COUNT_INST(I_SHAL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHAR Rn {:
    COUNT_INST(I_SHAR);
    load_reg( REG_EAX, Rn );
    SARL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL2 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL8 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL16 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLR2 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR8 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR16 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUB Rm, Rn {:
    COUNT_INST(I_SUB);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SUBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUBC Rm, Rn {:
    COUNT_INST(I_SUBC);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SUBV Rm, Rn {:
    COUNT_INST(I_SUBV);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SUBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
SWAP.B Rm, Rn {:
    COUNT_INST(I_SWAPB);
    load_reg( REG_EAX, Rm );
    XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
    store_reg( REG_EAX, Rn );
:}
SWAP.W Rm, Rn {:
    COUNT_INST(I_SWAPW);
    load_reg( REG_EAX, Rm );
    MOVL_r32_r32( REG_EAX, REG_ECX );
    SHLL_imm_r32( 16, REG_ECX );
    SHRL_imm_r32( 16, REG_EAX );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
TAS.B @Rn {:
    COUNT_INST(I_TASB);
    load_reg( REG_EAX, Rn );
    MOVL_r32_r32( REG_EAX, REG_SAVE1 );
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    TESTB_r8_r8( REG_DL, REG_DL );
    SETE_t();
    ORB_imms_r8( 0x80, REG_DL );
    MOVL_r32_r32( REG_SAVE1, REG_EAX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
TST Rm, Rn {:
    COUNT_INST(I_TST);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    TESTL_r32_r32( REG_EAX, REG_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST #imm, R0 {:
    COUNT_INST(I_TSTI);
    load_reg( REG_EAX, 0 );
    TESTL_imms_r32( imm, REG_EAX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST.B #imm, @(R0, GBR) {:
    COUNT_INST(I_TSTB);
    load_reg( REG_EAX, 0);
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    TESTB_imms_r8( imm, REG_AL );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
XOR Rm, Rn {:
    COUNT_INST(I_XOR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR #imm, R0 {:
    COUNT_INST(I_XORI);
    load_reg( REG_EAX, 0 );
    XORL_imms_r32( imm, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR.B #imm, @(R0, GBR) {:
    COUNT_INST(I_XORB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_r32( REG_EAX, REG_SAVE1 );
    MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
    MOVL_r32_r32( REG_SAVE1, REG_EAX );
    XORL_imms_r32( imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
XTRCT Rm, Rn {:
    COUNT_INST(I_XTRCT);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SHLL_imm_r32( 16, REG_EAX );
    SHRL_imm_r32( 16, REG_ECX );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Data move instructions */
MOV Rm, Rn {:
    COUNT_INST(I_MOV);
    load_reg( REG_EAX, Rm );
    store_reg( REG_EAX, Rn );
:}
MOV #imm, Rn {:
    COUNT_INST(I_MOVI);
    MOVL_imm32_r32( imm, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MOV.B Rm, @Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @-Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @(R0, Rn) {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, GBR) {:
    COUNT_INST(I_MOVB);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, Rn) {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( disp, REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm, Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rm );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
  1573 MOV.B @Rm+, Rn {:  
  1574     COUNT_INST(I_MOVB);
  1575     load_reg( REG_EAX, Rm );
  1576     MEM_READ_BYTE( REG_EAX, REG_EAX );
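           // skip the post-increment when Rm == Rn: the loaded value overwrites r[Rn] below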
  1577     if( Rm != Rn ) {
  1578     	ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
  1579     }
  1580     store_reg( REG_EAX, Rn );
  1581     sh4_x86.tstate = TSTATE_NONE;
  1582 :}
  1583 MOV.B @(R0, Rm), Rn {:  
  1584     COUNT_INST(I_MOVB);
  1585     load_reg( REG_EAX, 0 );
  1586     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1587     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1588     store_reg( REG_EAX, Rn );
  1589     sh4_x86.tstate = TSTATE_NONE;
  1590 :}
  1591 MOV.B @(disp, GBR), R0 {:  
  1592     COUNT_INST(I_MOVB);
  1593     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1594     ADDL_imms_r32( disp, REG_EAX );
  1595     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1596     store_reg( REG_EAX, 0 );
  1597     sh4_x86.tstate = TSTATE_NONE;
  1598 :}
  1599 MOV.B @(disp, Rm), R0 {:  
  1600     COUNT_INST(I_MOVB);
  1601     load_reg( REG_EAX, Rm );
  1602     ADDL_imms_r32( disp, REG_EAX );
  1603     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1604     store_reg( REG_EAX, 0 );
  1605     sh4_x86.tstate = TSTATE_NONE;
  1606 :}
  1607 MOV.L Rm, @Rn {:
  1608     COUNT_INST(I_MOVL);
  1609     load_reg( REG_EAX, Rn );
  1610     check_walign32(REG_EAX);
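           // Writes to 0xE0000000-0xE3FFFFFF hit the store queues rather than memory:
           // detect them inline and write straight into sh4r.store_queue.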
  1611     MOVL_r32_r32( REG_EAX, REG_ECX );
  1612     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1613     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1614     JNE_label( notsq );
  1615     ANDL_imms_r32( 0x3C, REG_EAX );
  1616     load_reg( REG_EDX, Rm );
  1617     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1618     JMP_label(end);
  1619     JMP_TARGET(notsq);
  1620     load_reg( REG_EDX, Rm );
  1621     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1622     JMP_TARGET(end);
  1623     sh4_x86.tstate = TSTATE_NONE;
  1624 :}
  1625 MOV.L Rm, @-Rn {:  
  1626     COUNT_INST(I_MOVL);
  1627     load_reg( REG_EAX, Rn );
  1628     ADDL_imms_r32( -4, REG_EAX );
  1629     check_walign32( REG_EAX );
  1630     load_reg( REG_EDX, Rm );
  1631     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1632     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  1633     sh4_x86.tstate = TSTATE_NONE;
  1634 :}
  1635 MOV.L Rm, @(R0, Rn) {:  
  1636     COUNT_INST(I_MOVL);
  1637     load_reg( REG_EAX, 0 );
  1638     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1639     check_walign32( REG_EAX );
  1640     load_reg( REG_EDX, Rm );
  1641     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1642     sh4_x86.tstate = TSTATE_NONE;
  1643 :}
  1644 MOV.L R0, @(disp, GBR) {:  
  1645     COUNT_INST(I_MOVL);
  1646     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1647     ADDL_imms_r32( disp, REG_EAX );
  1648     check_walign32( REG_EAX );
  1649     load_reg( REG_EDX, 0 );
  1650     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1651     sh4_x86.tstate = TSTATE_NONE;
  1652 :}
  1653 MOV.L Rm, @(disp, Rn) {:  
  1654     COUNT_INST(I_MOVL);
  1655     load_reg( REG_EAX, Rn );
  1656     ADDL_imms_r32( disp, REG_EAX );
  1657     check_walign32( REG_EAX );
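           // same store-queue fast path as MOV.L Rm, @Rn above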
  1658     MOVL_r32_r32( REG_EAX, REG_ECX );
  1659     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1660     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1661     JNE_label( notsq );
  1662     ANDL_imms_r32( 0x3C, REG_EAX );
  1663     load_reg( REG_EDX, Rm );
  1664     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1665     JMP_label(end);
  1666     JMP_TARGET(notsq);
  1667     load_reg( REG_EDX, Rm );
  1668     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1669     JMP_TARGET(end);
  1670     sh4_x86.tstate = TSTATE_NONE;
  1671 :}
  1672 MOV.L @Rm, Rn {:  
  1673     COUNT_INST(I_MOVL);
  1674     load_reg( REG_EAX, Rm );
  1675     check_ralign32( REG_EAX );
  1676     MEM_READ_LONG( REG_EAX, REG_EAX );
  1677     store_reg( REG_EAX, Rn );
  1678     sh4_x86.tstate = TSTATE_NONE;
  1679 :}
  1680 MOV.L @Rm+, Rn {:  
  1681     COUNT_INST(I_MOVL);
  1682     load_reg( REG_EAX, Rm );
  1683     check_ralign32( REG_EAX );
  1684     MEM_READ_LONG( REG_EAX, REG_EAX );
  1685     if( Rm != Rn ) {
  1686     	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  1687     }
  1688     store_reg( REG_EAX, Rn );
  1689     sh4_x86.tstate = TSTATE_NONE;
  1690 :}
  1691 MOV.L @(R0, Rm), Rn {:  
  1692     COUNT_INST(I_MOVL);
  1693     load_reg( REG_EAX, 0 );
  1694     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1695     check_ralign32( REG_EAX );
  1696     MEM_READ_LONG( REG_EAX, REG_EAX );
  1697     store_reg( REG_EAX, Rn );
  1698     sh4_x86.tstate = TSTATE_NONE;
  1699 :}
  1700 MOV.L @(disp, GBR), R0 {:
  1701     COUNT_INST(I_MOVL);
  1702     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1703     ADDL_imms_r32( disp, REG_EAX );
  1704     check_ralign32( REG_EAX );
  1705     MEM_READ_LONG( REG_EAX, REG_EAX );
  1706     store_reg( REG_EAX, 0 );
  1707     sh4_x86.tstate = TSTATE_NONE;
  1708 :}
  1709 MOV.L @(disp, PC), Rn {:  
  1710     COUNT_INST(I_MOVLPC);
  1711     if( sh4_x86.in_delay_slot ) {
  1712 	SLOTILLEGAL();
  1713     } else {
  1714 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1715 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1716 	    // If the target address is in the same page as the code, it's
  1717 	    // pretty safe to just ref it directly and circumvent the whole
  1718 	    // memory subsystem. (this is a big performance win)
  1720 	    // FIXME: There's a corner-case that's not handled here when
  1721 	    // the current code-page is in the ITLB but not in the UTLB.
  1722 	    // (should generate a TLB miss although need to test SH4 
  1723 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1724 	    // behaviour though.
  1725 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1726 	    MOVL_moffptr_eax( ptr );
  1727 	} else {
  1728 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1729 	    // different virtual address than the translation was done with,
  1730 	    // but we can safely assume that the low bits are the same.
  1731 	    MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
  1732 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1733 	    MEM_READ_LONG( REG_EAX, REG_EAX );
  1734 	    sh4_x86.tstate = TSTATE_NONE;
  1735 	}
  1736 	store_reg( REG_EAX, Rn );
  1737     }
  1738 :}
  1739 MOV.L @(disp, Rm), Rn {:  
  1740     COUNT_INST(I_MOVL);
  1741     load_reg( REG_EAX, Rm );
  1742     ADDL_imms_r32( disp, REG_EAX );
  1743     check_ralign32( REG_EAX );
  1744     MEM_READ_LONG( REG_EAX, REG_EAX );
  1745     store_reg( REG_EAX, Rn );
  1746     sh4_x86.tstate = TSTATE_NONE;
  1747 :}
  1748 MOV.W Rm, @Rn {:  
  1749     COUNT_INST(I_MOVW);
  1750     load_reg( REG_EAX, Rn );
  1751     check_walign16( REG_EAX );
  1752     load_reg( REG_EDX, Rm );
  1753     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1754     sh4_x86.tstate = TSTATE_NONE;
  1755 :}
  1756 MOV.W Rm, @-Rn {:  
  1757     COUNT_INST(I_MOVW);
  1758     load_reg( REG_EAX, Rn );
  1759     check_walign16( REG_EAX );
  1760     LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
  1761     load_reg( REG_EDX, Rm );
  1762     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1763     ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
  1764     sh4_x86.tstate = TSTATE_NONE;
  1765 :}
  1766 MOV.W Rm, @(R0, Rn) {:  
  1767     COUNT_INST(I_MOVW);
  1768     load_reg( REG_EAX, 0 );
  1769     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1770     check_walign16( REG_EAX );
  1771     load_reg( REG_EDX, Rm );
  1772     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1773     sh4_x86.tstate = TSTATE_NONE;
  1774 :}
  1775 MOV.W R0, @(disp, GBR) {:  
  1776     COUNT_INST(I_MOVW);
  1777     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1778     ADDL_imms_r32( disp, REG_EAX );
  1779     check_walign16( REG_EAX );
  1780     load_reg( REG_EDX, 0 );
  1781     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1782     sh4_x86.tstate = TSTATE_NONE;
  1783 :}
  1784 MOV.W R0, @(disp, Rn) {:  
  1785     COUNT_INST(I_MOVW);
  1786     load_reg( REG_EAX, Rn );
  1787     ADDL_imms_r32( disp, REG_EAX );
  1788     check_walign16( REG_EAX );
  1789     load_reg( REG_EDX, 0 );
  1790     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1791     sh4_x86.tstate = TSTATE_NONE;
  1792 :}
  1793 MOV.W @Rm, Rn {:  
  1794     COUNT_INST(I_MOVW);
  1795     load_reg( REG_EAX, Rm );
  1796     check_ralign16( REG_EAX );
  1797     MEM_READ_WORD( REG_EAX, REG_EAX );
  1798     store_reg( REG_EAX, Rn );
  1799     sh4_x86.tstate = TSTATE_NONE;
  1800 :}
  1801 MOV.W @Rm+, Rn {:  
  1802     COUNT_INST(I_MOVW);
  1803     load_reg( REG_EAX, Rm );
  1804     check_ralign16( REG_EAX );
  1805     MEM_READ_WORD( REG_EAX, REG_EAX );
  1806     if( Rm != Rn ) {
  1807         ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
  1808     }
  1809     store_reg( REG_EAX, Rn );
  1810     sh4_x86.tstate = TSTATE_NONE;
  1811 :}
  1812 MOV.W @(R0, Rm), Rn {:  
  1813     COUNT_INST(I_MOVW);
  1814     load_reg( REG_EAX, 0 );
  1815     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1816     check_ralign16( REG_EAX );
  1817     MEM_READ_WORD( REG_EAX, REG_EAX );
  1818     store_reg( REG_EAX, Rn );
  1819     sh4_x86.tstate = TSTATE_NONE;
  1820 :}
  1821 MOV.W @(disp, GBR), R0 {:  
  1822     COUNT_INST(I_MOVW);
  1823     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1824     ADDL_imms_r32( disp, REG_EAX );
  1825     check_ralign16( REG_EAX );
  1826     MEM_READ_WORD( REG_EAX, REG_EAX );
  1827     store_reg( REG_EAX, 0 );
  1828     sh4_x86.tstate = TSTATE_NONE;
  1829 :}
  1830 MOV.W @(disp, PC), Rn {:  
  1831     COUNT_INST(I_MOVW);
  1832     if( sh4_x86.in_delay_slot ) {
  1833 	SLOTILLEGAL();
  1834     } else {
  1835 	// See comments for MOV.L @(disp, PC), Rn
  1836 	uint32_t target = pc + disp + 4;
  1837 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1838 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1839 	    MOVL_moffptr_eax( ptr );
  1840 	    MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1841 	} else {
  1842 	    MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
  1843 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1844 	    MEM_READ_WORD( REG_EAX, REG_EAX );
  1845 	    sh4_x86.tstate = TSTATE_NONE;
  1846 	}
  1847 	store_reg( REG_EAX, Rn );
  1848     }
  1849 :}
  1850 MOV.W @(disp, Rm), R0 {:  
  1851     COUNT_INST(I_MOVW);
  1852     load_reg( REG_EAX, Rm );
  1853     ADDL_imms_r32( disp, REG_EAX );
  1854     check_ralign16( REG_EAX );
  1855     MEM_READ_WORD( REG_EAX, REG_EAX );
  1856     store_reg( REG_EAX, 0 );
  1857     sh4_x86.tstate = TSTATE_NONE;
  1858 :}
  1859 MOVA @(disp, PC), R0 {:  
  1860     COUNT_INST(I_MOVA);
  1861     if( sh4_x86.in_delay_slot ) {
  1862 	SLOTILLEGAL();
  1863     } else {
  1864 	MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
  1865 	ADDL_rbpdisp_r32( R_PC, REG_ECX );
  1866 	store_reg( REG_ECX, 0 );
  1867 	sh4_x86.tstate = TSTATE_NONE;
  1868     }
  1869 :}
  1870 MOVCA.L R0, @Rn {:  
  1871     COUNT_INST(I_MOVCA);
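           // cache-line allocation isn't modelled here, so this behaves as a plain 32-bit store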
  1872     load_reg( REG_EAX, Rn );
  1873     check_walign32( REG_EAX );
  1874     load_reg( REG_EDX, 0 );
  1875     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1876     sh4_x86.tstate = TSTATE_NONE;
  1877 :}
  1879 /* Control transfer instructions */
  1880 BF disp {:
  1881     COUNT_INST(I_BF);
  1882     if( sh4_x86.in_delay_slot ) {
  1883 	SLOTILLEGAL();
  1884     } else {
  1885 	sh4vma_t target = disp + pc + 4;
  1886 	JT_label( nottaken );
  1887 	exit_block_rel(target, pc+2 );
  1888 	JMP_TARGET(nottaken);
  1889 	return 2;
  1890     }
  1891 :}
  1892 BF/S disp {:
  1893     COUNT_INST(I_BFS);
  1894     if( sh4_x86.in_delay_slot ) {
  1895 	SLOTILLEGAL();
  1896     } else {
  1897 	sh4_x86.in_delay_slot = DELAY_PC;
  1898 	if( UNTRANSLATABLE(pc+2) ) {
  1899 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1900 	    JT_label(nottaken);
  1901 	    ADDL_imms_r32( disp, REG_EAX );
  1902 	    JMP_TARGET(nottaken);
  1903 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1904 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1905 	    exit_block_emu(pc+2);
  1906 	    sh4_x86.branch_taken = TRUE;
  1907 	    return 2;
  1908 	} else {
  1909 	    LOAD_t();
  1910 	    sh4vma_t target = disp + pc + 4;
  1911 	    JCC_cc_rel32(sh4_x86.tstate,0);
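       	    // placeholder jump to the not-taken path; the rel32 offset is backpatched below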
  1912 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  1913 	    int save_tstate = sh4_x86.tstate;
  1914 	    sh4_translate_instruction(pc+2);
  1915             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  1916 	    exit_block_rel( target, pc+4 );
  1918 	    // not taken
  1919 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1920 	    sh4_x86.tstate = save_tstate;
  1921 	    sh4_translate_instruction(pc+2);
  1922 	    return 4;
  1923 	}
  1924     }
  1925 :}
  1926 BRA disp {:  
  1927     COUNT_INST(I_BRA);
  1928     if( sh4_x86.in_delay_slot ) {
  1929 	SLOTILLEGAL();
  1930     } else {
  1931 	sh4_x86.in_delay_slot = DELAY_PC;
  1932 	sh4_x86.branch_taken = TRUE;
  1933 	if( UNTRANSLATABLE(pc+2) ) {
  1934 	    MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1935 	    ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
  1936 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1937 	    exit_block_emu(pc+2);
  1938 	    return 2;
  1939 	} else {
  1940 	    sh4_translate_instruction( pc + 2 );
  1941 	    exit_block_rel( disp + pc + 4, pc+4 );
  1942 	    return 4;
  1943 	}
  1944     }
  1945 :}
  1946 BRAF Rn {:  
  1947     COUNT_INST(I_BRAF);
  1948     if( sh4_x86.in_delay_slot ) {
  1949 	SLOTILLEGAL();
  1950     } else {
  1951 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1952 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1953 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1954 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1955 	sh4_x86.in_delay_slot = DELAY_PC;
  1956 	sh4_x86.tstate = TSTATE_NONE;
  1957 	sh4_x86.branch_taken = TRUE;
  1958 	if( UNTRANSLATABLE(pc+2) ) {
  1959 	    exit_block_emu(pc+2);
  1960 	    return 2;
  1961 	} else {
  1962 	    sh4_translate_instruction( pc + 2 );
  1963 	    exit_block_newpcset(pc+4);
  1964 	    return 4;
  1965 	}
  1966     }
  1967 :}
  1968 BSR disp {:  
  1969     COUNT_INST(I_BSR);
  1970     if( sh4_x86.in_delay_slot ) {
  1971 	SLOTILLEGAL();
  1972     } else {
  1973 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1974 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1975 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1976 	sh4_x86.in_delay_slot = DELAY_PC;
  1977 	sh4_x86.branch_taken = TRUE;
  1978 	sh4_x86.tstate = TSTATE_NONE;
  1979 	if( UNTRANSLATABLE(pc+2) ) {
  1980 	    ADDL_imms_r32( disp, REG_EAX );
  1981 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1982 	    exit_block_emu(pc+2);
  1983 	    return 2;
  1984 	} else {
  1985 	    sh4_translate_instruction( pc + 2 );
  1986 	    exit_block_rel( disp + pc + 4, pc+4 );
  1987 	    return 4;
  1988 	}
  1989     }
  1990 :}
  1991 BSRF Rn {:  
  1992     COUNT_INST(I_BSRF);
  1993     if( sh4_x86.in_delay_slot ) {
  1994 	SLOTILLEGAL();
  1995     } else {
  1996 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1997 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1998 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1999 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  2000 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2002 	sh4_x86.in_delay_slot = DELAY_PC;
  2003 	sh4_x86.tstate = TSTATE_NONE;
  2004 	sh4_x86.branch_taken = TRUE;
  2005 	if( UNTRANSLATABLE(pc+2) ) {
  2006 	    exit_block_emu(pc+2);
  2007 	    return 2;
  2008 	} else {
  2009 	    sh4_translate_instruction( pc + 2 );
  2010 	    exit_block_newpcset(pc+4);
  2011 	    return 4;
  2012 	}
  2013     }
  2014 :}
  2015 BT disp {:
  2016     COUNT_INST(I_BT);
  2017     if( sh4_x86.in_delay_slot ) {
  2018 	SLOTILLEGAL();
  2019     } else {
  2020 	sh4vma_t target = disp + pc + 4;
  2021 	JF_label( nottaken );
  2022 	exit_block_rel(target, pc+2 );
  2023 	JMP_TARGET(nottaken);
  2024 	return 2;
  2025     }
  2026 :}
  2027 BT/S disp {:
  2028     COUNT_INST(I_BTS);
  2029     if( sh4_x86.in_delay_slot ) {
  2030 	SLOTILLEGAL();
  2031     } else {
  2032 	sh4_x86.in_delay_slot = DELAY_PC;
  2033 	if( UNTRANSLATABLE(pc+2) ) {
  2034 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2035 	    JF_label(nottaken);
  2036 	    ADDL_imms_r32( disp, REG_EAX );
  2037 	    JMP_TARGET(nottaken);
  2038 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  2039 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2040 	    exit_block_emu(pc+2);
  2041 	    sh4_x86.branch_taken = TRUE;
  2042 	    return 2;
  2043 	} else {
  2044 	    LOAD_t();
  2045 	    JCC_cc_rel32(sh4_x86.tstate^1,0);
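       	    // tstate^1 inverts the condition: skip the taken path when T is clear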
  2046 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  2048 	    int save_tstate = sh4_x86.tstate;
  2049 	    sh4_translate_instruction(pc+2);
  2050             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  2051 	    exit_block_rel( disp + pc + 4, pc+4 );
  2052 	    // not taken
  2053 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  2054 	    sh4_x86.tstate = save_tstate;
  2055 	    sh4_translate_instruction(pc+2);
  2056 	    return 4;
  2057 	}
  2058     }
  2059 :}
  2060 JMP @Rn {:  
  2061     COUNT_INST(I_JMP);
  2062     if( sh4_x86.in_delay_slot ) {
  2063 	SLOTILLEGAL();
  2064     } else {
  2065 	load_reg( REG_ECX, Rn );
  2066 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2067 	sh4_x86.in_delay_slot = DELAY_PC;
  2068 	sh4_x86.branch_taken = TRUE;
  2069 	if( UNTRANSLATABLE(pc+2) ) {
  2070 	    exit_block_emu(pc+2);
  2071 	    return 2;
  2072 	} else {
  2073 	    sh4_translate_instruction(pc+2);
  2074 	    exit_block_newpcset(pc+4);
  2075 	    return 4;
  2076 	}
  2077     }
  2078 :}
  2079 JSR @Rn {:  
  2080     COUNT_INST(I_JSR);
  2081     if( sh4_x86.in_delay_slot ) {
  2082 	SLOTILLEGAL();
  2083     } else {
  2084 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2085 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2086 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2087 	load_reg( REG_ECX, Rn );
  2088 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2089 	sh4_x86.in_delay_slot = DELAY_PC;
  2090 	sh4_x86.branch_taken = TRUE;
  2091 	sh4_x86.tstate = TSTATE_NONE;
  2092 	if( UNTRANSLATABLE(pc+2) ) {
  2093 	    exit_block_emu(pc+2);
  2094 	    return 2;
  2095 	} else {
  2096 	    sh4_translate_instruction(pc+2);
  2097 	    exit_block_newpcset(pc+4);
  2098 	    return 4;
  2099 	}
  2100     }
  2101 :}
  2102 RTE {:  
  2103     COUNT_INST(I_RTE);
  2104     if( sh4_x86.in_delay_slot ) {
  2105 	SLOTILLEGAL();
  2106     } else {
  2107 	check_priv();
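       	// PC <- SPC and SR <- SSR; sh4_write_sr may switch register banks and privilege mode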
  2108 	MOVL_rbpdisp_r32( R_SPC, REG_ECX );
  2109 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2110 	MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2111 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2112 	sh4_x86.in_delay_slot = DELAY_PC;
  2113 	sh4_x86.fpuen_checked = FALSE;
  2114 	sh4_x86.tstate = TSTATE_NONE;
  2115 	sh4_x86.branch_taken = TRUE;
  2116     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2117 	if( UNTRANSLATABLE(pc+2) ) {
  2118 	    exit_block_emu(pc+2);
  2119 	    return 2;
  2120 	} else {
  2121 	    sh4_translate_instruction(pc+2);
  2122 	    exit_block_newpcset(pc+4);
  2123 	    return 4;
  2124 	}
  2125     }
  2126 :}
  2127 RTS {:  
  2128     COUNT_INST(I_RTS);
  2129     if( sh4_x86.in_delay_slot ) {
  2130 	SLOTILLEGAL();
  2131     } else {
  2132 	MOVL_rbpdisp_r32( R_PR, REG_ECX );
  2133 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2134 	sh4_x86.in_delay_slot = DELAY_PC;
  2135 	sh4_x86.branch_taken = TRUE;
  2136 	if( UNTRANSLATABLE(pc+2) ) {
  2137 	    exit_block_emu(pc+2);
  2138 	    return 2;
  2139 	} else {
  2140 	    sh4_translate_instruction(pc+2);
  2141 	    exit_block_newpcset(pc+4);
  2142 	    return 4;
  2143 	}
  2144     }
  2145 :}
  2146 TRAPA #imm {:  
  2147     COUNT_INST(I_TRAPA);
  2148     if( sh4_x86.in_delay_slot ) {
  2149 	SLOTILLEGAL();
  2150     } else {
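       	// advance sh4r.pc to the next instruction before the call, so the trap handler records the correct return address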
  2151 	MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX );   // 5
  2152 	ADDL_r32_rbpdisp( REG_ECX, R_PC );
  2153 	MOVL_imm32_r32( imm, REG_EAX );
  2154 	CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
  2155 	sh4_x86.tstate = TSTATE_NONE;
  2156 	exit_block_pcset(pc+2);
  2157 	sh4_x86.branch_taken = TRUE;
  2158 	return 2;
  2159     }
  2160 :}
  2161 UNDEF {:  
  2162     COUNT_INST(I_UNDEF);
  2163     if( sh4_x86.in_delay_slot ) {
  2164 	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4);    
  2165     } else {
  2166 	exit_block_exc(EXC_ILLEGAL, pc, 2);    
  2167 	return 2;
  2168     }
  2169 :}
  2171 CLRMAC {:  
  2172     COUNT_INST(I_CLRMAC);
  2173     XORL_r32_r32(REG_EAX, REG_EAX);
  2174     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2175     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2176     sh4_x86.tstate = TSTATE_NONE;
  2177 :}
  2178 CLRS {:
  2179     COUNT_INST(I_CLRS);
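           // materialize S=0 via the x86 carry flag: CLC, then setcc the carry into sh4r.s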
  2180     CLC();
  2181     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2182     sh4_x86.tstate = TSTATE_NONE;
  2183 :}
  2184 CLRT {:  
  2185     COUNT_INST(I_CLRT);
  2186     CLC();
  2187     SETC_t();
  2188     sh4_x86.tstate = TSTATE_C;
  2189 :}
  2190 SETS {:  
  2191     COUNT_INST(I_SETS);
  2192     STC();
  2193     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2194     sh4_x86.tstate = TSTATE_NONE;
  2195 :}
  2196 SETT {:  
  2197     COUNT_INST(I_SETT);
  2198     STC();
  2199     SETC_t();
  2200     sh4_x86.tstate = TSTATE_C;
  2201 :}
  2203 /* Floating point moves */
  2204 FMOV FRm, FRn {:  
  2205     COUNT_INST(I_FMOV1);
  2206     check_fpuen();
  2207     if( sh4_x86.double_size ) {
  2208         load_dr0( REG_EAX, FRm );
  2209         load_dr1( REG_ECX, FRm );
  2210         store_dr0( REG_EAX, FRn );
  2211         store_dr1( REG_ECX, FRn );
  2212     } else {
  2213         load_fr( REG_EAX, FRm ); // SZ=0 branch
  2214         store_fr( REG_EAX, FRn );
  2215     }
  2216 :}
  2217 FMOV FRm, @Rn {: 
  2218     COUNT_INST(I_FMOV2);
  2219     check_fpuen();
  2220     load_reg( REG_EAX, Rn );
  2221     if( sh4_x86.double_size ) {
  2222         check_walign64( REG_EAX );
  2223         load_dr0( REG_EDX, FRm );
  2224         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2225         load_reg( REG_EAX, Rn );
  2226         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2227         load_dr1( REG_EDX, FRm );
  2228         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2229     } else {
  2230         check_walign32( REG_EAX );
  2231         load_fr( REG_EDX, FRm );
  2232         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2233     }
  2234     sh4_x86.tstate = TSTATE_NONE;
  2235 :}
  2236 FMOV @Rm, FRn {:  
  2237     COUNT_INST(I_FMOV5);
  2238     check_fpuen();
  2239     load_reg( REG_EAX, Rm );
  2240     if( sh4_x86.double_size ) {
  2241         check_ralign64( REG_EAX );
  2242         MEM_READ_LONG( REG_EAX, REG_EAX );
  2243         store_dr0( REG_EAX, FRn );
  2244         load_reg( REG_EAX, Rm );
  2245         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2246         MEM_READ_LONG( REG_EAX, REG_EAX );
  2247         store_dr1( REG_EAX, FRn );
  2248     } else {
  2249         check_ralign32( REG_EAX );
  2250         MEM_READ_LONG( REG_EAX, REG_EAX );
  2251         store_fr( REG_EAX, FRn );
  2252     }
  2253     sh4_x86.tstate = TSTATE_NONE;
  2254 :}
  2255 FMOV FRm, @-Rn {:  
  2256     COUNT_INST(I_FMOV3);
  2257     check_fpuen();
  2258     load_reg( REG_EAX, Rn );
  2259     if( sh4_x86.double_size ) {
  2260         check_walign64( REG_EAX );
  2261         LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
  2262         load_dr0( REG_EDX, FRm );
  2263         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2264         load_reg( REG_EAX, Rn );
  2265         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2266         load_dr1( REG_EDX, FRm );
  2267         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2268         ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
  2269     } else {
  2270         check_walign32( REG_EAX );
  2271         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2272         load_fr( REG_EDX, FRm );
  2273         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2274         ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
  2275     }
  2276     sh4_x86.tstate = TSTATE_NONE;
  2277 :}
  2278 FMOV @Rm+, FRn {:
  2279     COUNT_INST(I_FMOV6);
  2280     check_fpuen();
  2281     load_reg( REG_EAX, Rm );
  2282     if( sh4_x86.double_size ) {
  2283         check_ralign64( REG_EAX );
  2284         MEM_READ_LONG( REG_EAX, REG_EAX );
  2285         store_dr0( REG_EAX, FRn );
  2286         load_reg( REG_EAX, Rm );
  2287         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2288         MEM_READ_LONG( REG_EAX, REG_EAX );
  2289         store_dr1( REG_EAX, FRn );
  2290         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
  2291     } else {
  2292         check_ralign32( REG_EAX );
  2293         MEM_READ_LONG( REG_EAX, REG_EAX );
  2294         store_fr( REG_EAX, FRn );
  2295         ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2296     }
  2297     sh4_x86.tstate = TSTATE_NONE;
  2298 :}
  2299 FMOV FRm, @(R0, Rn) {:  
  2300     COUNT_INST(I_FMOV4);
  2301     check_fpuen();
  2302     load_reg( REG_EAX, Rn );
  2303     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2304     if( sh4_x86.double_size ) {
  2305         check_walign64( REG_EAX );
  2306         load_dr0( REG_EDX, FRm );
  2307         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2308         load_reg( REG_EAX, Rn );
  2309         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2310         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2311         load_dr1( REG_EDX, FRm );
  2312         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2313     } else {
  2314         check_walign32( REG_EAX );
  2315         load_fr( REG_EDX, FRm );
  2316         MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
  2317     }
  2318     sh4_x86.tstate = TSTATE_NONE;
  2319 :}
  2320 FMOV @(R0, Rm), FRn {:  
  2321     COUNT_INST(I_FMOV7);
  2322     check_fpuen();
  2323     load_reg( REG_EAX, Rm );
  2324     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2325     if( sh4_x86.double_size ) {
  2326         check_ralign64( REG_EAX );
  2327         MEM_READ_LONG( REG_EAX, REG_EAX );
  2328         store_dr0( REG_EAX, FRn );
  2329         load_reg( REG_EAX, Rm );
  2330         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2331         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2332         MEM_READ_LONG( REG_EAX, REG_EAX );
  2333         store_dr1( REG_EAX, FRn );
  2334     } else {
  2335         check_ralign32( REG_EAX );
  2336         MEM_READ_LONG( REG_EAX, REG_EAX );
  2337         store_fr( REG_EAX, FRn );
  2338     }
  2339     sh4_x86.tstate = TSTATE_NONE;
  2340 :}
  2341 FLDI0 FRn {:  /* IFF PR=0 */
  2342     COUNT_INST(I_FLDI0);
  2343     check_fpuen();
  2344     if( sh4_x86.double_prec == 0 ) {
  2345         XORL_r32_r32( REG_EAX, REG_EAX );
  2346         store_fr( REG_EAX, FRn );
  2347     }
  2348     sh4_x86.tstate = TSTATE_NONE;
  2349 :}
  2350 FLDI1 FRn {:  /* IFF PR=0 */
  2351     COUNT_INST(I_FLDI1);
  2352     check_fpuen();
  2353     if( sh4_x86.double_prec == 0 ) {
  2354         MOVL_imm32_r32( 0x3F800000, REG_EAX );
  2355         store_fr( REG_EAX, FRn );
  2356     }
  2357 :}
  2359 FLOAT FPUL, FRn {:  
  2360     COUNT_INST(I_FLOAT);
  2361     check_fpuen();
  2362     FILD_rbpdisp(R_FPUL);
  2363     if( sh4_x86.double_prec ) {
  2364         pop_dr( FRn );
  2365     } else {
  2366         pop_fr( FRn );
  2367     }
  2368 :}
  2369 FTRC FRm, FPUL {:  
  2370     COUNT_INST(I_FTRC);
  2371     check_fpuen();
  2372     if( sh4_x86.double_prec ) {
  2373         push_dr( FRm );
  2374     } else {
  2375         push_fr( FRm );
  2376     }
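           // saturate as the SH4 does: <= min_int (or NaN) clamps to min_int, >= max_int
           // clamps to max_int; otherwise truncate toward zero under a temporary
           // round-to-zero FPU control word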
  2377     MOVP_immptr_rptr( &min_int, REG_ECX );
  2378     FILD_r32disp( REG_ECX, 0 );
  2379     FCOMIP_st(1);              
  2380     JAE_label( sat );     
  2381     JP_label( sat2 );       
  2382     MOVP_immptr_rptr( &max_int, REG_ECX );
  2383     FILD_r32disp( REG_ECX, 0 );
  2384     FCOMIP_st(1);
  2385     JNA_label( sat3 );
  2386     MOVP_immptr_rptr( &save_fcw, REG_EAX );
  2387     FNSTCW_r32disp( REG_EAX, 0 );
  2388     MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
  2389     FLDCW_r32disp( REG_EDX, 0 );
  2390     FISTP_rbpdisp(R_FPUL);             
  2391     FLDCW_r32disp( REG_EAX, 0 );
  2392     JMP_label(end);             
  2394     JMP_TARGET(sat);
  2395     JMP_TARGET(sat2);
  2396     JMP_TARGET(sat3);
  2397     MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
  2398     MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
  2399     FPOP_st();
  2400     JMP_TARGET(end);
  2401     sh4_x86.tstate = TSTATE_NONE;
  2402 :}
  2403 FLDS FRm, FPUL {:  
  2404     COUNT_INST(I_FLDS);
  2405     check_fpuen();
  2406     load_fr( REG_EAX, FRm );
  2407     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2408 :}
  2409 FSTS FPUL, FRn {:  
  2410     COUNT_INST(I_FSTS);
  2411     check_fpuen();
  2412     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2413     store_fr( REG_EAX, FRn );
  2414 :}
  2415 FCNVDS FRm, FPUL {:  
  2416     COUNT_INST(I_FCNVDS);
  2417     check_fpuen();
  2418     if( sh4_x86.double_prec ) {
  2419         push_dr( FRm );
  2420         pop_fpul();
  2421     }
  2422 :}
  2423 FCNVSD FPUL, FRn {:  
  2424     COUNT_INST(I_FCNVSD);
  2425     check_fpuen();
  2426     if( sh4_x86.double_prec ) {
  2427         push_fpul();
  2428         pop_dr( FRn );
  2429     }
  2430 :}
  2432 /* Floating point instructions */
  2433 FABS FRn {:  
  2434     COUNT_INST(I_FABS);
  2435     check_fpuen();
  2436     if( sh4_x86.double_prec ) {
  2437         push_dr(FRn);
  2438         FABS_st0();
  2439         pop_dr(FRn);
  2440     } else {
  2441         push_fr(FRn);
  2442         FABS_st0();
  2443         pop_fr(FRn);
  2444     }
  2445 :}
  2446 FADD FRm, FRn {:  
  2447     COUNT_INST(I_FADD);
  2448     check_fpuen();
  2449     if( sh4_x86.double_prec ) {
  2450         push_dr(FRm);
  2451         push_dr(FRn);
  2452         FADDP_st(1);
  2453         pop_dr(FRn);
  2454     } else {
  2455         push_fr(FRm);
  2456         push_fr(FRn);
  2457         FADDP_st(1);
  2458         pop_fr(FRn);
  2459     }
  2460 :}
  2461 FDIV FRm, FRn {:  
  2462     COUNT_INST(I_FDIV);
  2463     check_fpuen();
  2464     if( sh4_x86.double_prec ) {
  2465         push_dr(FRn);
  2466         push_dr(FRm);
  2467         FDIVP_st(1);
  2468         pop_dr(FRn);
  2469     } else {
  2470         push_fr(FRn);
  2471         push_fr(FRm);
  2472         FDIVP_st(1);
  2473         pop_fr(FRn);
  2474     }
  2475 :}
  2476 FMAC FR0, FRm, FRn {:  
  2477     COUNT_INST(I_FMAC);
  2478     check_fpuen();
  2479     if( sh4_x86.double_prec ) {
  2480         push_dr( 0 );
  2481         push_dr( FRm );
  2482         FMULP_st(1);
  2483         push_dr( FRn );
  2484         FADDP_st(1);
  2485         pop_dr( FRn );
  2486     } else {
  2487         push_fr( 0 );
  2488         push_fr( FRm );
  2489         FMULP_st(1);
  2490         push_fr( FRn );
  2491         FADDP_st(1);
  2492         pop_fr( FRn );
  2493     }
  2494 :}
  2496 FMUL FRm, FRn {:  
  2497     COUNT_INST(I_FMUL);
  2498     check_fpuen();
  2499     if( sh4_x86.double_prec ) {
  2500         push_dr(FRm);
  2501         push_dr(FRn);
  2502         FMULP_st(1);
  2503         pop_dr(FRn);
  2504     } else {
  2505         push_fr(FRm);
  2506         push_fr(FRn);
  2507         FMULP_st(1);
  2508         pop_fr(FRn);
  2509     }
  2510 :}
  2511 FNEG FRn {:  
  2512     COUNT_INST(I_FNEG);
  2513     check_fpuen();
  2514     if( sh4_x86.double_prec ) {
  2515         push_dr(FRn);
  2516         FCHS_st0();
  2517         pop_dr(FRn);
  2518     } else {
  2519         push_fr(FRn);
  2520         FCHS_st0();
  2521         pop_fr(FRn);
  2522     }
  2523 :}
  2524 FSRRA FRn {:  
  2525     COUNT_INST(I_FSRRA);
  2526     check_fpuen();
  2527     if( sh4_x86.double_prec == 0 ) {
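               // computes 1.0 / sqrt(FRn)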
  2528         FLD1_st0();
  2529         push_fr(FRn);
  2530         FSQRT_st0();
  2531         FDIVP_st(1);
  2532         pop_fr(FRn);
  2533     }
  2534 :}
  2535 FSQRT FRn {:  
  2536     COUNT_INST(I_FSQRT);
  2537     check_fpuen();
  2538     if( sh4_x86.double_prec ) {
  2539         push_dr(FRn);
  2540         FSQRT_st0();
  2541         pop_dr(FRn);
  2542     } else {
  2543         push_fr(FRn);
  2544         FSQRT_st0();
  2545         pop_fr(FRn);
  2546     }
  2547 :}
  2548 FSUB FRm, FRn {:  
  2549     COUNT_INST(I_FSUB);
  2550     check_fpuen();
  2551     if( sh4_x86.double_prec ) {
  2552         push_dr(FRn);
  2553         push_dr(FRm);
  2554         FSUBP_st(1);
  2555         pop_dr(FRn);
  2556     } else {
  2557         push_fr(FRn);
  2558         push_fr(FRm);
  2559         FSUBP_st(1);
  2560         pop_fr(FRn);
  2561     }
  2562 :}
  2564 FCMP/EQ FRm, FRn {:  
  2565     COUNT_INST(I_FCMPEQ);
  2566     check_fpuen();
  2567     if( sh4_x86.double_prec ) {
  2568         push_dr(FRm);
  2569         push_dr(FRn);
  2570     } else {
  2571         push_fr(FRm);
  2572         push_fr(FRn);
  2573     }
  2574     XORL_r32_r32(REG_EAX, REG_EAX);
  2575     XORL_r32_r32(REG_EDX, REG_EDX);
  2576     FCOMIP_st(1);
  2577     SETCCB_cc_r8(X86_COND_NP, REG_DL);
  2578     CMOVCCL_cc_r32_r32(X86_COND_E, REG_EDX, REG_EAX);
  2579     MOVL_r32_rbpdisp(REG_EAX, R_T);
  2580     FPOP_st();
  2581     sh4_x86.tstate = TSTATE_NONE;
  2582 :}
  2583 FCMP/GT FRm, FRn {:  
  2584     COUNT_INST(I_FCMPGT);
  2585     check_fpuen();
  2586     if( sh4_x86.double_prec ) {
  2587         push_dr(FRm);
  2588         push_dr(FRn);
  2589     } else {
  2590         push_fr(FRm);
  2591         push_fr(FRn);
  2592     }
  2593     FCOMIP_st(1);
  2594     SETA_t();
  2595     FPOP_st();
  2596     sh4_x86.tstate = TSTATE_A;
  2597 :}
  2599 FSCA FPUL, FRn {:  
  2600     COUNT_INST(I_FSCA);
  2601     check_fpuen();
  2602     if( sh4_x86.double_prec == 0 ) {
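               // sh4_fsca writes sin/cos of the FPUL angle into the FRn/FRn+1 pair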
  2603         LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
  2604         MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2605         CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
  2606     }
  2607     sh4_x86.tstate = TSTATE_NONE;
  2608 :}
  2609 FIPR FVm, FVn {:  
  2610     COUNT_INST(I_FIPR);
  2611     check_fpuen();
  2612     if( sh4_x86.double_prec == 0 ) {
  2613         if( sh4_x86.sse3_enabled ) {
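                   // 4-element dot product: multiply elementwise, two horizontal adds
                   // leave the sum in every lane, then store it to FR[(FVn<<2)+3]
                   // (raw array offset +2 because fr[] is stored pairwise-swapped)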
  2614             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2615             MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2616             HADDPS_xmm_xmm( 4, 4 ); 
  2617             HADDPS_xmm_xmm( 4, 4 );
  2618             MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2619         } else {
  2620             push_fr( FVm<<2 );
  2621             push_fr( FVn<<2 );
  2622             FMULP_st(1);
  2623             push_fr( (FVm<<2)+1);
  2624             push_fr( (FVn<<2)+1);
  2625             FMULP_st(1);
  2626             FADDP_st(1);
  2627             push_fr( (FVm<<2)+2);
  2628             push_fr( (FVn<<2)+2);
  2629             FMULP_st(1);
  2630             FADDP_st(1);
  2631             push_fr( (FVm<<2)+3);
  2632             push_fr( (FVn<<2)+3);
  2633             FMULP_st(1);
  2634             FADDP_st(1);
  2635             pop_fr( (FVn<<2)+3);
  2636         }
  2637     }
  2638 :}
  2639 FTRV XMTRX, FVn {:  
  2640     COUNT_INST(I_FTRV);
  2641     check_fpuen();
  2642     if( sh4_x86.double_prec == 0 ) {
  2643         if( sh4_x86.sse3_enabled && sh4_x86.begin_callback == NULL ) {
  2644         	/* FIXME: For now, disable this inlining when we're running in shadow mode -
  2645         	 * it gives slightly different results from the emu core. Need to
  2646         	 * fix the precision so both give the right results.
  2647         	 */
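                   // FVn' = XMTRX * FVn: splat each vector element across an XMM
                   // register and accumulate the element-scaled matrix columns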
  2648             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2649             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2650             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2651             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2653             MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2654             MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2655             MOV_xmm_xmm( 4, 6 );
  2656             MOV_xmm_xmm( 5, 7 );
  2657             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2658             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2659             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2660             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2661             MULPS_xmm_xmm( 0, 4 );
  2662             MULPS_xmm_xmm( 1, 5 );
  2663             MULPS_xmm_xmm( 2, 6 );
  2664             MULPS_xmm_xmm( 3, 7 );
  2665             ADDPS_xmm_xmm( 5, 4 );
  2666             ADDPS_xmm_xmm( 7, 6 );
  2667             ADDPS_xmm_xmm( 6, 4 );
  2668             MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2669         } else {
  2670             LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
  2671             CALL1_ptr_r32( sh4_ftrv, REG_EAX );
  2672         }
  2673     }
  2674     sh4_x86.tstate = TSTATE_NONE;
  2675 :}
  2677 FRCHG {:  
  2678     COUNT_INST(I_FRCHG);
  2679     check_fpuen();
  2680     XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
  2681     CALL_ptr( sh4_switch_fr_banks );
  2682     sh4_x86.tstate = TSTATE_NONE;
  2683 :}
  2684 FSCHG {:  
  2685     COUNT_INST(I_FSCHG);
  2686     check_fpuen();
  2687     XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
  2688     XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
  2689     sh4_x86.tstate = TSTATE_NONE;
  2690     sh4_x86.double_size = !sh4_x86.double_size;
  2691     sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
  2692 :}
  2694 /* Processor control instructions */
  2695 LDC Rm, SR {:
  2696     COUNT_INST(I_LDCSR);
  2697     if( sh4_x86.in_delay_slot ) {
  2698 	SLOTILLEGAL();
  2699     } else {
  2700 	check_priv();
  2701 	load_reg( REG_EAX, Rm );
  2702 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2703 	sh4_x86.fpuen_checked = FALSE;
  2704 	sh4_x86.tstate = TSTATE_NONE;
  2705     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2706 	return 2;
  2707     }
  2708 :}
  2709 LDC Rm, GBR {: 
  2710     COUNT_INST(I_LDC);
  2711     load_reg( REG_EAX, Rm );
  2712     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2713 :}
  2714 LDC Rm, VBR {:  
  2715     COUNT_INST(I_LDC);
  2716     check_priv();
  2717     load_reg( REG_EAX, Rm );
  2718     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2719     sh4_x86.tstate = TSTATE_NONE;
  2720 :}
  2721 LDC Rm, SSR {:  
  2722     COUNT_INST(I_LDC);
  2723     check_priv();
  2724     load_reg( REG_EAX, Rm );
  2725     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2726     sh4_x86.tstate = TSTATE_NONE;
  2727 :}
  2728 LDC Rm, SGR {:  
  2729     COUNT_INST(I_LDC);
  2730     check_priv();
  2731     load_reg( REG_EAX, Rm );
  2732     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2733     sh4_x86.tstate = TSTATE_NONE;
  2734 :}
  2735 LDC Rm, SPC {:  
  2736     COUNT_INST(I_LDC);
  2737     check_priv();
  2738     load_reg( REG_EAX, Rm );
  2739     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2740     sh4_x86.tstate = TSTATE_NONE;
  2741 :}
  2742 LDC Rm, DBR {:  
  2743     COUNT_INST(I_LDC);
  2744     check_priv();
  2745     load_reg( REG_EAX, Rm );
  2746     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2747     sh4_x86.tstate = TSTATE_NONE;
  2748 :}
  2749 LDC Rm, Rn_BANK {:  
  2750     COUNT_INST(I_LDC);
  2751     check_priv();
  2752     load_reg( REG_EAX, Rm );
  2753     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2754     sh4_x86.tstate = TSTATE_NONE;
  2755 :}
  2756 LDC.L @Rm+, GBR {:  
  2757     COUNT_INST(I_LDCM);
  2758     load_reg( REG_EAX, Rm );
  2759     check_ralign32( REG_EAX );
  2760     MEM_READ_LONG( REG_EAX, REG_EAX );
  2761     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2762     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2763     sh4_x86.tstate = TSTATE_NONE;
  2764 :}
  2765 LDC.L @Rm+, SR {:
  2766     COUNT_INST(I_LDCSRM);
  2767     if( sh4_x86.in_delay_slot ) {
  2768 	SLOTILLEGAL();
  2769     } else {
  2770 	check_priv();
  2771 	load_reg( REG_EAX, Rm );
  2772 	check_ralign32( REG_EAX );
  2773 	MEM_READ_LONG( REG_EAX, REG_EAX );
  2774 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2775 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2776 	sh4_x86.fpuen_checked = FALSE;
  2777 	sh4_x86.tstate = TSTATE_NONE;
  2778     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2779 	return 2;
  2780     }
  2781 :}
  2782 LDC.L @Rm+, VBR {:  
  2783     COUNT_INST(I_LDCM);
  2784     check_priv();
  2785     load_reg( REG_EAX, Rm );
  2786     check_ralign32( REG_EAX );
  2787     MEM_READ_LONG( REG_EAX, REG_EAX );
  2788     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2789     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2790     sh4_x86.tstate = TSTATE_NONE;
  2791 :}
  2792 LDC.L @Rm+, SSR {:
  2793     COUNT_INST(I_LDCM);
  2794     check_priv();
  2795     load_reg( REG_EAX, Rm );
  2796     check_ralign32( REG_EAX );
  2797     MEM_READ_LONG( REG_EAX, REG_EAX );
  2798     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2799     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2800     sh4_x86.tstate = TSTATE_NONE;
  2801 :}
  2802 LDC.L @Rm+, SGR {:  
  2803     COUNT_INST(I_LDCM);
  2804     check_priv();
  2805     load_reg( REG_EAX, Rm );
  2806     check_ralign32( REG_EAX );
  2807     MEM_READ_LONG( REG_EAX, REG_EAX );
  2808     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2809     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2810     sh4_x86.tstate = TSTATE_NONE;
  2811 :}
  2812 LDC.L @Rm+, SPC {:  
  2813     COUNT_INST(I_LDCM);
  2814     check_priv();
  2815     load_reg( REG_EAX, Rm );
  2816     check_ralign32( REG_EAX );
  2817     MEM_READ_LONG( REG_EAX, REG_EAX );
  2818     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2819     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2820     sh4_x86.tstate = TSTATE_NONE;
  2821 :}
  2822 LDC.L @Rm+, DBR {:  
  2823     COUNT_INST(I_LDCM);
  2824     check_priv();
  2825     load_reg( REG_EAX, Rm );
  2826     check_ralign32( REG_EAX );
  2827     MEM_READ_LONG( REG_EAX, REG_EAX );
  2828     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2829     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2830     sh4_x86.tstate = TSTATE_NONE;
  2831 :}
  2832 LDC.L @Rm+, Rn_BANK {:  
  2833     COUNT_INST(I_LDCM);
  2834     check_priv();
  2835     load_reg( REG_EAX, Rm );
  2836     check_ralign32( REG_EAX );
  2837     MEM_READ_LONG( REG_EAX, REG_EAX );
  2838     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2839     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2840     sh4_x86.tstate = TSTATE_NONE;
  2841 :}
  2842 LDS Rm, FPSCR {:
  2843     COUNT_INST(I_LDSFPSCR);
  2844     check_fpuen();
  2845     load_reg( REG_EAX, Rm );
  2846     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2847     sh4_x86.tstate = TSTATE_NONE;
  2848     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2849     return 2;
  2850 :}
  2851 LDS.L @Rm+, FPSCR {:  
  2852     COUNT_INST(I_LDSFPSCRM);
  2853     check_fpuen();
  2854     load_reg( REG_EAX, Rm );
  2855     check_ralign32( REG_EAX );
  2856     MEM_READ_LONG( REG_EAX, REG_EAX );
  2857     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2858     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2859     sh4_x86.tstate = TSTATE_NONE;
  2860     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2861     return 2;
  2862 :}
  2863 LDS Rm, FPUL {:  
  2864     COUNT_INST(I_LDS);
  2865     check_fpuen();
  2866     load_reg( REG_EAX, Rm );
  2867     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2868 :}
  2869 LDS.L @Rm+, FPUL {:  
  2870     COUNT_INST(I_LDSM);
  2871     check_fpuen();
  2872     load_reg( REG_EAX, Rm );
  2873     check_ralign32( REG_EAX );
  2874     MEM_READ_LONG( REG_EAX, REG_EAX );
  2875     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2876     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2877     sh4_x86.tstate = TSTATE_NONE;
  2878 :}
  2879 LDS Rm, MACH {: 
  2880     COUNT_INST(I_LDS);
  2881     load_reg( REG_EAX, Rm );
  2882     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2883 :}
  2884 LDS.L @Rm+, MACH {:  
  2885     COUNT_INST(I_LDSM);
  2886     load_reg( REG_EAX, Rm );
  2887     check_ralign32( REG_EAX );
  2888     MEM_READ_LONG( REG_EAX, REG_EAX );
  2889     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2890     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2891     sh4_x86.tstate = TSTATE_NONE;
  2892 :}
  2893 LDS Rm, MACL {:  
  2894     COUNT_INST(I_LDS);
  2895     load_reg( REG_EAX, Rm );
  2896     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2897 :}
  2898 LDS.L @Rm+, MACL {:  
  2899     COUNT_INST(I_LDSM);
  2900     load_reg( REG_EAX, Rm );
  2901     check_ralign32( REG_EAX );
  2902     MEM_READ_LONG( REG_EAX, REG_EAX );
  2903     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2904     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2905     sh4_x86.tstate = TSTATE_NONE;
  2906 :}
  2907 LDS Rm, PR {:  
  2908     COUNT_INST(I_LDS);
  2909     load_reg( REG_EAX, Rm );
  2910     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2911 :}
  2912 LDS.L @Rm+, PR {:  
  2913     COUNT_INST(I_LDSM);
  2914     load_reg( REG_EAX, Rm );
  2915     check_ralign32( REG_EAX );
  2916     MEM_READ_LONG( REG_EAX, REG_EAX );
  2917     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2918     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2919     sh4_x86.tstate = TSTATE_NONE;
  2920 :}
  2921 LDTLB {:  
  2922     COUNT_INST(I_LDTLB);
  2923     CALL_ptr( MMU_ldtlb );
  2924     sh4_x86.tstate = TSTATE_NONE;
  2925 :}
  2926 OCBI @Rn {:
  2927     COUNT_INST(I_OCBI);
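           /* cache-control hints (OCBI/OCBP/OCBWB) are no-ops - the operand cache isn't modelled */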
  2928 :}
  2929 OCBP @Rn {:
  2930     COUNT_INST(I_OCBP);
  2931 :}
  2932 OCBWB @Rn {:
  2933     COUNT_INST(I_OCBWB);
  2934 :}
  2935 PREF @Rn {:
  2936     COUNT_INST(I_PREF);
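           // MEM_PREFETCH also covers the store-queue write-back when Rn points into the SQ region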
  2937     load_reg( REG_EAX, Rn );
  2938     MEM_PREFETCH( REG_EAX );
  2939     sh4_x86.tstate = TSTATE_NONE;
  2940 :}
  2941 SLEEP {: 
  2942     COUNT_INST(I_SLEEP);
  2943     check_priv();
  2944     CALL_ptr( sh4_sleep );
  2945     sh4_x86.tstate = TSTATE_NONE;
  2946     sh4_x86.in_delay_slot = DELAY_NONE;
  2947     return 2;
  2948 :}
  2949 STC SR, Rn {:
  2950     COUNT_INST(I_STCSR);
  2951     check_priv();
  2952     CALL_ptr(sh4_read_sr);
  2953     store_reg( REG_EAX, Rn );
  2954     sh4_x86.tstate = TSTATE_NONE;
  2955 :}
  2956 STC GBR, Rn {:  
  2957     COUNT_INST(I_STC);
  2958     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  2959     store_reg( REG_EAX, Rn );
  2960 :}
  2961 STC VBR, Rn {:  
  2962     COUNT_INST(I_STC);
  2963     check_priv();
  2964     MOVL_rbpdisp_r32( R_VBR, REG_EAX );
  2965     store_reg( REG_EAX, Rn );
  2966     sh4_x86.tstate = TSTATE_NONE;
  2967 :}
  2968 STC SSR, Rn {:  
  2969     COUNT_INST(I_STC);
  2970     check_priv();
  2971     MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2972     store_reg( REG_EAX, Rn );
  2973     sh4_x86.tstate = TSTATE_NONE;
  2974 :}
  2975 STC SPC, Rn {:  
  2976     COUNT_INST(I_STC);
  2977     check_priv();
  2978     MOVL_rbpdisp_r32( R_SPC, REG_EAX );
  2979     store_reg( REG_EAX, Rn );
  2980     sh4_x86.tstate = TSTATE_NONE;
  2981 :}
  2982 STC SGR, Rn {:  
  2983     COUNT_INST(I_STC);
  2984     check_priv();
  2985     MOVL_rbpdisp_r32( R_SGR, REG_EAX );
  2986     store_reg( REG_EAX, Rn );
  2987     sh4_x86.tstate = TSTATE_NONE;
  2988 :}
  2989 STC DBR, Rn {:  
  2990     COUNT_INST(I_STC);
  2991     check_priv();
  2992     MOVL_rbpdisp_r32( R_DBR, REG_EAX );
  2993     store_reg( REG_EAX, Rn );
  2994     sh4_x86.tstate = TSTATE_NONE;
  2995 :}
  2996 STC Rm_BANK, Rn {:
  2997     COUNT_INST(I_STC);
  2998     check_priv();
  2999     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
  3000     store_reg( REG_EAX, Rn );
  3001     sh4_x86.tstate = TSTATE_NONE;
  3002 :}
  3003 STC.L SR, @-Rn {:
  3004     COUNT_INST(I_STCSRM);
  3005     check_priv();
  3006     CALL_ptr( sh4_read_sr );
  3007     MOVL_r32_r32( REG_EAX, REG_EDX );
  3008     load_reg( REG_EAX, Rn );
  3009     check_walign32( REG_EAX );
  3010     LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  3011     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3012     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3013     sh4_x86.tstate = TSTATE_NONE;
  3014 :}
  3015 STC.L VBR, @-Rn {:  
  3016     COUNT_INST(I_STCM);
  3017     check_priv();
  3018     load_reg( REG_EAX, Rn );
  3019     check_walign32( REG_EAX );
  3020     ADDL_imms_r32( -4, REG_EAX );
  3021     MOVL_rbpdisp_r32( R_VBR, REG_EDX );
  3022     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3023     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3024     sh4_x86.tstate = TSTATE_NONE;
  3025 :}
  3026 STC.L SSR, @-Rn {:  
  3027     COUNT_INST(I_STCM);
  3028     check_priv();
  3029     load_reg( REG_EAX, Rn );
  3030     check_walign32( REG_EAX );
  3031     ADDL_imms_r32( -4, REG_EAX );
  3032     MOVL_rbpdisp_r32( R_SSR, REG_EDX );
  3033     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3034     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3035     sh4_x86.tstate = TSTATE_NONE;
  3036 :}
  3037 STC.L SPC, @-Rn {:
  3038     COUNT_INST(I_STCM);
  3039     check_priv();
  3040     load_reg( REG_EAX, Rn );
  3041     check_walign32( REG_EAX );
  3042     ADDL_imms_r32( -4, REG_EAX );
  3043     MOVL_rbpdisp_r32( R_SPC, REG_EDX );
  3044     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3045     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3046     sh4_x86.tstate = TSTATE_NONE;
  3047 :}
  3048 STC.L SGR, @-Rn {:  
  3049     COUNT_INST(I_STCM);
  3050     check_priv();
  3051     load_reg( REG_EAX, Rn );
  3052     check_walign32( REG_EAX );
  3053     ADDL_imms_r32( -4, REG_EAX );
  3054     MOVL_rbpdisp_r32( R_SGR, REG_EDX );
  3055     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3056     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3057     sh4_x86.tstate = TSTATE_NONE;
  3058 :}
  3059 STC.L DBR, @-Rn {:  
  3060     COUNT_INST(I_STCM);
  3061     check_priv();
  3062     load_reg( REG_EAX, Rn );
  3063     check_walign32( REG_EAX );
  3064     ADDL_imms_r32( -4, REG_EAX );
  3065     MOVL_rbpdisp_r32( R_DBR, REG_EDX );
  3066     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3067     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3068     sh4_x86.tstate = TSTATE_NONE;
  3069 :}
  3070 STC.L Rm_BANK, @-Rn {:  
  3071     COUNT_INST(I_STCM);
  3072     check_priv();
  3073     load_reg( REG_EAX, Rn );
  3074     check_walign32( REG_EAX );
  3075     ADDL_imms_r32( -4, REG_EAX );
  3076     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
  3077     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3078     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3079     sh4_x86.tstate = TSTATE_NONE;
  3080 :}
  3081 STC.L GBR, @-Rn {:  
  3082     COUNT_INST(I_STCM);
  3083     load_reg( REG_EAX, Rn );
  3084     check_walign32( REG_EAX );
  3085     ADDL_imms_r32( -4, REG_EAX );
  3086     MOVL_rbpdisp_r32( R_GBR, REG_EDX );
  3087     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3088     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3089     sh4_x86.tstate = TSTATE_NONE;
  3090 :}
  3091 STS FPSCR, Rn {:  
  3092     COUNT_INST(I_STSFPSCR);
  3093     check_fpuen();
  3094     MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
  3095     store_reg( REG_EAX, Rn );
  3096 :}
  3097 STS.L FPSCR, @-Rn {:  
  3098     COUNT_INST(I_STSFPSCRM);
  3099     check_fpuen();
  3100     load_reg( REG_EAX, Rn );
  3101     check_walign32( REG_EAX );
  3102     ADDL_imms_r32( -4, REG_EAX );
  3103     MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
  3104     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3105     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3106     sh4_x86.tstate = TSTATE_NONE;
  3107 :}
  3108 STS FPUL, Rn {:  
  3109     COUNT_INST(I_STS);
  3110     check_fpuen();
  3111     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  3112     store_reg( REG_EAX, Rn );
  3113 :}
  3114 STS.L FPUL, @-Rn {:  
  3115     COUNT_INST(I_STSM);
  3116     check_fpuen();
  3117     load_reg( REG_EAX, Rn );
  3118     check_walign32( REG_EAX );
  3119     ADDL_imms_r32( -4, REG_EAX );
  3120     MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
  3121     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3122     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3123     sh4_x86.tstate = TSTATE_NONE;
  3124 :}
  3125 STS MACH, Rn {:  
  3126     COUNT_INST(I_STS);
  3127     MOVL_rbpdisp_r32( R_MACH, REG_EAX );
  3128     store_reg( REG_EAX, Rn );
  3129 :}
  3130 STS.L MACH, @-Rn {:  
  3131     COUNT_INST(I_STSM);
  3132     load_reg( REG_EAX, Rn );
  3133     check_walign32( REG_EAX );
  3134     ADDL_imms_r32( -4, REG_EAX );
  3135     MOVL_rbpdisp_r32( R_MACH, REG_EDX );
  3136     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3137     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3138     sh4_x86.tstate = TSTATE_NONE;
  3139 :}
  3140 STS MACL, Rn {:  
  3141     COUNT_INST(I_STS);
  3142     MOVL_rbpdisp_r32( R_MACL, REG_EAX );
  3143     store_reg( REG_EAX, Rn );
  3144 :}
  3145 STS.L MACL, @-Rn {:  
  3146     COUNT_INST(I_STSM);
  3147     load_reg( REG_EAX, Rn );
  3148     check_walign32( REG_EAX );
  3149     ADDL_imms_r32( -4, REG_EAX );
  3150     MOVL_rbpdisp_r32( R_MACL, REG_EDX );
  3151     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3152     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3153     sh4_x86.tstate = TSTATE_NONE;
  3154 :}
  3155 STS PR, Rn {:  
  3156     COUNT_INST(I_STS);
  3157     MOVL_rbpdisp_r32( R_PR, REG_EAX );
  3158     store_reg( REG_EAX, Rn );
  3159 :}
  3160 STS.L PR, @-Rn {:  
  3161     COUNT_INST(I_STSM);
  3162     load_reg( REG_EAX, Rn );
  3163     check_walign32( REG_EAX );
  3164     ADDL_imms_r32( -4, REG_EAX );
  3165     MOVL_rbpdisp_r32( R_PR, REG_EDX );
  3166     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3167     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3168     sh4_x86.tstate = TSTATE_NONE;
  3169 :}
  3171 NOP {: 
  3172     COUNT_INST(I_NOP);
  3173     /* Do nothing. Well, we could emit a 0x90, but what would really be the point? */
  3174 :}
  3175 %%
  3176     sh4_x86.in_delay_slot = DELAY_NONE;
  3177     return 0;
  3178 }
  3181 /**
  3182  * The unwind methods only work if we compiled with DWARF2 frame information
  3183  * (i.e. -fexceptions); otherwise we have to use the direct frame scan.
  3184  */
  3185 #ifdef HAVE_EXCEPTIONS
  3186 #include <unwind.h>
  3188 struct UnwindInfo {
  3189     uintptr_t block_start;
  3190     uintptr_t block_end;
  3191     void *pc;
  3192 };
  3194 static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
  3195 {
  3196     struct UnwindInfo *info = arg;
  3197     void *pc = (void *)_Unwind_GetIP(context);
  3198     if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
  3199         info->pc = pc;
  3200         return _URC_NORMAL_STOP;
  3201     }
  3202     return _URC_NO_REASON;
  3203 }
  3205 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3206 {
  3207     struct _Unwind_Exception exc;
  3208     struct UnwindInfo info;
  3210     info.pc = NULL;
  3211     info.block_start = (uintptr_t)code;
  3212     info.block_end = info.block_start + code_size;
  3213     void *result = NULL;
  3214     _Unwind_Backtrace( xlat_check_frame, &info );
  3215     return info.pc;
  3216 }
  3217 #else
  3218 /* Assume this is an ia32 build - amd64 should always have dwarf information */
  3219 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3220 {
  3221     void *result = NULL;
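           /* Scan up to 8 stack frames for one whose saved %ebp is the translator's
            * frame pointer (&sh4r + 128); the return address stored above it is the
            * native PC within the translated block. */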
  3222     __asm__(
  3223         "mov %%ebp, %%eax\n\t"
  3224         "mov $0x8, %%ecx\n\t"
  3225         "mov %1, %%edx\n"
  3226         "frame_loop: test %%eax, %%eax\n\t"
  3227         "je frame_not_found\n\t"
  3228         "cmp (%%eax), %%edx\n\t"
  3229         "je frame_found\n\t"
  3230         "sub $0x1, %%ecx\n\t"
  3231         "je frame_not_found\n\t"
  3232         "movl (%%eax), %%eax\n\t"
  3233         "jmp frame_loop\n"
  3234         "frame_found: movl 0x4(%%eax), %0\n"
  3235         "frame_not_found:"
  3236         : "=r" (result)
  3237         : "r" (((uint8_t *)&sh4r) + 128 )
  3238         : "eax", "ecx", "edx" );
  3239     return result;
  3240 }
  3241 #endif