lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 1298:d0eb2307b847
prev 1292:799fdd4f704a
next 1301:b76840ccf94b
author nkeynes
date Wed Feb 04 08:38:23 2015 +1000 (5 years ago)
permissions -rw-r--r--
last change Fix assorted compile warnings reported by Clang
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "lxdream.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4dasm.h"
    31 #include "sh4/sh4trans.h"
    32 #include "sh4/sh4stat.h"
    33 #include "sh4/sh4mmio.h"
    34 #include "sh4/mmu.h"
    35 #include "xlat/xltcache.h"
    36 #include "xlat/x86/x86op.h"
    37 #include "xlat/xlatdasm.h"
    38 #include "clock.h"
    40 #define DEFAULT_BACKPATCH_SIZE 4096
    42 /* Offset of a reg relative to the sh4r structure */
    43 #define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)
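/* The -128 bias matches the (&sh4r + 128) base loaded into REG_EBP by
 * sh4_translate_write_entry_stub() below, so that frequently used fields of
 * sh4r fall within a signed 8-bit displacement of EBP and the rbpdisp
 * addressing forms can use the shorter disp8 encodings. */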
    45 #define R_T      REG_OFFSET(t)
    46 #define R_Q      REG_OFFSET(q)
    47 #define R_S      REG_OFFSET(s)
    48 #define R_M      REG_OFFSET(m)
    49 #define R_SR     REG_OFFSET(sr)
    50 #define R_GBR    REG_OFFSET(gbr)
    51 #define R_SSR    REG_OFFSET(ssr)
    52 #define R_SPC    REG_OFFSET(spc)
    53 #define R_VBR    REG_OFFSET(vbr)
    54 #define R_MACH   REG_OFFSET(mac)+4
    55 #define R_MACL   REG_OFFSET(mac)
    56 #define R_PC     REG_OFFSET(pc)
    57 #define R_NEW_PC REG_OFFSET(new_pc)
    58 #define R_PR     REG_OFFSET(pr)
    59 #define R_SGR    REG_OFFSET(sgr)
    60 #define R_FPUL   REG_OFFSET(fpul)
    61 #define R_FPSCR  REG_OFFSET(fpscr)
    62 #define R_DBR    REG_OFFSET(dbr)
    63 #define R_R(rn)  REG_OFFSET(r[rn])
    64 #define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
    65 #define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
    66 #define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
    67 #define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
    68 #define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
    70 #define DELAY_NONE 0
    71 #define DELAY_PC 1
    72 #define DELAY_PC_PR 2
    74 #define SH4_MODE_UNKNOWN -1
    76 struct backpatch_record {
    77     uint32_t fixup_offset;
    78     uint32_t fixup_icount;
    79     int32_t exc_code;
    80 };
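/* fixup_offset: location of the 32-bit displacement (or pointer, when
 * exc_code == -2) within the generated block, relative to the code start.
 * fixup_icount: SH4 instruction count from the block start, used by the
 * block trailer to recover PC/cycle information.
 * exc_code: the SH4 exception to raise, or a negative marker (-2 marks an
 * absolute pointer fixup; other negative values take a plain rel32 fixup) -
 * see sh4_translate_end_block(). */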
    82 /** 
    83  * Struct to manage internal translation state. This state is not saved -
    84  * it is only valid between calls to sh4_translate_begin_block() and
    85  * sh4_translate_end_block()
    86  */
    87 struct sh4_x86_state {
    88     int in_delay_slot;
    89     uint8_t *code;
    90     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    91     gboolean branch_taken; /* true if we branched unconditionally */
    92     gboolean double_prec; /* true if FPU is in double-precision mode */
    93     gboolean double_size; /* true if FPU is in double-size mode */
    94     gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    95     uint32_t block_start_pc;
    96     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    97     uint32_t sh4_mode;     /* Mirror of sh4r.xlat_sh4_mode */
    98     int tstate;
   100     /* mode settings */
   101     gboolean tlb_on; /* True if tlb translation is active */
   102     struct mem_region_fn **priv_address_space;
   103     struct mem_region_fn **user_address_space;
   105     /* Instrumentation */
   106     xlat_block_begin_callback_t begin_callback;
   107     xlat_block_end_callback_t end_callback;
   108     gboolean fastmem;
   110     /* Allocated memory for the (block-wide) back-patch list */
   111     struct backpatch_record *backpatch_list;
   112     uint32_t backpatch_posn;
   113     uint32_t backpatch_size;
   114 };
   116 static struct sh4_x86_state sh4_x86;
   118 static uint8_t sh4_entry_stub[128];
   119 typedef FASTCALL void (*entry_point_t)(void *);
   120 entry_point_t sh4_translate_enter;
   122 static uint32_t max_int = 0x7FFFFFFF;
   123 static uint32_t min_int = 0x80000000;
   124 static uint32_t save_fcw; /* save value for fpu control word */
   125 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
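/* max_int/min_int are presumably the INT32 saturation bounds used by the
 * FTRC translations later in this file; save_fcw/trunc_fcw allow the x87
 * control word to be switched to round-toward-zero around those conversions. */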
   127 static void sh4_x86_translate_unlink_block( void *use_list );
   129 static struct xlat_target_fns x86_target_fns = {
   130 	sh4_x86_translate_unlink_block
   131 };	
   134 gboolean is_sse3_supported()
   135 {
   136     uint32_t features;
   138     __asm__ __volatile__(
   139         "mov $0x01, %%eax\n\t"
   140         "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
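    /* CPUID leaf 1 returns feature flags in ECX/EDX; ECX bit 0 indicates SSE3. */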
   141     return (features & 1) ? TRUE : FALSE;
   142 }
   144 void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
   145 {
   146     sh4_x86.priv_address_space = priv;
   147     sh4_x86.user_address_space = user;
   148 }
   150 void sh4_translate_write_entry_stub(void)
   151 {
   152 	mem_unprotect(sh4_entry_stub, sizeof(sh4_entry_stub));
   153 	xlat_output = sh4_entry_stub;
   154 	PUSH_r32(REG_EBP);
   155 	MOVP_immptr_rptr( ((uint8_t *)&sh4r) + 128, REG_EBP );
   156 	PUSH_r32(REG_EBX);
   157 	PUSH_r32(REG_SAVE1);
   158 	PUSH_r32(REG_SAVE2);
   159 #if SIZEOF_VOID_P == 8
   160     PUSH_r32(REG_SAVE3);
   161     PUSH_r32(REG_SAVE4);
   162     CALL_r32( REG_ARG1 );
   163     POP_r32(REG_SAVE4);
   164     POP_r32(REG_SAVE3);
   165 #else
   166     SUBL_imms_r32( 8, REG_ESP ); 
   167 	CALL_r32( REG_ARG1 );
   168 	ADDL_imms_r32( 8, REG_ESP );
   169 #endif
   170 	POP_r32(REG_SAVE2);	
   171 	POP_r32(REG_SAVE1);
   172 	POP_r32(REG_EBX);
   173 	POP_r32(REG_EBP);
   174 	RET();
   175 	sh4_translate_enter = (entry_point_t)sh4_entry_stub;
   176 }
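/* The stub behaves as sh4_translate_enter(code): it saves the callee-save
 * registers, points REG_EBP at sh4r+128 (see REG_OFFSET above), and calls
 * the translated code whose address arrives in REG_ARG1. The SUB/ADD pair
 * on i386 presumably keeps the stack aligned across the call. */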
   178 void sh4_translate_init(void)
   179 {
   180     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   181     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   182     sh4_x86.begin_callback = NULL;
   183     sh4_x86.end_callback = NULL;
   184     sh4_x86.fastmem = TRUE;
   185     sh4_x86.sse3_enabled = is_sse3_supported();
   186     xlat_set_target_fns(&x86_target_fns);
   187     sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
   188     sh4_translate_write_entry_stub();
   189 }
   191 void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
   192 {
   193     sh4_x86.begin_callback = begin;
   194     sh4_x86.end_callback = end;
   195 }
   197 void sh4_translate_set_fastmem( gboolean flag )
   198 {
   199     sh4_x86.fastmem = flag;
   200 }
   202 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   203 {
   204     int reloc_size = 4;
   206     if( exc_code == -2 ) {
   207         reloc_size = sizeof(void *);
   208     }
   210     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   211 	sh4_x86.backpatch_size <<= 1;
   212 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   213 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   214 	assert( sh4_x86.backpatch_list != NULL );
   215     }
   216     if( sh4_x86.in_delay_slot ) {
   217 	fixup_pc -= 2;
   218     }
   220     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   221 	(((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
   222     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   223     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   224     sh4_x86.backpatch_posn++;
   225 }
   227 #define TSTATE_NONE -1
   228 #define TSTATE_O    X86_COND_O
   229 #define TSTATE_C    X86_COND_C
   230 #define TSTATE_E    X86_COND_E
   231 #define TSTATE_NE   X86_COND_NE
   232 #define TSTATE_G    X86_COND_G
   233 #define TSTATE_GE   X86_COND_GE
   234 #define TSTATE_A    X86_COND_A
   235 #define TSTATE_AE   X86_COND_AE
   237 #define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
   238 #define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
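/* Forward jump handling: the jump is emitted with a -1 placeholder
 * displacement, MARK_JMP8 remembers the address of that displacement byte,
 * and JMP_TARGET adds the distance to the current output position. This
 * only works for targets within rel8 range of the jump. */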
   240 /* Convenience instructions */
   241 #define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
   242 #define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
   243 #define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
   244 #define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
   245 #define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
   246 #define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
   247 #define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
   248 #define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
   249 #define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
   250 #define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
   251 #define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
   252 #define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
   253 #define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
   254 #define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
   255 #define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
   256 #define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
   257 #define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
   258 #define JP_label(label)  JCC_cc_rel8(X86_COND_P,-1); MARK_JMP8(label)
   259 #define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
   260 #define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
   261 #define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)
   263 #define LOAD_t() if( sh4_x86.tstate == TSTATE_NONE ) { \
   264 	CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; }     
   266 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
   267 #define JT_label(label) LOAD_t() \
   268     JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)
   270 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
   271 #define JF_label(label) LOAD_t() \
   272     JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
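/* sh4_x86.tstate records which x86 condition code currently mirrors sh4r.t,
 * letting a compare followed by a conditional branch test EFLAGS directly.
 * When it is TSTATE_NONE, LOAD_t() re-derives the flags by comparing sh4r.t
 * against 1, after which X86_COND_E means "T is set". */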
   275 #define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
   276 #define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )
   278 /**
   279  * Load an FR register (single-precision floating point) into an integer x86
   280  * register (eg for register-to-register moves)
   281  */
   282 #define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
   283 #define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )
   285 /**
   286  * Load the low (dr0) or high (dr1) half of a DR register (DR or XD) into an integer x86 register 
   287  */
   288 #define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
   289 #define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )
   291 /**
   292  * Store an FR register (single-precision floating point) from an integer x86
   293  * register (eg for register-to-register moves)
   294  */
   295 #define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
   296 #define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )
   298 #define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   299 #define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   302 #define push_fpul()  FLDF_rbpdisp(R_FPUL)
   303 #define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
   304 #define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
   305 #define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
   306 #define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
   307 #define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
   308 #define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
   309 #define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
   310 #define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
   311 #define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
   313 #ifdef ENABLE_SH4STATS
   314 #define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
   315 #else
   316 #define COUNT_INST(id)
   317 #endif
   320 /* Exception checks - Note that all exception checks will clobber EAX */
   322 #define check_priv( ) \
   323     if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
   324         if( sh4_x86.in_delay_slot ) { \
   325             exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2), 4 ); \
   326         } else { \
   327             exit_block_exc(EXC_ILLEGAL, pc, 2); \
   328         } \
   329         sh4_x86.branch_taken = TRUE; \
   330         sh4_x86.in_delay_slot = DELAY_NONE; \
   331         return 2; \
   332     }
   334 #define check_fpuen( ) \
   335     if( !sh4_x86.fpuen_checked ) {\
   336 	sh4_x86.fpuen_checked = TRUE;\
   337 	MOVL_rbpdisp_r32( R_SR, REG_EAX );\
   338 	ANDL_imms_r32( SR_FD, REG_EAX );\
   339 	if( sh4_x86.in_delay_slot ) {\
   340 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   341 	} else {\
   342 	    JNE_exc(EXC_FPU_DISABLED);\
   343 	}\
   344 	sh4_x86.tstate = TSTATE_NONE; \
   345     }
   347 #define check_ralign16( x86reg ) \
   348     TESTL_imms_r32( 0x00000001, x86reg ); \
   349     JNE_exc(EXC_DATA_ADDR_READ)
   351 #define check_walign16( x86reg ) \
   352     TESTL_imms_r32( 0x00000001, x86reg ); \
   353     JNE_exc(EXC_DATA_ADDR_WRITE);
   355 #define check_ralign32( x86reg ) \
   356     TESTL_imms_r32( 0x00000003, x86reg ); \
   357     JNE_exc(EXC_DATA_ADDR_READ)
   359 #define check_walign32( x86reg ) \
   360     TESTL_imms_r32( 0x00000003, x86reg ); \
   361     JNE_exc(EXC_DATA_ADDR_WRITE);
   363 #define check_ralign64( x86reg ) \
   364     TESTL_imms_r32( 0x00000007, x86reg ); \
   365     JNE_exc(EXC_DATA_ADDR_READ)
   367 #define check_walign64( x86reg ) \
   368     TESTL_imms_r32( 0x00000007, x86reg ); \
   369     JNE_exc(EXC_DATA_ADDR_WRITE);
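/* Each alignment check tests the low address bits and emits a JNE with a
 * rel32 placeholder; sh4_x86_add_backpatch() records it so that
 * sh4_translate_end_block() can later point the branch at the generated
 * exception-raising code. */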
   371 #define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)
   373 #define UNDEF(ir)
   374 /* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so 
   375  * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
   376  */
   377 #ifdef HAVE_FRAME_ADDRESS
   378 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
   379 {
   380     decode_address(address_space(), addr_reg, REG_CALLPTR);
   381     if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
   382         CALL1_r32disp_r32(REG_CALLPTR, offset, addr_reg);
   383     } else {
   384         if( addr_reg != REG_ARG1 ) {
   385             MOVL_r32_r32( addr_reg, REG_ARG1 );
   386         }
   387         MOVP_immptr_rptr( 0, REG_ARG2 );
   388         sh4_x86_add_backpatch( xlat_output, pc, -2 );
   389         CALL2_r32disp_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2);
   390     }
   391     if( value_reg != REG_RESULT1 ) { 
   392         MOVL_r32_r32( REG_RESULT1, value_reg );
   393     }
   394 }
   396 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
   397 {
   398     decode_address(address_space(), addr_reg, REG_CALLPTR);
   399     if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
   400         CALL2_r32disp_r32_r32(REG_CALLPTR, offset, addr_reg, value_reg);
   401     } else {
   402         if( value_reg != REG_ARG2 ) {
   403             MOVL_r32_r32( value_reg, REG_ARG2 );
   404 	}        
   405         if( addr_reg != REG_ARG1 ) {
   406             MOVL_r32_r32( addr_reg, REG_ARG1 );
   407         }
   408 #if MAX_REG_ARG > 2        
   409         MOVP_immptr_rptr( 0, REG_ARG3 );
   410         sh4_x86_add_backpatch( xlat_output, pc, -2 );
   411         CALL3_r32disp_r32_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2, REG_ARG3);
   412 #else
   413         MOVL_imm32_rspdisp( 0, 0 );
   414         sh4_x86_add_backpatch( xlat_output, pc, -2 );
   415         CALL3_r32disp_r32_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2, 0);
   416 #endif
   417     }
   418 }
   419 #else
   420 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
   421 {
   422     decode_address(address_space(), addr_reg, REG_CALLPTR);
   423     CALL1_r32disp_r32(REG_CALLPTR, offset, addr_reg);
   424     if( value_reg != REG_RESULT1 ) {
   425         MOVL_r32_r32( REG_RESULT1, value_reg );
   426     }
   427 }     
   429 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
   430 {
   431     decode_address(address_space(), addr_reg, REG_CALLPTR);
   432     CALL2_r32disp_r32_r32(REG_CALLPTR, offset, addr_reg, value_reg);
   433 }
   434 #endif
   436 #define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
   437 #define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
   438 #define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc) 
   439 #define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
   440 #define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
   441 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
   442 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
   443 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
   444 #define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)
   446 #define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
   448 /** Offset of xlat_sh4_mode field relative to the code pointer */ 
   449 #define XLAT_SH4_MODE_CODE_OFFSET  (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
   450 #define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
   451 #define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
   453 void sh4_translate_begin_block( sh4addr_t pc ) 
   454 {
   455 	sh4_x86.code = xlat_output;
   456     sh4_x86.in_delay_slot = FALSE;
   457     sh4_x86.fpuen_checked = FALSE;
   458     sh4_x86.branch_taken = FALSE;
   459     sh4_x86.backpatch_posn = 0;
   460     sh4_x86.block_start_pc = pc;
   461     sh4_x86.tlb_on = IS_TLB_ENABLED();
   462     sh4_x86.tstate = TSTATE_NONE;
   463     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   464     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
   465     sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
   466     if( sh4_x86.begin_callback ) {
   467         CALL_ptr( sh4_x86.begin_callback );
   468     }
   469     if( sh4_profile_blocks ) {
   470     	MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
   471     	ADDL_imms_r32disp( 1, REG_EAX, 0 );
   472     }  
   473 }
   476 uint32_t sh4_translate_end_block_size()
   477 {
   478 	uint32_t epilogue_size = EPILOGUE_SIZE;
   479 	if( sh4_x86.end_callback ) {
   480 	    epilogue_size += (CALL1_PTR_MIN_SIZE - 1);
   481 	}
   482     if( sh4_x86.backpatch_posn <= 3 ) {
   483         epilogue_size += (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
   484     } else {
   485         epilogue_size += (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
   486     }
   487     return epilogue_size;
   488 }
   491 /**
   492  * Embed a breakpoint into the generated code
   493  */
   494 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   495 {
   496     MOVL_imm32_r32( pc, REG_EAX );
   497     CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
   498     sh4_x86.tstate = TSTATE_NONE;
   499 }
   502 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
   504 /**
   505  * Test if the loaded target code pointer in %eax is valid, and if so jump
   506  * directly into it, bypassing the normal exit.
   507  */
   508 static void jump_next_block()
   509 {
   510 	uint8_t *ptr = xlat_output;
   511 	TESTP_rptr_rptr(REG_EAX, REG_EAX);
   512 	JE_label(nocode);
   513 	if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
   514 	    /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
   515 	    MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
   516 	    CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
   517 	} else {
   518 	    CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
   519 	}
   520 	JNE_label(wrongmode);
   521 	if( sh4_x86.end_callback ) {
   522 	    /* Note this leaves the stack out of alignment, but that doesn't
   523 	     * matter for what we're currently using it for.
   524 	     */
   525 	    PUSH_r32(REG_EAX);
   526 	    MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
   527 	    JMP_rptr(REG_ECX);
   528 	} else {
   529 	    JMP_rptr(REG_EAX);
   530 	}
   531 	JMP_TARGET(wrongmode);
   532 	MOVP_rptrdisp_rptr( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
   533 	int rel = ptr - xlat_output;
   534     JMP_prerel(rel);
   535 	JMP_TARGET(nocode); 
   536 }
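/* The emitted sequence is roughly (sketch):
 *         test %eax, %eax
 *         je   nocode
 *         cmp  $mode, XLAT_SH4_MODE_CODE_OFFSET(%eax)
 *         jne  wrongmode
 *         jmp  *%eax                  ; or push %eax + jmp via end_callback
 *     wrongmode:
 *         mov  XLAT_CHAIN_CODE_OFFSET(%eax), %eax
 *         jmp  <top>                  ; retry with next block in the chain
 *     nocode:
 */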
   538 /**
    539  * Look up (or translate) the block for the given PC, then patch the call site that invoked us into a direct branch to it.
   540  */
   541 void FASTCALL sh4_translate_link_block( uint32_t pc )
   542 {
   543     uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
   544     while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
   545         target = XLAT_BLOCK_CHAIN(target);
   546 	}
   547     if( target == NULL ) {
   548         target = sh4_translate_basic_block( pc );
   549     }
   550     uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
   551     *backpatch = 0xE9;
   552     *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)-5;
   553     *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
   554     XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch; 
   556     uint8_t * volatile *retptr = ((uint8_t * volatile *)__builtin_frame_address(0))+1;
   557     assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
   558 	*retptr = backpatch;
   559 }
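/* The caller is the CALL emitted by emit_translate_and_backpatch(): the code
 * above overwrites that call site with a direct JMP rel32 (opcode 0xE9) to
 * the target block, stashes the use-list link in the bytes after the jump,
 * and finally rewrites its own return address so that returning executes the
 * freshly patched jump. */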
   561 static void emit_translate_and_backpatch()
   562 {
   563     /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
   564     CALL1_ptr_r32(sh4_translate_link_block, REG_ARG1);
   566     /* When patched, the jmp instruction will be 5 bytes (either platform) -
   567      * we need to reserve sizeof(void*) bytes for the use-list
   568 	 * pointer
   569 	 */ 
   570     if( sizeof(void*) == 8 ) {
   571         NOP();
   572     } else {
   573         NOP2();
   574     }
   575 }
   577 /**
    578  * If we're jumping to a fixed address (or at least one fixed relative to
    579  * the current PC), we can do a direct branch. REG_ARG1 should contain
    580  * the PC at this point.
   581  */
   582 static void jump_next_block_fixed_pc( sh4addr_t pc )
   583 {
   584 	if( IS_IN_ICACHE(pc) ) {
   585 	    if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN && sh4_x86.end_callback == NULL ) {
   586 	        /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
   587 	         * fetch-and-backpatch routine, which will replace the call with a branch */
   588            emit_translate_and_backpatch();	         
   589            return;
   590 		} else {
   591             MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
   592             ANDP_imms_rptr( -4, REG_EAX );
   593         }
   594 	} else if( sh4_x86.tlb_on ) {
   595         CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
   596     } else {
   597         CALL1_ptr_r32(xlat_get_code, REG_ARG1);
   598     }
   599     jump_next_block();
   602 }
   604 static void sh4_x86_translate_unlink_block( void *use_list )
   605 {
   606 	uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */
   607 	void *next = use_list;
   608 	while( next != NULL ) {
   609     	xlat_output = (uint8_t *)next;
   610  	    next = *(void **)(xlat_output+5);
   611  		emit_translate_and_backpatch();
   612  	}
   613  	xlat_output = tmp;
   614 }
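/* Unlinking walks the use-list and re-emits the translate-and-backpatch call
 * over each patched jump (temporarily redirecting xlat_output to each patch
 * site), so the next execution of that site falls back into
 * sh4_translate_link_block(). */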
   618 static void exit_block()
   619 {
   620 	if( sh4_x86.end_callback ) {
   621 	    MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
   622 	    JMP_rptr(REG_ECX);
   623 	} else {
   624 	    RET();
   625 	}
   626 }
   628 /**
   629  * Exit the block with sh4r.pc already written
   630  */
   631 void exit_block_pcset( sh4addr_t pc )
   632 {
   633     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   634     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   635     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   636     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   637     JBE_label(exitloop);
   638     MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
   639     if( sh4_x86.tlb_on ) {
   640         CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
   641     } else {
   642         CALL1_ptr_r32(xlat_get_code,REG_ARG1);
   643     }
   645     jump_next_block();
   646     JMP_TARGET(exitloop);
   647     exit_block();
   648 }
   650 /**
   651  * Exit the block with sh4r.new_pc written with the target pc
   652  */
   653 void exit_block_newpcset( sh4addr_t pc )
   654 {
   655     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   656     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   657     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   658     MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
   659     MOVL_r32_rbpdisp( REG_ARG1, R_PC );
   660     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   661     JBE_label(exitloop);
   662     if( sh4_x86.tlb_on ) {
   663         CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
   664     } else {
   665         CALL1_ptr_r32(xlat_get_code,REG_ARG1);
   666     }
   668 	jump_next_block();
   669     JMP_TARGET(exitloop);
   670     exit_block();
   671 }
   674 /**
   675  * Exit the block to an absolute PC
   676  */
   677 void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
   678 {
   679     MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   680     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   681     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   683     MOVL_imm32_r32( pc, REG_ARG1 );
   684     MOVL_r32_rbpdisp( REG_ARG1, R_PC );
   685     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   686     JBE_label(exitloop);
   687     jump_next_block_fixed_pc(pc);    
   688     JMP_TARGET(exitloop);
   689     exit_block();
   690 }
   692 /**
   693  * Exit the block to a relative PC
   694  */
   695 void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
   696 {
   697     MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   698     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   699     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   701 	if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
   702 	    /* Special case for tight loops - the PC doesn't change, and
   703 	     * we already know the target address. Just check events pending before
   704 	     * looping.
   705 	     */
   706         CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   707         uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output));
   708         JCC_cc_prerel(X86_COND_A, backdisp);
   709 	} else {
   710         MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
   711         ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
   712         MOVL_r32_rbpdisp( REG_ARG1, R_PC );
   713         CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   714         JBE_label(exitloop2);
   716         jump_next_block_fixed_pc(pc);
   717         JMP_TARGET(exitloop2);
   718     }
   719     exit_block();
   720 }
   722 /**
   723  * Exit unconditionally with a general exception
   724  */
   725 void exit_block_exc( int code, sh4addr_t pc, int inst_adjust )
   726 {
   727     MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
   728     ADDL_r32_rbpdisp( REG_ECX, R_PC );
   729     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc + inst_adjust)>>1)*sh4_cpu_period, REG_ECX );
   730     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   731     MOVL_imm32_r32( code, REG_ARG1 );
   732     CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
   733     exit_block();
   734 }    
   736 /**
   737  * Embed a call to sh4_execute_instruction for situations that we
   738  * can't translate (just page-crossing delay slots at the moment).
   739  * Caller is responsible for setting new_pc before calling this function.
   740  *
   741  * Performs:
   742  *   Set PC = endpc
   743  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   744  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   745  *   Call sh4_execute_instruction
   746  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   747  */
   748 void exit_block_emu( sh4vma_t endpc )
   749 {
   750     MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
   751     ADDL_r32_rbpdisp( REG_ECX, R_PC );
   753     MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
   754     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
   755     MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
   756     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );
   758     CALL_ptr( sh4_execute_instruction );
   759     exit_block();
   760 } 
   762 /**
   763  * Write the block trailer (exception handling block)
   764  */
   765 void sh4_translate_end_block( sh4addr_t pc ) {
   766     if( sh4_x86.branch_taken == FALSE ) {
   767         // Didn't exit unconditionally already, so write the termination here
   768         exit_block_rel( pc, pc );
   769     }
   770     if( sh4_x86.backpatch_posn != 0 ) {
   771         unsigned int i;
   772         // Exception raised - cleanup and exit
   773         uint8_t *end_ptr = xlat_output;
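        /* Each backpatched branch below loads its fixup_icount into REG_EDX
         * and jumps back here: spc is advanced by 2*EDX bytes and slice_cycle
         * by (EDX+1) cpu periods before exiting. */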
   774         MOVL_r32_r32( REG_EDX, REG_ECX );
   775         ADDL_r32_r32( REG_EDX, REG_ECX );
   776         ADDL_r32_rbpdisp( REG_ECX, R_SPC );
   777         MOVL_moffptr_eax( &sh4_cpu_period );
   778         INC_r32( REG_EDX );  /* Add 1 for the aborting instruction itself */ 
   779         MULL_r32( REG_EDX );
   780         ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
   781         exit_block();
   783         for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
   784             uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
   785             if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
   786                 if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
   787                     *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output; 
   788                 } else {
   789                     *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
   790                 }
   791                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
   792                 int rel = end_ptr - xlat_output;
   793                 JMP_prerel(rel);
   794             } else {
   795                 *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
   796                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
   797                 CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
   798                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
   799                 int rel = end_ptr - xlat_output;
   800                 JMP_prerel(rel);
   801             }
   802         }
   803     }
   804 }
   806 /**
   807  * Translate a single instruction. Delayed branches are handled specially
    808  * by translating both branch and delayed instruction as a single unit (as
    809  * the branch takes effect only after its delay slot has executed).
    810  * The instruction MUST be in the icache (assert check)
    811  *
    812  * @return true if the instruction marks the end of a basic block
    813  * (eg a branch or an instruction that otherwise terminates the block)
   814  */
   815 uint32_t sh4_translate_instruction( sh4vma_t pc )
   816 {
   817     uint32_t ir;
   818     /* Read instruction from icache */
   819     assert( IS_IN_ICACHE(pc) );
   820     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   822     if( !sh4_x86.in_delay_slot ) {
   823 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   824     }
   826     /* check for breakpoints at this pc */
   827     for( int i=0; i<sh4_breakpoint_count; i++ ) {
   828         if( sh4_breakpoints[i].address == pc ) {
   829             sh4_translate_emit_breakpoint(pc);
   830             break;
   831         }
   832     }
   833 %%
   834 /* ALU operations */
   835 ADD Rm, Rn {:
   836     COUNT_INST(I_ADD);
   837     load_reg( REG_EAX, Rm );
   838     load_reg( REG_ECX, Rn );
   839     ADDL_r32_r32( REG_EAX, REG_ECX );
   840     store_reg( REG_ECX, Rn );
   841     sh4_x86.tstate = TSTATE_NONE;
   842 :}
   843 ADD #imm, Rn {:  
   844     COUNT_INST(I_ADDI);
   845     ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
   846     sh4_x86.tstate = TSTATE_NONE;
   847 :}
   848 ADDC Rm, Rn {:
   849     COUNT_INST(I_ADDC);
   850     if( sh4_x86.tstate != TSTATE_C ) {
   851         LDC_t();
   852     }
   853     load_reg( REG_EAX, Rm );
   854     load_reg( REG_ECX, Rn );
   855     ADCL_r32_r32( REG_EAX, REG_ECX );
   856     store_reg( REG_ECX, Rn );
   857     SETC_t();
   858     sh4_x86.tstate = TSTATE_C;
   859 :}
   860 ADDV Rm, Rn {:
   861     COUNT_INST(I_ADDV);
   862     load_reg( REG_EAX, Rm );
   863     load_reg( REG_ECX, Rn );
   864     ADDL_r32_r32( REG_EAX, REG_ECX );
   865     store_reg( REG_ECX, Rn );
   866     SETO_t();
   867     sh4_x86.tstate = TSTATE_O;
   868 :}
   869 AND Rm, Rn {:
   870     COUNT_INST(I_AND);
   871     load_reg( REG_EAX, Rm );
   872     load_reg( REG_ECX, Rn );
   873     ANDL_r32_r32( REG_EAX, REG_ECX );
   874     store_reg( REG_ECX, Rn );
   875     sh4_x86.tstate = TSTATE_NONE;
   876 :}
   877 AND #imm, R0 {:  
   878     COUNT_INST(I_ANDI);
   879     load_reg( REG_EAX, 0 );
   880     ANDL_imms_r32(imm, REG_EAX); 
   881     store_reg( REG_EAX, 0 );
   882     sh4_x86.tstate = TSTATE_NONE;
   883 :}
   884 AND.B #imm, @(R0, GBR) {: 
   885     COUNT_INST(I_ANDB);
   886     load_reg( REG_EAX, 0 );
   887     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
   888     MOVL_r32_r32(REG_EAX, REG_SAVE1);
   889     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
   890     MOVL_r32_r32(REG_SAVE1, REG_EAX);
   891     ANDL_imms_r32(imm, REG_EDX );
   892     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
   893     sh4_x86.tstate = TSTATE_NONE;
   894 :}
   895 CMP/EQ Rm, Rn {:  
   896     COUNT_INST(I_CMPEQ);
   897     load_reg( REG_EAX, Rm );
   898     load_reg( REG_ECX, Rn );
   899     CMPL_r32_r32( REG_EAX, REG_ECX );
   900     SETE_t();
   901     sh4_x86.tstate = TSTATE_E;
   902 :}
   903 CMP/EQ #imm, R0 {:  
   904     COUNT_INST(I_CMPEQI);
   905     load_reg( REG_EAX, 0 );
   906     CMPL_imms_r32(imm, REG_EAX);
   907     SETE_t();
   908     sh4_x86.tstate = TSTATE_E;
   909 :}
   910 CMP/GE Rm, Rn {:  
   911     COUNT_INST(I_CMPGE);
   912     load_reg( REG_EAX, Rm );
   913     load_reg( REG_ECX, Rn );
   914     CMPL_r32_r32( REG_EAX, REG_ECX );
   915     SETGE_t();
   916     sh4_x86.tstate = TSTATE_GE;
   917 :}
   918 CMP/GT Rm, Rn {: 
   919     COUNT_INST(I_CMPGT);
   920     load_reg( REG_EAX, Rm );
   921     load_reg( REG_ECX, Rn );
   922     CMPL_r32_r32( REG_EAX, REG_ECX );
   923     SETG_t();
   924     sh4_x86.tstate = TSTATE_G;
   925 :}
   926 CMP/HI Rm, Rn {:  
   927     COUNT_INST(I_CMPHI);
   928     load_reg( REG_EAX, Rm );
   929     load_reg( REG_ECX, Rn );
   930     CMPL_r32_r32( REG_EAX, REG_ECX );
   931     SETA_t();
   932     sh4_x86.tstate = TSTATE_A;
   933 :}
   934 CMP/HS Rm, Rn {: 
   935     COUNT_INST(I_CMPHS);
   936     load_reg( REG_EAX, Rm );
   937     load_reg( REG_ECX, Rn );
   938     CMPL_r32_r32( REG_EAX, REG_ECX );
   939     SETAE_t();
   940     sh4_x86.tstate = TSTATE_AE;
   941  :}
   942 CMP/PL Rn {: 
   943     COUNT_INST(I_CMPPL);
   944     load_reg( REG_EAX, Rn );
   945     CMPL_imms_r32( 0, REG_EAX );
   946     SETG_t();
   947     sh4_x86.tstate = TSTATE_G;
   948 :}
   949 CMP/PZ Rn {:  
   950     COUNT_INST(I_CMPPZ);
   951     load_reg( REG_EAX, Rn );
   952     CMPL_imms_r32( 0, REG_EAX );
   953     SETGE_t();
   954     sh4_x86.tstate = TSTATE_GE;
   955 :}
   956 CMP/STR Rm, Rn {:  
   957     COUNT_INST(I_CMPSTR);
   958     load_reg( REG_EAX, Rm );
   959     load_reg( REG_ECX, Rn );
   960     XORL_r32_r32( REG_ECX, REG_EAX );
   961     TESTB_r8_r8( REG_AL, REG_AL );
   962     JE_label(target1);
   963     TESTB_r8_r8( REG_AH, REG_AH );
   964     JE_label(target2);
   965     SHRL_imm_r32( 16, REG_EAX );
   966     TESTB_r8_r8( REG_AL, REG_AL );
   967     JE_label(target3);
   968     TESTB_r8_r8( REG_AH, REG_AH );
   969     JMP_TARGET(target1);
   970     JMP_TARGET(target2);
   971     JMP_TARGET(target3);
   972     SETE_t();
   973     sh4_x86.tstate = TSTATE_E;
   974 :}
   975 DIV0S Rm, Rn {:
   976     COUNT_INST(I_DIV0S);
   977     load_reg( REG_EAX, Rm );
   978     load_reg( REG_ECX, Rn );
   979     SHRL_imm_r32( 31, REG_EAX );
   980     SHRL_imm_r32( 31, REG_ECX );
   981     MOVL_r32_rbpdisp( REG_EAX, R_M );
   982     MOVL_r32_rbpdisp( REG_ECX, R_Q );
   983     CMPL_r32_r32( REG_EAX, REG_ECX );
   984     SETNE_t();
   985     sh4_x86.tstate = TSTATE_NE;
   986 :}
   987 DIV0U {:  
   988     COUNT_INST(I_DIV0U);
   989     XORL_r32_r32( REG_EAX, REG_EAX );
   990     MOVL_r32_rbpdisp( REG_EAX, R_Q );
   991     MOVL_r32_rbpdisp( REG_EAX, R_M );
   992     MOVL_r32_rbpdisp( REG_EAX, R_T );
   993     sh4_x86.tstate = TSTATE_C; // works for DIV1
   994 :}
   995 DIV1 Rm, Rn {:
   996     COUNT_INST(I_DIV1);
   997     MOVL_rbpdisp_r32( R_M, REG_ECX );
   998     load_reg( REG_EAX, Rn );
   999     if( sh4_x86.tstate != TSTATE_C ) {
   1000 	LDC_t();
   1001     }
  1002     RCLL_imm_r32( 1, REG_EAX );
  1003     SETC_r8( REG_DL ); // Q'
  1004     CMPL_rbpdisp_r32( R_Q, REG_ECX );
  1005     JE_label(mqequal);
  1006     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1007     JMP_label(end);
  1008     JMP_TARGET(mqequal);
  1009     SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1010     JMP_TARGET(end);
  1011     store_reg( REG_EAX, Rn ); // Done with Rn now
  1012     SETC_r8(REG_AL); // tmp1
  1013     XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
  1014     XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
  1015     MOVL_r32_rbpdisp( REG_ECX, R_Q );
  1016     XORL_imms_r32( 1, REG_AL );   // T = !Q'
  1017     MOVZXL_r8_r32( REG_AL, REG_EAX );
  1018     MOVL_r32_rbpdisp( REG_EAX, R_T );
  1019     sh4_x86.tstate = TSTATE_NONE;
  1020 :}
  1021 DMULS.L Rm, Rn {:  
  1022     COUNT_INST(I_DMULS);
  1023     load_reg( REG_EAX, Rm );
  1024     load_reg( REG_ECX, Rn );
  1025     IMULL_r32(REG_ECX);
  1026     MOVL_r32_rbpdisp( REG_EDX, R_MACH );
  1027     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1028     sh4_x86.tstate = TSTATE_NONE;
  1029 :}
  1030 DMULU.L Rm, Rn {:  
  1031     COUNT_INST(I_DMULU);
  1032     load_reg( REG_EAX, Rm );
  1033     load_reg( REG_ECX, Rn );
  1034     MULL_r32(REG_ECX);
  1035     MOVL_r32_rbpdisp( REG_EDX, R_MACH );
  1036     MOVL_r32_rbpdisp( REG_EAX, R_MACL );    
  1037     sh4_x86.tstate = TSTATE_NONE;
  1038 :}
  1039 DT Rn {:  
  1040     COUNT_INST(I_DT);
  1041     load_reg( REG_EAX, Rn );
  1042     ADDL_imms_r32( -1, REG_EAX );
  1043     store_reg( REG_EAX, Rn );
  1044     SETE_t();
  1045     sh4_x86.tstate = TSTATE_E;
  1046 :}
  1047 EXTS.B Rm, Rn {:  
  1048     COUNT_INST(I_EXTSB);
  1049     load_reg( REG_EAX, Rm );
  1050     MOVSXL_r8_r32( REG_EAX, REG_EAX );
  1051     store_reg( REG_EAX, Rn );
  1052 :}
  1053 EXTS.W Rm, Rn {:  
  1054     COUNT_INST(I_EXTSW);
  1055     load_reg( REG_EAX, Rm );
  1056     MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1057     store_reg( REG_EAX, Rn );
  1058 :}
  1059 EXTU.B Rm, Rn {:  
  1060     COUNT_INST(I_EXTUB);
  1061     load_reg( REG_EAX, Rm );
  1062     MOVZXL_r8_r32( REG_EAX, REG_EAX );
  1063     store_reg( REG_EAX, Rn );
  1064 :}
  1065 EXTU.W Rm, Rn {:  
  1066     COUNT_INST(I_EXTUW);
  1067     load_reg( REG_EAX, Rm );
  1068     MOVZXL_r16_r32( REG_EAX, REG_EAX );
  1069     store_reg( REG_EAX, Rn );
  1070 :}
  1071 MAC.L @Rm+, @Rn+ {:
  1072     COUNT_INST(I_MACL);
  1073     if( Rm == Rn ) {
  1074 	load_reg( REG_EAX, Rm );
  1075 	check_ralign32( REG_EAX );
  1076 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1077 	MOVL_r32_r32(REG_EAX, REG_SAVE1);
  1078 	load_reg( REG_EAX, Rm );
  1079 	LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  1080 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1081         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
  1082     } else {
  1083 	load_reg( REG_EAX, Rm );
  1084 	check_ralign32( REG_EAX );
  1085 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1086 	MOVL_r32_r32(REG_EAX, REG_SAVE1);
  1087 	load_reg( REG_EAX, Rn );
  1088 	check_ralign32( REG_EAX );
  1089 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1090 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
   1091 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
   1092     }
  1094     IMULL_r32( REG_SAVE1 );
  1095     ADDL_r32_rbpdisp( REG_EAX, R_MACL );
  1096     ADCL_r32_rbpdisp( REG_EDX, R_MACH );
  1098     MOVL_rbpdisp_r32( R_S, REG_ECX );
  1099     TESTL_r32_r32(REG_ECX, REG_ECX);
  1100     JE_label( nosat );
  1101     CALL_ptr( signsat48 );
  1102     JMP_TARGET( nosat );
  1103     sh4_x86.tstate = TSTATE_NONE;
  1104 :}
  1105 MAC.W @Rm+, @Rn+ {:  
  1106     COUNT_INST(I_MACW);
  1107     if( Rm == Rn ) {
  1108 	load_reg( REG_EAX, Rm );
  1109 	check_ralign16( REG_EAX );
  1110 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1111         MOVL_r32_r32( REG_EAX, REG_SAVE1 );
  1112 	load_reg( REG_EAX, Rm );
  1113 	LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
  1114 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1115 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
   1116 	// Note: translate the address twice in case the two reads straddle a
   1117 	// page boundary. Maybe worth adding a page-boundary check to skip the second translation
  1118     } else {
  1119 	load_reg( REG_EAX, Rn );
  1120 	check_ralign16( REG_EAX );
  1121 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1122         MOVL_r32_r32( REG_EAX, REG_SAVE1 );
  1123 	load_reg( REG_EAX, Rm );
  1124 	check_ralign16( REG_EAX );
  1125 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1126 	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
   1127 	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
   1128     }
  1129     IMULL_r32( REG_SAVE1 );
  1130     MOVL_rbpdisp_r32( R_S, REG_ECX );
  1131     TESTL_r32_r32( REG_ECX, REG_ECX );
  1132     JE_label( nosat );
  1134     ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
  1135     JNO_label( end );            // 2
  1136     MOVL_imm32_r32( 1, REG_EDX );         // 5
  1137     MOVL_r32_rbpdisp( REG_EDX, R_MACH );   // 6
  1138     JS_label( positive );        // 2
  1139     MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
  1140     MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
  1141     JMP_label(end2);           // 2
  1143     JMP_TARGET(positive);
  1144     MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
  1145     MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
  1146     JMP_label(end3);            // 2
  1148     JMP_TARGET(nosat);
  1149     ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
  1150     ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
  1151     JMP_TARGET(end);
  1152     JMP_TARGET(end2);
  1153     JMP_TARGET(end3);
  1154     sh4_x86.tstate = TSTATE_NONE;
  1155 :}
  1156 MOVT Rn {:  
  1157     COUNT_INST(I_MOVT);
  1158     MOVL_rbpdisp_r32( R_T, REG_EAX );
  1159     store_reg( REG_EAX, Rn );
  1160 :}
  1161 MUL.L Rm, Rn {:  
  1162     COUNT_INST(I_MULL);
  1163     load_reg( REG_EAX, Rm );
  1164     load_reg( REG_ECX, Rn );
  1165     MULL_r32( REG_ECX );
  1166     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1167     sh4_x86.tstate = TSTATE_NONE;
  1168 :}
  1169 MULS.W Rm, Rn {:
  1170     COUNT_INST(I_MULSW);
  1171     MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
  1172     MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
  1173     MULL_r32( REG_ECX );
  1174     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1175     sh4_x86.tstate = TSTATE_NONE;
  1176 :}
  1177 MULU.W Rm, Rn {:  
  1178     COUNT_INST(I_MULUW);
  1179     MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
  1180     MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
  1181     MULL_r32( REG_ECX );
  1182     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1183     sh4_x86.tstate = TSTATE_NONE;
  1184 :}
  1185 NEG Rm, Rn {:
  1186     COUNT_INST(I_NEG);
  1187     load_reg( REG_EAX, Rm );
  1188     NEGL_r32( REG_EAX );
  1189     store_reg( REG_EAX, Rn );
  1190     sh4_x86.tstate = TSTATE_NONE;
  1191 :}
  1192 NEGC Rm, Rn {:  
  1193     COUNT_INST(I_NEGC);
  1194     load_reg( REG_EAX, Rm );
  1195     XORL_r32_r32( REG_ECX, REG_ECX );
  1196     LDC_t();
  1197     SBBL_r32_r32( REG_EAX, REG_ECX );
  1198     store_reg( REG_ECX, Rn );
  1199     SETC_t();
  1200     sh4_x86.tstate = TSTATE_C;
  1201 :}
  1202 NOT Rm, Rn {:  
  1203     COUNT_INST(I_NOT);
  1204     load_reg( REG_EAX, Rm );
  1205     NOTL_r32( REG_EAX );
  1206     store_reg( REG_EAX, Rn );
  1207     sh4_x86.tstate = TSTATE_NONE;
  1208 :}
  1209 OR Rm, Rn {:  
  1210     COUNT_INST(I_OR);
  1211     load_reg( REG_EAX, Rm );
  1212     load_reg( REG_ECX, Rn );
  1213     ORL_r32_r32( REG_EAX, REG_ECX );
  1214     store_reg( REG_ECX, Rn );
  1215     sh4_x86.tstate = TSTATE_NONE;
  1216 :}
  1217 OR #imm, R0 {:
  1218     COUNT_INST(I_ORI);
  1219     load_reg( REG_EAX, 0 );
  1220     ORL_imms_r32(imm, REG_EAX);
  1221     store_reg( REG_EAX, 0 );
  1222     sh4_x86.tstate = TSTATE_NONE;
  1223 :}
  1224 OR.B #imm, @(R0, GBR) {:  
  1225     COUNT_INST(I_ORB);
  1226     load_reg( REG_EAX, 0 );
  1227     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
  1228     MOVL_r32_r32( REG_EAX, REG_SAVE1 );
  1229     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
  1230     MOVL_r32_r32( REG_SAVE1, REG_EAX );
  1231     ORL_imms_r32(imm, REG_EDX );
  1232     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1233     sh4_x86.tstate = TSTATE_NONE;
  1234 :}
  1235 ROTCL Rn {:
  1236     COUNT_INST(I_ROTCL);
  1237     load_reg( REG_EAX, Rn );
  1238     if( sh4_x86.tstate != TSTATE_C ) {
   1239 	LDC_t();
   1240     }
  1241     RCLL_imm_r32( 1, REG_EAX );
  1242     store_reg( REG_EAX, Rn );
  1243     SETC_t();
  1244     sh4_x86.tstate = TSTATE_C;
  1245 :}
  1246 ROTCR Rn {:  
  1247     COUNT_INST(I_ROTCR);
  1248     load_reg( REG_EAX, Rn );
  1249     if( sh4_x86.tstate != TSTATE_C ) {
   1250 	LDC_t();
   1251     }
  1252     RCRL_imm_r32( 1, REG_EAX );
  1253     store_reg( REG_EAX, Rn );
  1254     SETC_t();
  1255     sh4_x86.tstate = TSTATE_C;
  1256 :}
  1257 ROTL Rn {:  
  1258     COUNT_INST(I_ROTL);
  1259     load_reg( REG_EAX, Rn );
  1260     ROLL_imm_r32( 1, REG_EAX );
  1261     store_reg( REG_EAX, Rn );
  1262     SETC_t();
  1263     sh4_x86.tstate = TSTATE_C;
  1264 :}
  1265 ROTR Rn {:  
  1266     COUNT_INST(I_ROTR);
  1267     load_reg( REG_EAX, Rn );
  1268     RORL_imm_r32( 1, REG_EAX );
  1269     store_reg( REG_EAX, Rn );
  1270     SETC_t();
  1271     sh4_x86.tstate = TSTATE_C;
  1272 :}
  1273 SHAD Rm, Rn {:
  1274     COUNT_INST(I_SHAD);
  1275     /* Annoyingly enough, not directly convertible */
  1276     load_reg( REG_EAX, Rn );
  1277     load_reg( REG_ECX, Rm );
  1278     CMPL_imms_r32( 0, REG_ECX );
  1279     JGE_label(doshl);
  1281     NEGL_r32( REG_ECX );      // 2
  1282     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1283     JE_label(emptysar);     // 2
  1284     SARL_cl_r32( REG_EAX );       // 2
  1285     JMP_label(end);          // 2
  1287     JMP_TARGET(emptysar);
  1288     SARL_imm_r32(31, REG_EAX );  // 3
  1289     JMP_label(end2);
  1291     JMP_TARGET(doshl);
  1292     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1293     SHLL_cl_r32( REG_EAX );       // 2
  1294     JMP_TARGET(end);
  1295     JMP_TARGET(end2);
  1296     store_reg( REG_EAX, Rn );
  1297     sh4_x86.tstate = TSTATE_NONE;
  1298 :}
  1299 SHLD Rm, Rn {:  
  1300     COUNT_INST(I_SHLD);
  1301     load_reg( REG_EAX, Rn );
  1302     load_reg( REG_ECX, Rm );
  1303     CMPL_imms_r32( 0, REG_ECX );
  1304     JGE_label(doshl);
  1306     NEGL_r32( REG_ECX );      // 2
  1307     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1308     JE_label(emptyshr );
  1309     SHRL_cl_r32( REG_EAX );       // 2
  1310     JMP_label(end);          // 2
  1312     JMP_TARGET(emptyshr);
  1313     XORL_r32_r32( REG_EAX, REG_EAX );
  1314     JMP_label(end2);
  1316     JMP_TARGET(doshl);
  1317     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1318     SHLL_cl_r32( REG_EAX );       // 2
  1319     JMP_TARGET(end);
  1320     JMP_TARGET(end2);
  1321     store_reg( REG_EAX, Rn );
  1322     sh4_x86.tstate = TSTATE_NONE;
  1323 :}
  1324 SHAL Rn {: 
  1325     COUNT_INST(I_SHAL);
  1326     load_reg( REG_EAX, Rn );
  1327     SHLL_imm_r32( 1, REG_EAX );
  1328     SETC_t();
  1329     store_reg( REG_EAX, Rn );
  1330     sh4_x86.tstate = TSTATE_C;
  1331 :}
  1332 SHAR Rn {:  
  1333     COUNT_INST(I_SHAR);
  1334     load_reg( REG_EAX, Rn );
  1335     SARL_imm_r32( 1, REG_EAX );
  1336     SETC_t();
  1337     store_reg( REG_EAX, Rn );
  1338     sh4_x86.tstate = TSTATE_C;
  1339 :}
  1340 SHLL Rn {:  
  1341     COUNT_INST(I_SHLL);
  1342     load_reg( REG_EAX, Rn );
  1343     SHLL_imm_r32( 1, REG_EAX );
  1344     SETC_t();
  1345     store_reg( REG_EAX, Rn );
  1346     sh4_x86.tstate = TSTATE_C;
  1347 :}
  1348 SHLL2 Rn {:
  1349     COUNT_INST(I_SHLL);
  1350     load_reg( REG_EAX, Rn );
  1351     SHLL_imm_r32( 2, REG_EAX );
  1352     store_reg( REG_EAX, Rn );
  1353     sh4_x86.tstate = TSTATE_NONE;
  1354 :}
  1355 SHLL8 Rn {:  
  1356     COUNT_INST(I_SHLL);
  1357     load_reg( REG_EAX, Rn );
  1358     SHLL_imm_r32( 8, REG_EAX );
  1359     store_reg( REG_EAX, Rn );
  1360     sh4_x86.tstate = TSTATE_NONE;
  1361 :}
  1362 SHLL16 Rn {:  
  1363     COUNT_INST(I_SHLL);
  1364     load_reg( REG_EAX, Rn );
  1365     SHLL_imm_r32( 16, REG_EAX );
  1366     store_reg( REG_EAX, Rn );
  1367     sh4_x86.tstate = TSTATE_NONE;
  1368 :}
  1369 SHLR Rn {:  
  1370     COUNT_INST(I_SHLR);
  1371     load_reg( REG_EAX, Rn );
  1372     SHRL_imm_r32( 1, REG_EAX );
  1373     SETC_t();
  1374     store_reg( REG_EAX, Rn );
  1375     sh4_x86.tstate = TSTATE_C;
  1376 :}
  1377 SHLR2 Rn {:  
  1378     COUNT_INST(I_SHLR);
  1379     load_reg( REG_EAX, Rn );
  1380     SHRL_imm_r32( 2, REG_EAX );
  1381     store_reg( REG_EAX, Rn );
  1382     sh4_x86.tstate = TSTATE_NONE;
  1383 :}
  1384 SHLR8 Rn {:  
  1385     COUNT_INST(I_SHLR);
  1386     load_reg( REG_EAX, Rn );
  1387     SHRL_imm_r32( 8, REG_EAX );
  1388     store_reg( REG_EAX, Rn );
  1389     sh4_x86.tstate = TSTATE_NONE;
  1390 :}
  1391 SHLR16 Rn {:  
  1392     COUNT_INST(I_SHLR);
  1393     load_reg( REG_EAX, Rn );
  1394     SHRL_imm_r32( 16, REG_EAX );
  1395     store_reg( REG_EAX, Rn );
  1396     sh4_x86.tstate = TSTATE_NONE;
  1397 :}
  1398 SUB Rm, Rn {:  
  1399     COUNT_INST(I_SUB);
  1400     load_reg( REG_EAX, Rm );
  1401     load_reg( REG_ECX, Rn );
  1402     SUBL_r32_r32( REG_EAX, REG_ECX );
  1403     store_reg( REG_ECX, Rn );
  1404     sh4_x86.tstate = TSTATE_NONE;
  1405 :}
  1406 SUBC Rm, Rn {:  
  1407     COUNT_INST(I_SUBC);
  1408     load_reg( REG_EAX, Rm );
  1409     load_reg( REG_ECX, Rn );
  1410     if( sh4_x86.tstate != TSTATE_C ) {
   1411 	LDC_t();
   1412     }
  1413     SBBL_r32_r32( REG_EAX, REG_ECX );
  1414     store_reg( REG_ECX, Rn );
  1415     SETC_t();
  1416     sh4_x86.tstate = TSTATE_C;
  1417 :}
  1418 SUBV Rm, Rn {:  
  1419     COUNT_INST(I_SUBV);
  1420     load_reg( REG_EAX, Rm );
  1421     load_reg( REG_ECX, Rn );
  1422     SUBL_r32_r32( REG_EAX, REG_ECX );
  1423     store_reg( REG_ECX, Rn );
  1424     SETO_t();
  1425     sh4_x86.tstate = TSTATE_O;
  1426 :}
  1427 SWAP.B Rm, Rn {:  
  1428     COUNT_INST(I_SWAPB);
  1429     load_reg( REG_EAX, Rm );
  1430     XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
  1431     store_reg( REG_EAX, Rn );
  1432 :}
  1433 SWAP.W Rm, Rn {:  
   1434     COUNT_INST(I_SWAPW);
  1435     load_reg( REG_EAX, Rm );
  1436     MOVL_r32_r32( REG_EAX, REG_ECX );
  1437     SHLL_imm_r32( 16, REG_ECX );
  1438     SHRL_imm_r32( 16, REG_EAX );
  1439     ORL_r32_r32( REG_EAX, REG_ECX );
  1440     store_reg( REG_ECX, Rn );
  1441     sh4_x86.tstate = TSTATE_NONE;
  1442 :}
  1443 TAS.B @Rn {:  
  1444     COUNT_INST(I_TASB);
  1445     load_reg( REG_EAX, Rn );
  1446     MOVL_r32_r32( REG_EAX, REG_SAVE1 );
  1447     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
  1448     TESTB_r8_r8( REG_DL, REG_DL );
  1449     SETE_t();
  1450     ORB_imms_r8( 0x80, REG_DL );
  1451     MOVL_r32_r32( REG_SAVE1, REG_EAX );
  1452     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1453     sh4_x86.tstate = TSTATE_NONE;
  1454 :}
  1455 TST Rm, Rn {:  
  1456     COUNT_INST(I_TST);
  1457     load_reg( REG_EAX, Rm );
  1458     load_reg( REG_ECX, Rn );
  1459     TESTL_r32_r32( REG_EAX, REG_ECX );
  1460     SETE_t();
  1461     sh4_x86.tstate = TSTATE_E;
  1462 :}
  1463 TST #imm, R0 {:  
  1464     COUNT_INST(I_TSTI);
  1465     load_reg( REG_EAX, 0 );
  1466     TESTL_imms_r32( imm, REG_EAX );
  1467     SETE_t();
  1468     sh4_x86.tstate = TSTATE_E;
  1469 :}
  1470 TST.B #imm, @(R0, GBR) {:  
  1471     COUNT_INST(I_TSTB);
  1472     load_reg( REG_EAX, 0);
  1473     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
  1474     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1475     TESTB_imms_r8( imm, REG_AL );
  1476     SETE_t();
  1477     sh4_x86.tstate = TSTATE_E;
  1478 :}
  1479 XOR Rm, Rn {:  
  1480     COUNT_INST(I_XOR);
  1481     load_reg( REG_EAX, Rm );
  1482     load_reg( REG_ECX, Rn );
  1483     XORL_r32_r32( REG_EAX, REG_ECX );
  1484     store_reg( REG_ECX, Rn );
  1485     sh4_x86.tstate = TSTATE_NONE;
  1486 :}
  1487 XOR #imm, R0 {:  
  1488     COUNT_INST(I_XORI);
  1489     load_reg( REG_EAX, 0 );
  1490     XORL_imms_r32( imm, REG_EAX );
  1491     store_reg( REG_EAX, 0 );
  1492     sh4_x86.tstate = TSTATE_NONE;
  1493 :}
  1494 XOR.B #imm, @(R0, GBR) {:  
  1495     COUNT_INST(I_XORB);
  1496     load_reg( REG_EAX, 0 );
  1497     ADDL_rbpdisp_r32( R_GBR, REG_EAX ); 
  1498     MOVL_r32_r32( REG_EAX, REG_SAVE1 );
  1499     MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
  1500     MOVL_r32_r32( REG_SAVE1, REG_EAX );
  1501     XORL_imms_r32( imm, REG_EDX );
  1502     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1503     sh4_x86.tstate = TSTATE_NONE;
  1504 :}
  1505 XTRCT Rm, Rn {:
  1506     COUNT_INST(I_XTRCT);
  1507     load_reg( REG_EAX, Rm );
  1508     load_reg( REG_ECX, Rn );
  1509     SHLL_imm_r32( 16, REG_EAX );
  1510     SHRL_imm_r32( 16, REG_ECX );
  1511     ORL_r32_r32( REG_EAX, REG_ECX );
  1512     store_reg( REG_ECX, Rn );
  1513     sh4_x86.tstate = TSTATE_NONE;
  1514 :}
  1516 /* Data move instructions */
  1517 MOV Rm, Rn {:  
  1518     COUNT_INST(I_MOV);
  1519     load_reg( REG_EAX, Rm );
  1520     store_reg( REG_EAX, Rn );
  1521 :}
  1522 MOV #imm, Rn {:  
  1523     COUNT_INST(I_MOVI);
  1524     MOVL_imm32_r32( imm, REG_EAX );
  1525     store_reg( REG_EAX, Rn );
  1526 :}
  1527 MOV.B Rm, @Rn {:  
  1528     COUNT_INST(I_MOVB);
  1529     load_reg( REG_EAX, Rn );
  1530     load_reg( REG_EDX, Rm );
  1531     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1532     sh4_x86.tstate = TSTATE_NONE;
  1533 :}
  1534 MOV.B Rm, @-Rn {:  
  1535     COUNT_INST(I_MOVB);
  1536     load_reg( REG_EAX, Rn );
  1537     LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
  1538     load_reg( REG_EDX, Rm );
  1539     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1540     ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
  1541     sh4_x86.tstate = TSTATE_NONE;
  1542 :}
  1543 MOV.B Rm, @(R0, Rn) {:  
  1544     COUNT_INST(I_MOVB);
  1545     load_reg( REG_EAX, 0 );
  1546     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1547     load_reg( REG_EDX, Rm );
  1548     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1549     sh4_x86.tstate = TSTATE_NONE;
  1550 :}
  1551 MOV.B R0, @(disp, GBR) {:  
  1552     COUNT_INST(I_MOVB);
  1553     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1554     ADDL_imms_r32( disp, REG_EAX );
  1555     load_reg( REG_EDX, 0 );
  1556     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1557     sh4_x86.tstate = TSTATE_NONE;
  1558 :}
  1559 MOV.B R0, @(disp, Rn) {:  
  1560     COUNT_INST(I_MOVB);
  1561     load_reg( REG_EAX, Rn );
  1562     ADDL_imms_r32( disp, REG_EAX );
  1563     load_reg( REG_EDX, 0 );
  1564     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1565     sh4_x86.tstate = TSTATE_NONE;
  1566 :}
  1567 MOV.B @Rm, Rn {:  
  1568     COUNT_INST(I_MOVB);
  1569     load_reg( REG_EAX, Rm );
  1570     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1571     store_reg( REG_EAX, Rn );
  1572     sh4_x86.tstate = TSTATE_NONE;
  1573 :}
  1574 MOV.B @Rm+, Rn {:  
  1575     COUNT_INST(I_MOVB);
  1576     load_reg( REG_EAX, Rm );
  1577     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1578     if( Rm != Rn ) {
  1579     	ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
  1580     }
  1581     store_reg( REG_EAX, Rn );
  1582     sh4_x86.tstate = TSTATE_NONE;
  1583 :}
  1584 MOV.B @(R0, Rm), Rn {:  
  1585     COUNT_INST(I_MOVB);
  1586     load_reg( REG_EAX, 0 );
  1587     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1588     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1589     store_reg( REG_EAX, Rn );
  1590     sh4_x86.tstate = TSTATE_NONE;
  1591 :}
  1592 MOV.B @(disp, GBR), R0 {:  
  1593     COUNT_INST(I_MOVB);
  1594     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1595     ADDL_imms_r32( disp, REG_EAX );
  1596     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1597     store_reg( REG_EAX, 0 );
  1598     sh4_x86.tstate = TSTATE_NONE;
  1599 :}
  1600 MOV.B @(disp, Rm), R0 {:  
  1601     COUNT_INST(I_MOVB);
  1602     load_reg( REG_EAX, Rm );
  1603     ADDL_imms_r32( disp, REG_EAX );
  1604     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1605     store_reg( REG_EAX, 0 );
  1606     sh4_x86.tstate = TSTATE_NONE;
  1607 :}
  1608 MOV.L Rm, @Rn {:
  1609     COUNT_INST(I_MOVL);
  1610     load_reg( REG_EAX, Rn );
  1611     check_walign32(REG_EAX);
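        // Writes to 0xE0000000-0xE3FFFFFF hit the store queues rather than
        // memory: mask and test the top bits, and on a match write directly
        // into sh4r.store_queue instead of going through the memory subsystem.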
  1612     MOVL_r32_r32( REG_EAX, REG_ECX );
  1613     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1614     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1615     JNE_label( notsq );
  1616     ANDL_imms_r32( 0x3C, REG_EAX );
  1617     load_reg( REG_EDX, Rm );
  1618     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1619     JMP_label(end);
  1620     JMP_TARGET(notsq);
  1621     load_reg( REG_EDX, Rm );
  1622     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1623     JMP_TARGET(end);
  1624     sh4_x86.tstate = TSTATE_NONE;
  1625 :}
  1626 MOV.L Rm, @-Rn {:  
  1627     COUNT_INST(I_MOVL);
  1628     load_reg( REG_EAX, Rn );
  1629     ADDL_imms_r32( -4, REG_EAX );
  1630     check_walign32( REG_EAX );
  1631     load_reg( REG_EDX, Rm );
  1632     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1633     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  1634     sh4_x86.tstate = TSTATE_NONE;
  1635 :}
  1636 MOV.L Rm, @(R0, Rn) {:  
  1637     COUNT_INST(I_MOVL);
  1638     load_reg( REG_EAX, 0 );
  1639     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1640     check_walign32( REG_EAX );
  1641     load_reg( REG_EDX, Rm );
  1642     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1643     sh4_x86.tstate = TSTATE_NONE;
  1644 :}
  1645 MOV.L R0, @(disp, GBR) {:  
  1646     COUNT_INST(I_MOVL);
  1647     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1648     ADDL_imms_r32( disp, REG_EAX );
  1649     check_walign32( REG_EAX );
  1650     load_reg( REG_EDX, 0 );
  1651     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1652     sh4_x86.tstate = TSTATE_NONE;
  1653 :}
  1654 MOV.L Rm, @(disp, Rn) {:  
  1655     COUNT_INST(I_MOVL);
  1656     load_reg( REG_EAX, Rn );
  1657     ADDL_imms_r32( disp, REG_EAX );
  1658     check_walign32( REG_EAX );
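        // Same store-queue fast path as MOV.L Rm, @Rn above.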
  1659     MOVL_r32_r32( REG_EAX, REG_ECX );
  1660     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1661     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1662     JNE_label( notsq );
  1663     ANDL_imms_r32( 0x3C, REG_EAX );
  1664     load_reg( REG_EDX, Rm );
  1665     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1666     JMP_label(end);
  1667     JMP_TARGET(notsq);
  1668     load_reg( REG_EDX, Rm );
  1669     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1670     JMP_TARGET(end);
  1671     sh4_x86.tstate = TSTATE_NONE;
  1672 :}
  1673 MOV.L @Rm, Rn {:  
  1674     COUNT_INST(I_MOVL);
  1675     load_reg( REG_EAX, Rm );
  1676     check_ralign32( REG_EAX );
  1677     MEM_READ_LONG( REG_EAX, REG_EAX );
  1678     store_reg( REG_EAX, Rn );
  1679     sh4_x86.tstate = TSTATE_NONE;
  1680 :}
  1681 MOV.L @Rm+, Rn {:  
  1682     COUNT_INST(I_MOVL);
  1683     load_reg( REG_EAX, Rm );
  1684     check_ralign32( REG_EAX );
  1685     MEM_READ_LONG( REG_EAX, REG_EAX );
  1686     if( Rm != Rn ) {
  1687     	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  1688     }
  1689     store_reg( REG_EAX, Rn );
  1690     sh4_x86.tstate = TSTATE_NONE;
  1691 :}
  1692 MOV.L @(R0, Rm), Rn {:  
  1693     COUNT_INST(I_MOVL);
  1694     load_reg( REG_EAX, 0 );
  1695     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1696     check_ralign32( REG_EAX );
  1697     MEM_READ_LONG( REG_EAX, REG_EAX );
  1698     store_reg( REG_EAX, Rn );
  1699     sh4_x86.tstate = TSTATE_NONE;
  1700 :}
  1701 MOV.L @(disp, GBR), R0 {:
  1702     COUNT_INST(I_MOVL);
  1703     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1704     ADDL_imms_r32( disp, REG_EAX );
  1705     check_ralign32( REG_EAX );
  1706     MEM_READ_LONG( REG_EAX, REG_EAX );
  1707     store_reg( REG_EAX, 0 );
  1708     sh4_x86.tstate = TSTATE_NONE;
  1709 :}
  1710 MOV.L @(disp, PC), Rn {:  
  1711     COUNT_INST(I_MOVLPC);
  1712     if( sh4_x86.in_delay_slot ) {
  1713 	SLOTILLEGAL();
  1714     } else {
  1715 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1716 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1717 	    // If the target address is in the same page as the code, it's
  1718 	    // pretty safe to just ref it directly and circumvent the whole
  1719 	    // memory subsystem. (this is a big performance win)
  1721 	    // FIXME: There's a corner-case that's not handled here when
  1722 	    // the current code-page is in the ITLB but not in the UTLB.
  1723 	    // (This should generate a TLB miss, although SH4 behaviour
  1724 	    // needs testing to confirm.) It's unlikely that anyone
  1725 	    // depends on this behaviour though.
  1726 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1727 	    MOVL_moffptr_eax( ptr );
  1728 	} else {
  1729 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1730 	    // different virtual address than the translation was done with,
  1731 	    // but we can safely assume that the low bits are the same.
  1732 	    MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
  1733 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1734 	    MEM_READ_LONG( REG_EAX, REG_EAX );
  1735 	    sh4_x86.tstate = TSTATE_NONE;
  1736 	}
  1737 	store_reg( REG_EAX, Rn );
  1738     }
  1739 :}
  1740 MOV.L @(disp, Rm), Rn {:  
  1741     COUNT_INST(I_MOVL);
  1742     load_reg( REG_EAX, Rm );
  1743     ADDL_imms_r32( disp, REG_EAX );
  1744     check_ralign32( REG_EAX );
  1745     MEM_READ_LONG( REG_EAX, REG_EAX );
  1746     store_reg( REG_EAX, Rn );
  1747     sh4_x86.tstate = TSTATE_NONE;
  1748 :}
  1749 MOV.W Rm, @Rn {:  
  1750     COUNT_INST(I_MOVW);
  1751     load_reg( REG_EAX, Rn );
  1752     check_walign16( REG_EAX );
  1753     load_reg( REG_EDX, Rm );
  1754     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1755     sh4_x86.tstate = TSTATE_NONE;
  1756 :}
  1757 MOV.W Rm, @-Rn {:  
  1758     COUNT_INST(I_MOVW);
  1759     load_reg( REG_EAX, Rn );
  1760     check_walign16( REG_EAX );
  1761     LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
  1762     load_reg( REG_EDX, Rm );
  1763     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1764     ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
  1765     sh4_x86.tstate = TSTATE_NONE;
  1766 :}
  1767 MOV.W Rm, @(R0, Rn) {:  
  1768     COUNT_INST(I_MOVW);
  1769     load_reg( REG_EAX, 0 );
  1770     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1771     check_walign16( REG_EAX );
  1772     load_reg( REG_EDX, Rm );
  1773     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1774     sh4_x86.tstate = TSTATE_NONE;
  1775 :}
  1776 MOV.W R0, @(disp, GBR) {:  
  1777     COUNT_INST(I_MOVW);
  1778     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1779     ADDL_imms_r32( disp, REG_EAX );
  1780     check_walign16( REG_EAX );
  1781     load_reg( REG_EDX, 0 );
  1782     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1783     sh4_x86.tstate = TSTATE_NONE;
  1784 :}
  1785 MOV.W R0, @(disp, Rn) {:  
  1786     COUNT_INST(I_MOVW);
  1787     load_reg( REG_EAX, Rn );
  1788     ADDL_imms_r32( disp, REG_EAX );
  1789     check_walign16( REG_EAX );
  1790     load_reg( REG_EDX, 0 );
  1791     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1792     sh4_x86.tstate = TSTATE_NONE;
  1793 :}
  1794 MOV.W @Rm, Rn {:  
  1795     COUNT_INST(I_MOVW);
  1796     load_reg( REG_EAX, Rm );
  1797     check_ralign16( REG_EAX );
  1798     MEM_READ_WORD( REG_EAX, REG_EAX );
  1799     store_reg( REG_EAX, Rn );
  1800     sh4_x86.tstate = TSTATE_NONE;
  1801 :}
  1802 MOV.W @Rm+, Rn {:  
  1803     COUNT_INST(I_MOVW);
  1804     load_reg( REG_EAX, Rm );
  1805     check_ralign16( REG_EAX );
  1806     MEM_READ_WORD( REG_EAX, REG_EAX );
  1807     if( Rm != Rn ) {
  1808         ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
  1809     }
  1810     store_reg( REG_EAX, Rn );
  1811     sh4_x86.tstate = TSTATE_NONE;
  1812 :}
  1813 MOV.W @(R0, Rm), Rn {:  
  1814     COUNT_INST(I_MOVW);
  1815     load_reg( REG_EAX, 0 );
  1816     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1817     check_ralign16( REG_EAX );
  1818     MEM_READ_WORD( REG_EAX, REG_EAX );
  1819     store_reg( REG_EAX, Rn );
  1820     sh4_x86.tstate = TSTATE_NONE;
  1821 :}
  1822 MOV.W @(disp, GBR), R0 {:  
  1823     COUNT_INST(I_MOVW);
  1824     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1825     ADDL_imms_r32( disp, REG_EAX );
  1826     check_ralign16( REG_EAX );
  1827     MEM_READ_WORD( REG_EAX, REG_EAX );
  1828     store_reg( REG_EAX, 0 );
  1829     sh4_x86.tstate = TSTATE_NONE;
  1830 :}
  1831 MOV.W @(disp, PC), Rn {:  
  1832     COUNT_INST(I_MOVW);
  1833     if( sh4_x86.in_delay_slot ) {
  1834 	SLOTILLEGAL();
  1835     } else {
  1836 	// See comments for MOV.L @(disp, PC), Rn
  1837 	uint32_t target = pc + disp + 4;
  1838 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1839 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1840 	    MOVL_moffptr_eax( ptr );
  1841 	    MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1842 	} else {
  1843 	    MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
  1844 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1845 	    MEM_READ_WORD( REG_EAX, REG_EAX );
  1846 	    sh4_x86.tstate = TSTATE_NONE;
  1847 	}
  1848 	store_reg( REG_EAX, Rn );
  1849     }
  1850 :}
  1851 MOV.W @(disp, Rm), R0 {:  
  1852     COUNT_INST(I_MOVW);
  1853     load_reg( REG_EAX, Rm );
  1854     ADDL_imms_r32( disp, REG_EAX );
  1855     check_ralign16( REG_EAX );
  1856     MEM_READ_WORD( REG_EAX, REG_EAX );
  1857     store_reg( REG_EAX, 0 );
  1858     sh4_x86.tstate = TSTATE_NONE;
  1859 :}
  1860 MOVA @(disp, PC), R0 {:  
  1861     COUNT_INST(I_MOVA);
  1862     if( sh4_x86.in_delay_slot ) {
  1863 	SLOTILLEGAL();
  1864     } else {
  1865 	MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
  1866 	ADDL_rbpdisp_r32( R_PC, REG_ECX );
  1867 	store_reg( REG_ECX, 0 );
  1868 	sh4_x86.tstate = TSTATE_NONE;
  1869     }
  1870 :}
  1871 MOVCA.L R0, @Rn {:  
  1872     COUNT_INST(I_MOVCA);
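        // MOVCA.L allocates a cache block without fetching it on real
        // hardware; since the operand cache isn't modelled here, it's
        // treated as a plain long store.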
  1873     load_reg( REG_EAX, Rn );
  1874     check_walign32( REG_EAX );
  1875     load_reg( REG_EDX, 0 );
  1876     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1877     sh4_x86.tstate = TSTATE_NONE;
  1878 :}
  1880 /* Control transfer instructions */
  1881 BF disp {:
  1882     COUNT_INST(I_BF);
  1883     if( sh4_x86.in_delay_slot ) {
  1884 	SLOTILLEGAL();
  1885     } else {
  1886 	sh4vma_t target = disp + pc + 4;
  1887 	JT_label( nottaken );
  1888 	exit_block_rel(target, pc+2 );
  1889 	JMP_TARGET(nottaken);
  1890 	return 2;
  1891     }
  1892 :}
  1893 BF/S disp {:
  1894     COUNT_INST(I_BFS);
  1895     if( sh4_x86.in_delay_slot ) {
  1896 	SLOTILLEGAL();
  1897     } else {
  1898 	sh4_x86.in_delay_slot = DELAY_PC;
  1899 	if( UNTRANSLATABLE(pc+2) ) {
  1900 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1901 	    JT_label(nottaken);
  1902 	    ADDL_imms_r32( disp, REG_EAX );
  1903 	    JMP_TARGET(nottaken);
  1904 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1905 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1906 	    exit_block_emu(pc+2);
  1907 	    sh4_x86.branch_taken = TRUE;
  1908 	    return 2;
  1909 	} else {
  1910 	    LOAD_t();
  1911 	    sh4vma_t target = disp + pc + 4;
  1912 	    JCC_cc_rel32(sh4_x86.tstate,0);
  1913 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
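	    // The conditional branch is emitted with a zero displacement;
	    // its rel32 is patched below once the not-taken address is known.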
  1914 	    int save_tstate = sh4_x86.tstate;
  1915 	    sh4_translate_instruction(pc+2);
  1916             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  1917 	    exit_block_rel( target, pc+4 );
  1919 	    // not taken
  1920 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1921 	    sh4_x86.tstate = save_tstate;
  1922 	    sh4_translate_instruction(pc+2);
  1923 	    return 4;
  1924 	}
  1925     }
  1926 :}
  1927 BRA disp {:  
  1928     COUNT_INST(I_BRA);
  1929     if( sh4_x86.in_delay_slot ) {
  1930 	SLOTILLEGAL();
  1931     } else {
  1932 	sh4_x86.in_delay_slot = DELAY_PC;
  1933 	sh4_x86.branch_taken = TRUE;
  1934 	if( UNTRANSLATABLE(pc+2) ) {
  1935 	    MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1936 	    ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
  1937 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1938 	    exit_block_emu(pc+2);
  1939 	    return 2;
  1940 	} else {
  1941 	    sh4_translate_instruction( pc + 2 );
  1942 	    exit_block_rel( disp + pc + 4, pc+4 );
  1943 	    return 4;
  1944 	}
  1945     }
  1946 :}
  1947 BRAF Rn {:  
  1948     COUNT_INST(I_BRAF);
  1949     if( sh4_x86.in_delay_slot ) {
  1950 	SLOTILLEGAL();
  1951     } else {
  1952 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1953 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1954 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1955 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1956 	sh4_x86.in_delay_slot = DELAY_PC;
  1957 	sh4_x86.tstate = TSTATE_NONE;
  1958 	sh4_x86.branch_taken = TRUE;
  1959 	if( UNTRANSLATABLE(pc+2) ) {
  1960 	    exit_block_emu(pc+2);
  1961 	    return 2;
  1962 	} else {
  1963 	    sh4_translate_instruction( pc + 2 );
  1964 	    exit_block_newpcset(pc+4);
  1965 	    return 4;
  1966 	}
  1967     }
  1968 :}
  1969 BSR disp {:  
  1970     COUNT_INST(I_BSR);
  1971     if( sh4_x86.in_delay_slot ) {
  1972 	SLOTILLEGAL();
  1973     } else {
  1974 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1975 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1976 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1977 	sh4_x86.in_delay_slot = DELAY_PC;
  1978 	sh4_x86.branch_taken = TRUE;
  1979 	sh4_x86.tstate = TSTATE_NONE;
  1980 	if( UNTRANSLATABLE(pc+2) ) {
  1981 	    ADDL_imms_r32( disp, REG_EAX );
  1982 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1983 	    exit_block_emu(pc+2);
  1984 	    return 2;
  1985 	} else {
  1986 	    sh4_translate_instruction( pc + 2 );
  1987 	    exit_block_rel( disp + pc + 4, pc+4 );
  1988 	    return 4;
  1989 	}
  1990     }
  1991 :}
  1992 BSRF Rn {:  
  1993     COUNT_INST(I_BSRF);
  1994     if( sh4_x86.in_delay_slot ) {
  1995 	SLOTILLEGAL();
  1996     } else {
  1997 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1998 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1999 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2000 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  2001 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2003 	sh4_x86.in_delay_slot = DELAY_PC;
  2004 	sh4_x86.tstate = TSTATE_NONE;
  2005 	sh4_x86.branch_taken = TRUE;
  2006 	if( UNTRANSLATABLE(pc+2) ) {
  2007 	    exit_block_emu(pc+2);
  2008 	    return 2;
  2009 	} else {
  2010 	    sh4_translate_instruction( pc + 2 );
  2011 	    exit_block_newpcset(pc+4);
  2012 	    return 4;
  2013 	}
  2014     }
  2015 :}
  2016 BT disp {:
  2017     COUNT_INST(I_BT);
  2018     if( sh4_x86.in_delay_slot ) {
  2019 	SLOTILLEGAL();
  2020     } else {
  2021 	sh4vma_t target = disp + pc + 4;
  2022 	JF_label( nottaken );
  2023 	exit_block_rel(target, pc+2 );
  2024 	JMP_TARGET(nottaken);
  2025 	return 2;
  2026     }
  2027 :}
  2028 BT/S disp {:
  2029     COUNT_INST(I_BTS);
  2030     if( sh4_x86.in_delay_slot ) {
  2031 	SLOTILLEGAL();
  2032     } else {
  2033 	sh4_x86.in_delay_slot = DELAY_PC;
  2034 	if( UNTRANSLATABLE(pc+2) ) {
  2035 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2036 	    JF_label(nottaken);
  2037 	    ADDL_imms_r32( disp, REG_EAX );
  2038 	    JMP_TARGET(nottaken);
  2039 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  2040 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2041 	    exit_block_emu(pc+2);
  2042 	    sh4_x86.branch_taken = TRUE;
  2043 	    return 2;
  2044 	} else {
  2045 	    LOAD_t();
  2046 	    JCC_cc_rel32(sh4_x86.tstate^1,0);
  2047 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
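	    // Same patch-up scheme as BF/S above, with the condition inverted.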
  2049 	    int save_tstate = sh4_x86.tstate;
  2050 	    sh4_translate_instruction(pc+2);
  2051             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  2052 	    exit_block_rel( disp + pc + 4, pc+4 );
  2053 	    // not taken
  2054 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  2055 	    sh4_x86.tstate = save_tstate;
  2056 	    sh4_translate_instruction(pc+2);
  2057 	    return 4;
  2058 	}
  2059     }
  2060 :}
  2061 JMP @Rn {:  
  2062     COUNT_INST(I_JMP);
  2063     if( sh4_x86.in_delay_slot ) {
  2064 	SLOTILLEGAL();
  2065     } else {
  2066 	load_reg( REG_ECX, Rn );
  2067 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2068 	sh4_x86.in_delay_slot = DELAY_PC;
  2069 	sh4_x86.branch_taken = TRUE;
  2070 	if( UNTRANSLATABLE(pc+2) ) {
  2071 	    exit_block_emu(pc+2);
  2072 	    return 2;
  2073 	} else {
  2074 	    sh4_translate_instruction(pc+2);
  2075 	    exit_block_newpcset(pc+4);
  2076 	    return 4;
  2077 	}
  2078     }
  2079 :}
  2080 JSR @Rn {:  
  2081     COUNT_INST(I_JSR);
  2082     if( sh4_x86.in_delay_slot ) {
  2083 	SLOTILLEGAL();
  2084     } else {
  2085 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2086 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2087 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2088 	load_reg( REG_ECX, Rn );
  2089 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2090 	sh4_x86.in_delay_slot = DELAY_PC;
  2091 	sh4_x86.branch_taken = TRUE;
  2092 	sh4_x86.tstate = TSTATE_NONE;
  2093 	if( UNTRANSLATABLE(pc+2) ) {
  2094 	    exit_block_emu(pc+2);
  2095 	    return 2;
  2096 	} else {
  2097 	    sh4_translate_instruction(pc+2);
  2098 	    exit_block_newpcset(pc+4);
  2099 	    return 4;
  2100 	}
  2101     }
  2102 :}
  2103 RTE {:  
  2104     COUNT_INST(I_RTE);
  2105     if( sh4_x86.in_delay_slot ) {
  2106 	SLOTILLEGAL();
  2107     } else {
  2108 	check_priv();
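	// Restore PC from SPC and SR from SSR. The SR write can change
	// register banks and MD/FPU-disable state, so the cached mode is
	// invalidated and the FPU-enable check is forced to run again.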
  2109 	MOVL_rbpdisp_r32( R_SPC, REG_ECX );
  2110 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2111 	MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2112 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2113 	sh4_x86.in_delay_slot = DELAY_PC;
  2114 	sh4_x86.fpuen_checked = FALSE;
  2115 	sh4_x86.tstate = TSTATE_NONE;
  2116 	sh4_x86.branch_taken = TRUE;
  2117     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2118 	if( UNTRANSLATABLE(pc+2) ) {
  2119 	    exit_block_emu(pc+2);
  2120 	    return 2;
  2121 	} else {
  2122 	    sh4_translate_instruction(pc+2);
  2123 	    exit_block_newpcset(pc+4);
  2124 	    return 4;
  2125 	}
  2126     }
  2127 :}
  2128 RTS {:  
  2129     COUNT_INST(I_RTS);
  2130     if( sh4_x86.in_delay_slot ) {
  2131 	SLOTILLEGAL();
  2132     } else {
  2133 	MOVL_rbpdisp_r32( R_PR, REG_ECX );
  2134 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2135 	sh4_x86.in_delay_slot = DELAY_PC;
  2136 	sh4_x86.branch_taken = TRUE;
  2137 	if( UNTRANSLATABLE(pc+2) ) {
  2138 	    exit_block_emu(pc+2);
  2139 	    return 2;
  2140 	} else {
  2141 	    sh4_translate_instruction(pc+2);
  2142 	    exit_block_newpcset(pc+4);
  2143 	    return 4;
  2144 	}
  2145     }
  2146 :}
  2147 TRAPA #imm {:  
  2148     COUNT_INST(I_TRAPA);
  2149     if( sh4_x86.in_delay_slot ) {
  2150 	SLOTILLEGAL();
  2151     } else {
  2152 	MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX );   // 5
  2153 	ADDL_r32_rbpdisp( REG_ECX, R_PC );
  2154 	MOVL_imm32_r32( imm, REG_EAX );
  2155 	CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
  2156 	sh4_x86.tstate = TSTATE_NONE;
  2157 	exit_block_pcset(pc+2);
  2158 	sh4_x86.branch_taken = TRUE;
  2159 	return 2;
  2160     }
  2161 :}
  2162 UNDEF {:  
  2163     COUNT_INST(I_UNDEF);
  2164     if( sh4_x86.in_delay_slot ) {
  2165 	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4);    
  2166     } else {
  2167 	exit_block_exc(EXC_ILLEGAL, pc, 2);    
  2168 	return 2;
  2169     }
  2170 :}
  2172 CLRMAC {:  
  2173     COUNT_INST(I_CLRMAC);
  2174     XORL_r32_r32(REG_EAX, REG_EAX);
  2175     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2176     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2177     sh4_x86.tstate = TSTATE_NONE;
  2178 :}
  2179 CLRS {:
  2180     COUNT_INST(I_CLRS);
  2181     CLC();
  2182     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2183     sh4_x86.tstate = TSTATE_NONE;
  2184 :}
  2185 CLRT {:  
  2186     COUNT_INST(I_CLRT);
  2187     CLC();
  2188     SETC_t();
  2189     sh4_x86.tstate = TSTATE_C;
  2190 :}
  2191 SETS {:  
  2192     COUNT_INST(I_SETS);
  2193     STC();
  2194     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2195     sh4_x86.tstate = TSTATE_NONE;
  2196 :}
  2197 SETT {:  
  2198     COUNT_INST(I_SETT);
  2199     STC();
  2200     SETC_t();
  2201     sh4_x86.tstate = TSTATE_C;
  2202 :}
  2204 /* Floating point moves */
  2205 FMOV FRm, FRn {:  
  2206     COUNT_INST(I_FMOV1);
  2207     check_fpuen();
  2208     if( sh4_x86.double_size ) {
  2209         load_dr0( REG_EAX, FRm );
  2210         load_dr1( REG_ECX, FRm );
  2211         store_dr0( REG_EAX, FRn );
  2212         store_dr1( REG_ECX, FRn );
  2213     } else {
  2214         load_fr( REG_EAX, FRm ); // SZ=0 branch
  2215         store_fr( REG_EAX, FRn );
  2216     }
  2217 :}
  2218 FMOV FRm, @Rn {: 
  2219     COUNT_INST(I_FMOV2);
  2220     check_fpuen();
  2221     load_reg( REG_EAX, Rn );
  2222     if( sh4_x86.double_size ) {
  2223         check_walign64( REG_EAX );
  2224         load_dr0( REG_EDX, FRm );
  2225         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2226         load_reg( REG_EAX, Rn );
  2227         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2228         load_dr1( REG_EDX, FRm );
  2229         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2230     } else {
  2231         check_walign32( REG_EAX );
  2232         load_fr( REG_EDX, FRm );
  2233         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2234     }
  2235     sh4_x86.tstate = TSTATE_NONE;
  2236 :}
  2237 FMOV @Rm, FRn {:  
  2238     COUNT_INST(I_FMOV5);
  2239     check_fpuen();
  2240     load_reg( REG_EAX, Rm );
  2241     if( sh4_x86.double_size ) {
  2242         check_ralign64( REG_EAX );
  2243         MEM_READ_LONG( REG_EAX, REG_EAX );
  2244         store_dr0( REG_EAX, FRn );
  2245         load_reg( REG_EAX, Rm );
  2246         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2247         MEM_READ_LONG( REG_EAX, REG_EAX );
  2248         store_dr1( REG_EAX, FRn );
  2249     } else {
  2250         check_ralign32( REG_EAX );
  2251         MEM_READ_LONG( REG_EAX, REG_EAX );
  2252         store_fr( REG_EAX, FRn );
  2253     }
  2254     sh4_x86.tstate = TSTATE_NONE;
  2255 :}
  2256 FMOV FRm, @-Rn {:  
  2257     COUNT_INST(I_FMOV3);
  2258     check_fpuen();
  2259     load_reg( REG_EAX, Rn );
  2260     if( sh4_x86.double_size ) {
  2261         check_walign64( REG_EAX );
  2262         LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
  2263         load_dr0( REG_EDX, FRm );
  2264         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2265         load_reg( REG_EAX, Rn );
  2266         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2267         load_dr1( REG_EDX, FRm );
  2268         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2269         ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
  2270     } else {
  2271         check_walign32( REG_EAX );
  2272         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2273         load_fr( REG_EDX, FRm );
  2274         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2275         ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
  2275     }
  2277     sh4_x86.tstate = TSTATE_NONE;
  2278 :}
  2279 FMOV @Rm+, FRn {:
  2280     COUNT_INST(I_FMOV6);
  2281     check_fpuen();
  2282     load_reg( REG_EAX, Rm );
  2283     if( sh4_x86.double_size ) {
  2284         check_ralign64( REG_EAX );
  2285         MEM_READ_LONG( REG_EAX, REG_EAX );
  2286         store_dr0( REG_EAX, FRn );
  2287         load_reg( REG_EAX, Rm );
  2288         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2289         MEM_READ_LONG( REG_EAX, REG_EAX );
  2290         store_dr1( REG_EAX, FRn );
  2291         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
  2292     } else {
  2293         check_ralign32( REG_EAX );
  2294         MEM_READ_LONG( REG_EAX, REG_EAX );
  2295         store_fr( REG_EAX, FRn );
  2296         ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2297     }
  2298     sh4_x86.tstate = TSTATE_NONE;
  2299 :}
  2300 FMOV FRm, @(R0, Rn) {:  
  2301     COUNT_INST(I_FMOV4);
  2302     check_fpuen();
  2303     load_reg( REG_EAX, Rn );
  2304     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2305     if( sh4_x86.double_size ) {
  2306         check_walign64( REG_EAX );
  2307         load_dr0( REG_EDX, FRm );
  2308         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2309         load_reg( REG_EAX, Rn );
  2310         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2311         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2312         load_dr1( REG_EDX, FRm );
  2313         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2314     } else {
  2315         check_walign32( REG_EAX );
  2316         load_fr( REG_EDX, FRm );
  2317         MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
  2318     }
  2319     sh4_x86.tstate = TSTATE_NONE;
  2320 :}
  2321 FMOV @(R0, Rm), FRn {:  
  2322     COUNT_INST(I_FMOV7);
  2323     check_fpuen();
  2324     load_reg( REG_EAX, Rm );
  2325     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2326     if( sh4_x86.double_size ) {
  2327         check_ralign64( REG_EAX );
  2328         MEM_READ_LONG( REG_EAX, REG_EAX );
  2329         store_dr0( REG_EAX, FRn );
  2330         load_reg( REG_EAX, Rm );
  2331         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2332         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2333         MEM_READ_LONG( REG_EAX, REG_EAX );
  2334         store_dr1( REG_EAX, FRn );
  2335     } else {
  2336         check_ralign32( REG_EAX );
  2337         MEM_READ_LONG( REG_EAX, REG_EAX );
  2338         store_fr( REG_EAX, FRn );
  2339     }
  2340     sh4_x86.tstate = TSTATE_NONE;
  2341 :}
  2342 FLDI0 FRn {:  /* IFF PR=0 */
  2343     COUNT_INST(I_FLDI0);
  2344     check_fpuen();
  2345     if( sh4_x86.double_prec == 0 ) {
  2346         XORL_r32_r32( REG_EAX, REG_EAX );
  2347         store_fr( REG_EAX, FRn );
  2348     }
  2349     sh4_x86.tstate = TSTATE_NONE;
  2350 :}
  2351 FLDI1 FRn {:  /* IFF PR=0 */
  2352     COUNT_INST(I_FLDI1);
  2353     check_fpuen();
  2354     if( sh4_x86.double_prec == 0 ) {
  2355         MOVL_imm32_r32( 0x3F800000, REG_EAX );
  2356         store_fr( REG_EAX, FRn );
  2357     }
  2358 :}
  2360 FLOAT FPUL, FRn {:  
  2361     COUNT_INST(I_FLOAT);
  2362     check_fpuen();
  2363     FILD_rbpdisp(R_FPUL);
  2364     if( sh4_x86.double_prec ) {
  2365         pop_dr( FRn );
  2366     } else {
  2367         pop_fr( FRn );
  2368     }
  2369 :}
  2370 FTRC FRm, FPUL {:  
  2371     COUNT_INST(I_FTRC);
  2372     check_fpuen();
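        /* FTRC truncates toward zero, saturating results outside the int32
         * range. The operand is compared against min_int/max_int on the x87
         * stack (NaN takes the min_int exit via the parity jump); only the
         * in-range path swaps in the truncating FPU control word. */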
  2373     if( sh4_x86.double_prec ) {
  2374         push_dr( FRm );
  2375     } else {
  2376         push_fr( FRm );
  2377     }
  2378     MOVP_immptr_rptr( &min_int, REG_ECX );
  2379     FILD_r32disp( REG_ECX, 0 );
  2380     FCOMIP_st(1);              
  2381     JAE_label( sat );     
  2382     JP_label( sat2 );       
  2383     MOVP_immptr_rptr( &max_int, REG_ECX );
  2384     FILD_r32disp( REG_ECX, 0 );
  2385     FCOMIP_st(1);
  2386     JNA_label( sat3 );
  2387     MOVP_immptr_rptr( &save_fcw, REG_EAX );
  2388     FNSTCW_r32disp( REG_EAX, 0 );
  2389     MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
  2390     FLDCW_r32disp( REG_EDX, 0 );
  2391     FISTP_rbpdisp(R_FPUL);             
  2392     FLDCW_r32disp( REG_EAX, 0 );
  2393     JMP_label(end);             
  2395     JMP_TARGET(sat);
  2396     JMP_TARGET(sat2);
  2397     JMP_TARGET(sat3);
  2398     MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
  2399     MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
  2400     FPOP_st();
  2401     JMP_TARGET(end);
  2402     sh4_x86.tstate = TSTATE_NONE;
  2403 :}
  2404 FLDS FRm, FPUL {:  
  2405     COUNT_INST(I_FLDS);
  2406     check_fpuen();
  2407     load_fr( REG_EAX, FRm );
  2408     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2409 :}
  2410 FSTS FPUL, FRn {:  
  2411     COUNT_INST(I_FSTS);
  2412     check_fpuen();
  2413     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2414     store_fr( REG_EAX, FRn );
  2415 :}
  2416 FCNVDS FRm, FPUL {:  
  2417     COUNT_INST(I_FCNVDS);
  2418     check_fpuen();
  2419     if( sh4_x86.double_prec ) {
  2420         push_dr( FRm );
  2421         pop_fpul();
  2422     }
  2423 :}
  2424 FCNVSD FPUL, FRn {:  
  2425     COUNT_INST(I_FCNVSD);
  2426     check_fpuen();
  2427     if( sh4_x86.double_prec ) {
  2428         push_fpul();
  2429         pop_dr( FRn );
  2430     }
  2431 :}
  2433 /* Floating point instructions */
  2434 FABS FRn {:  
  2435     COUNT_INST(I_FABS);
  2436     check_fpuen();
  2437     if( sh4_x86.double_prec ) {
  2438         push_dr(FRn);
  2439         FABS_st0();
  2440         pop_dr(FRn);
  2441     } else {
  2442         push_fr(FRn);
  2443         FABS_st0();
  2444         pop_fr(FRn);
  2445     }
  2446 :}
  2447 FADD FRm, FRn {:  
  2448     COUNT_INST(I_FADD);
  2449     check_fpuen();
  2450     if( sh4_x86.double_prec ) {
  2451         push_dr(FRm);
  2452         push_dr(FRn);
  2453         FADDP_st(1);
  2454         pop_dr(FRn);
  2455     } else {
  2456         push_fr(FRm);
  2457         push_fr(FRn);
  2458         FADDP_st(1);
  2459         pop_fr(FRn);
  2460     }
  2461 :}
  2462 FDIV FRm, FRn {:  
  2463     COUNT_INST(I_FDIV);
  2464     check_fpuen();
  2465     if( sh4_x86.double_prec ) {
  2466         push_dr(FRn);
  2467         push_dr(FRm);
  2468         FDIVP_st(1);
  2469         pop_dr(FRn);
  2470     } else {
  2471         push_fr(FRn);
  2472         push_fr(FRm);
  2473         FDIVP_st(1);
  2474         pop_fr(FRn);
  2475     }
  2476 :}
  2477 FMAC FR0, FRm, FRn {:  
  2478     COUNT_INST(I_FMAC);
  2479     check_fpuen();
  2480     if( sh4_x86.double_prec ) {
  2481         push_dr( 0 );
  2482         push_dr( FRm );
  2483         FMULP_st(1);
  2484         push_dr( FRn );
  2485         FADDP_st(1);
  2486         pop_dr( FRn );
  2487     } else {
  2488         push_fr( 0 );
  2489         push_fr( FRm );
  2490         FMULP_st(1);
  2491         push_fr( FRn );
  2492         FADDP_st(1);
  2493         pop_fr( FRn );
  2494     }
  2495 :}
  2497 FMUL FRm, FRn {:  
  2498     COUNT_INST(I_FMUL);
  2499     check_fpuen();
  2500     if( sh4_x86.double_prec ) {
  2501         push_dr(FRm);
  2502         push_dr(FRn);
  2503         FMULP_st(1);
  2504         pop_dr(FRn);
  2505     } else {
  2506         push_fr(FRm);
  2507         push_fr(FRn);
  2508         FMULP_st(1);
  2509         pop_fr(FRn);
  2510     }
  2511 :}
  2512 FNEG FRn {:  
  2513     COUNT_INST(I_FNEG);
  2514     check_fpuen();
  2515     if( sh4_x86.double_prec ) {
  2516         push_dr(FRn);
  2517         FCHS_st0();
  2518         pop_dr(FRn);
  2519     } else {
  2520         push_fr(FRn);
  2521         FCHS_st0();
  2522         pop_fr(FRn);
  2523     }
  2524 :}
  2525 FSRRA FRn {:  
  2526     COUNT_INST(I_FSRRA);
  2527     check_fpuen();
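        // FSRRA approximates 1/sqrt(FRn); implemented here as an exact
        // 1.0/sqrt(x) on the x87 stack (defined for single precision only).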
  2528     if( sh4_x86.double_prec == 0 ) {
  2529         FLD1_st0();
  2530         push_fr(FRn);
  2531         FSQRT_st0();
  2532         FDIVP_st(1);
  2533         pop_fr(FRn);
  2534     }
  2535 :}
  2536 FSQRT FRn {:  
  2537     COUNT_INST(I_FSQRT);
  2538     check_fpuen();
  2539     if( sh4_x86.double_prec ) {
  2540         push_dr(FRn);
  2541         FSQRT_st0();
  2542         pop_dr(FRn);
  2543     } else {
  2544         push_fr(FRn);
  2545         FSQRT_st0();
  2546         pop_fr(FRn);
  2547     }
  2548 :}
  2549 FSUB FRm, FRn {:  
  2550     COUNT_INST(I_FSUB);
  2551     check_fpuen();
  2552     if( sh4_x86.double_prec ) {
  2553         push_dr(FRn);
  2554         push_dr(FRm);
  2555         FSUBP_st(1);
  2556         pop_dr(FRn);
  2557     } else {
  2558         push_fr(FRn);
  2559         push_fr(FRm);
  2560         FSUBP_st(1);
  2561         pop_fr(FRn);
  2562     }
  2563 :}
  2565 FCMP/EQ FRm, FRn {:  
  2566     COUNT_INST(I_FCMPEQ);
  2567     check_fpuen();
  2568     if( sh4_x86.double_prec ) {
  2569         push_dr(FRm);
  2570         push_dr(FRn);
  2571     } else {
  2572         push_fr(FRm);
  2573         push_fr(FRn);
  2574     }
  2575     XORL_r32_r32(REG_EAX, REG_EAX);
  2576     XORL_r32_r32(REG_EDX, REG_EDX);
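        // T = 1 only for an ordered equal compare: DL records !PF (not NaN)
        // and the CMOV copies it to EAX only when ZF indicates equality.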
  2577     FCOMIP_st(1);
  2578     SETCCB_cc_r8(X86_COND_NP, REG_DL);
  2579     CMOVCCL_cc_r32_r32(X86_COND_E, REG_EDX, REG_EAX);
  2580     MOVL_r32_rbpdisp(REG_EAX, R_T);
  2581     FPOP_st();
  2582     sh4_x86.tstate = TSTATE_NONE;
  2583 :}
  2584 FCMP/GT FRm, FRn {:  
  2585     COUNT_INST(I_FCMPGT);
  2586     check_fpuen();
  2587     if( sh4_x86.double_prec ) {
  2588         push_dr(FRm);
  2589         push_dr(FRn);
  2590     } else {
  2591         push_fr(FRm);
  2592         push_fr(FRn);
  2593     }
  2594     FCOMIP_st(1);
  2595     SETA_t();
  2596     FPOP_st();
  2597     sh4_x86.tstate = TSTATE_A;
  2598 :}
  2600 FSCA FPUL, FRn {:  
  2601     COUNT_INST(I_FSCA);
  2602     check_fpuen();
  2603     if( sh4_x86.double_prec == 0 ) {
  2604         LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
  2605         MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2606         CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
  2607     }
  2608     sh4_x86.tstate = TSTATE_NONE;
  2609 :}
  2610 FIPR FVm, FVn {:  
  2611     COUNT_INST(I_FIPR);
  2612     check_fpuen();
  2613     if( sh4_x86.double_prec == 0 ) {
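        // Inner product of FVm and FVn, written to FR[(FVn<<2)+2]. The
        // SSE3 path sums the four products with two HADDPS passes.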
  2614         if( sh4_x86.sse3_enabled ) {
  2615             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2616             MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2617             HADDPS_xmm_xmm( 4, 4 ); 
  2618             HADDPS_xmm_xmm( 4, 4 );
  2619             MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2620         } else {
  2621             push_fr( FVm<<2 );
  2622             push_fr( FVn<<2 );
  2623             FMULP_st(1);
  2624             push_fr( (FVm<<2)+1);
  2625             push_fr( (FVn<<2)+1);
  2626             FMULP_st(1);
  2627             FADDP_st(1);
  2628             push_fr( (FVm<<2)+2);
  2629             push_fr( (FVn<<2)+2);
  2630             FMULP_st(1);
  2631             FADDP_st(1);
  2632             push_fr( (FVm<<2)+3);
  2633             push_fr( (FVn<<2)+3);
  2634             FMULP_st(1);
  2635             FADDP_st(1);
  2636             pop_fr( (FVn<<2)+3);
  2636         }
  2637     }
  2639 :}
  2640 FTRV XMTRX, FVn {:  
  2641     COUNT_INST(I_FTRV);
  2642     check_fpuen();
  2643     if( sh4_x86.double_prec == 0 ) {
  2644         if( sh4_x86.sse3_enabled && sh4_x86.begin_callback == NULL ) {
  2645         	/* FIXME: For now, disable this inlining when we're running in shadow mode -
  2646         	 * it gives slightly different results from the emu core. Need to
  2647         	 * fix the precision so both give the right results.
  2648         	 */
  2649             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2650             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2651             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2652             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2654             MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2655             MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2656             MOV_xmm_xmm( 4, 6 );
  2657             MOV_xmm_xmm( 5, 7 );
  2658             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2659             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2660             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2661             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2662             MULPS_xmm_xmm( 0, 4 );
  2663             MULPS_xmm_xmm( 1, 5 );
  2664             MULPS_xmm_xmm( 2, 6 );
  2665             MULPS_xmm_xmm( 3, 7 );
  2666             ADDPS_xmm_xmm( 5, 4 );
  2667             ADDPS_xmm_xmm( 7, 6 );
  2668             ADDPS_xmm_xmm( 6, 4 );
  2669             MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2670         } else {
  2671             LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
  2672             CALL1_ptr_r32( sh4_ftrv, REG_EAX );
  2673         }
  2674     }
  2675     sh4_x86.tstate = TSTATE_NONE;
  2676 :}
  2678 FRCHG {:  
  2679     COUNT_INST(I_FRCHG);
  2680     check_fpuen();
  2681     XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
  2682     CALL_ptr( sh4_switch_fr_banks );
  2683     sh4_x86.tstate = TSTATE_NONE;
  2684 :}
  2685 FSCHG {:  
  2686     COUNT_INST(I_FSCHG);
  2687     check_fpuen();
  2688     XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
  2689     XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
  2690     sh4_x86.tstate = TSTATE_NONE;
  2691     sh4_x86.double_size = !sh4_x86.double_size;
  2692     sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
  2693 :}
  2695 /* Processor control instructions */
  2696 LDC Rm, SR {:
  2697     COUNT_INST(I_LDCSR);
  2698     if( sh4_x86.in_delay_slot ) {
  2699 	SLOTILLEGAL();
  2700     } else {
  2701 	check_priv();
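	// As with RTE, writing SR can change register banks and MD/FPU-disable
	// state, so invalidate the cached mode and stop translating the block
	// after this instruction.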
  2702 	load_reg( REG_EAX, Rm );
  2703 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2704 	sh4_x86.fpuen_checked = FALSE;
  2705 	sh4_x86.tstate = TSTATE_NONE;
  2706     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2707 	return 2;
  2708     }
  2709 :}
  2710 LDC Rm, GBR {: 
  2711     COUNT_INST(I_LDC);
  2712     load_reg( REG_EAX, Rm );
  2713     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2714 :}
  2715 LDC Rm, VBR {:  
  2716     COUNT_INST(I_LDC);
  2717     check_priv();
  2718     load_reg( REG_EAX, Rm );
  2719     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2720     sh4_x86.tstate = TSTATE_NONE;
  2721 :}
  2722 LDC Rm, SSR {:  
  2723     COUNT_INST(I_LDC);
  2724     check_priv();
  2725     load_reg( REG_EAX, Rm );
  2726     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2727     sh4_x86.tstate = TSTATE_NONE;
  2728 :}
  2729 LDC Rm, SGR {:  
  2730     COUNT_INST(I_LDC);
  2731     check_priv();
  2732     load_reg( REG_EAX, Rm );
  2733     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2734     sh4_x86.tstate = TSTATE_NONE;
  2735 :}
  2736 LDC Rm, SPC {:  
  2737     COUNT_INST(I_LDC);
  2738     check_priv();
  2739     load_reg( REG_EAX, Rm );
  2740     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2741     sh4_x86.tstate = TSTATE_NONE;
  2742 :}
  2743 LDC Rm, DBR {:  
  2744     COUNT_INST(I_LDC);
  2745     check_priv();
  2746     load_reg( REG_EAX, Rm );
  2747     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2748     sh4_x86.tstate = TSTATE_NONE;
  2749 :}
  2750 LDC Rm, Rn_BANK {:  
  2751     COUNT_INST(I_LDC);
  2752     check_priv();
  2753     load_reg( REG_EAX, Rm );
  2754     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2755     sh4_x86.tstate = TSTATE_NONE;
  2756 :}
  2757 LDC.L @Rm+, GBR {:  
  2758     COUNT_INST(I_LDCM);
  2759     load_reg( REG_EAX, Rm );
  2760     check_ralign32( REG_EAX );
  2761     MEM_READ_LONG( REG_EAX, REG_EAX );
  2762     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2763     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2764     sh4_x86.tstate = TSTATE_NONE;
  2765 :}
  2766 LDC.L @Rm+, SR {:
  2767     COUNT_INST(I_LDCSRM);
  2768     if( sh4_x86.in_delay_slot ) {
  2769 	SLOTILLEGAL();
  2770     } else {
  2771 	check_priv();
  2772 	load_reg( REG_EAX, Rm );
  2773 	check_ralign32( REG_EAX );
  2774 	MEM_READ_LONG( REG_EAX, REG_EAX );
  2775 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2776 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2777 	sh4_x86.fpuen_checked = FALSE;
  2778 	sh4_x86.tstate = TSTATE_NONE;
  2779     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2780 	return 2;
  2781     }
  2782 :}
  2783 LDC.L @Rm+, VBR {:  
  2784     COUNT_INST(I_LDCM);
  2785     check_priv();
  2786     load_reg( REG_EAX, Rm );
  2787     check_ralign32( REG_EAX );
  2788     MEM_READ_LONG( REG_EAX, REG_EAX );
  2789     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2790     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2791     sh4_x86.tstate = TSTATE_NONE;
  2792 :}
  2793 LDC.L @Rm+, SSR {:
  2794     COUNT_INST(I_LDCM);
  2795     check_priv();
  2796     load_reg( REG_EAX, Rm );
  2797     check_ralign32( REG_EAX );
  2798     MEM_READ_LONG( REG_EAX, REG_EAX );
  2799     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2800     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2801     sh4_x86.tstate = TSTATE_NONE;
  2802 :}
  2803 LDC.L @Rm+, SGR {:  
  2804     COUNT_INST(I_LDCM);
  2805     check_priv();
  2806     load_reg( REG_EAX, Rm );
  2807     check_ralign32( REG_EAX );
  2808     MEM_READ_LONG( REG_EAX, REG_EAX );
  2809     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2810     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2811     sh4_x86.tstate = TSTATE_NONE;
  2812 :}
  2813 LDC.L @Rm+, SPC {:  
  2814     COUNT_INST(I_LDCM);
  2815     check_priv();
  2816     load_reg( REG_EAX, Rm );
  2817     check_ralign32( REG_EAX );
  2818     MEM_READ_LONG( REG_EAX, REG_EAX );
  2819     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2820     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2821     sh4_x86.tstate = TSTATE_NONE;
  2822 :}
  2823 LDC.L @Rm+, DBR {:  
  2824     COUNT_INST(I_LDCM);
  2825     check_priv();
  2826     load_reg( REG_EAX, Rm );
  2827     check_ralign32( REG_EAX );
  2828     MEM_READ_LONG( REG_EAX, REG_EAX );
  2829     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2830     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2831     sh4_x86.tstate = TSTATE_NONE;
  2832 :}
  2833 LDC.L @Rm+, Rn_BANK {:  
  2834     COUNT_INST(I_LDCM);
  2835     check_priv();
  2836     load_reg( REG_EAX, Rm );
  2837     check_ralign32( REG_EAX );
  2838     MEM_READ_LONG( REG_EAX, REG_EAX );
  2839     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2840     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2841     sh4_x86.tstate = TSTATE_NONE;
  2842 :}
  2843 LDS Rm, FPSCR {:
  2844     COUNT_INST(I_LDSFPSCR);
  2845     check_fpuen();
  2846     load_reg( REG_EAX, Rm );
  2847     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2848     sh4_x86.tstate = TSTATE_NONE;
  2849     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2850     return 2;
  2851 :}
  2852 LDS.L @Rm+, FPSCR {:  
  2853     COUNT_INST(I_LDSFPSCRM);
  2854     check_fpuen();
  2855     load_reg( REG_EAX, Rm );
  2856     check_ralign32( REG_EAX );
  2857     MEM_READ_LONG( REG_EAX, REG_EAX );
  2858     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2859     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2860     sh4_x86.tstate = TSTATE_NONE;
  2861     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2862     return 2;
  2863 :}
  2864 LDS Rm, FPUL {:  
  2865     COUNT_INST(I_LDS);
  2866     check_fpuen();
  2867     load_reg( REG_EAX, Rm );
  2868     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2869 :}
  2870 LDS.L @Rm+, FPUL {:  
  2871     COUNT_INST(I_LDSM);
  2872     check_fpuen();
  2873     load_reg( REG_EAX, Rm );
  2874     check_ralign32( REG_EAX );
  2875     MEM_READ_LONG( REG_EAX, REG_EAX );
  2876     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2877     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2878     sh4_x86.tstate = TSTATE_NONE;
  2879 :}
  2880 LDS Rm, MACH {: 
  2881     COUNT_INST(I_LDS);
  2882     load_reg( REG_EAX, Rm );
  2883     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2884 :}
  2885 LDS.L @Rm+, MACH {:  
  2886     COUNT_INST(I_LDSM);
  2887     load_reg( REG_EAX, Rm );
  2888     check_ralign32( REG_EAX );
  2889     MEM_READ_LONG( REG_EAX, REG_EAX );
  2890     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2891     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2892     sh4_x86.tstate = TSTATE_NONE;
  2893 :}
  2894 LDS Rm, MACL {:  
  2895     COUNT_INST(I_LDS);
  2896     load_reg( REG_EAX, Rm );
  2897     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2898 :}
  2899 LDS.L @Rm+, MACL {:  
  2900     COUNT_INST(I_LDSM);
  2901     load_reg( REG_EAX, Rm );
  2902     check_ralign32( REG_EAX );
  2903     MEM_READ_LONG( REG_EAX, REG_EAX );
  2904     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2905     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2906     sh4_x86.tstate = TSTATE_NONE;
  2907 :}
  2908 LDS Rm, PR {:  
  2909     COUNT_INST(I_LDS);
  2910     load_reg( REG_EAX, Rm );
  2911     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2912 :}
  2913 LDS.L @Rm+, PR {:  
  2914     COUNT_INST(I_LDSM);
  2915     load_reg( REG_EAX, Rm );
  2916     check_ralign32( REG_EAX );
  2917     MEM_READ_LONG( REG_EAX, REG_EAX );
  2918     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2919     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2920     sh4_x86.tstate = TSTATE_NONE;
  2921 :}
  2922 LDTLB {:  
  2923     COUNT_INST(I_LDTLB);
  2924     CALL_ptr( MMU_ldtlb );
  2925     sh4_x86.tstate = TSTATE_NONE;
  2926 :}
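        /* Cache-control ops: the operand cache isn't modelled, so OCBI/OCBP/OCBWB
           do nothing here beyond instruction counting. */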
  2927 OCBI @Rn {:
  2928     COUNT_INST(I_OCBI);
  2929 :}
  2930 OCBP @Rn {:
  2931     COUNT_INST(I_OCBP);
  2932 :}
  2933 OCBWB @Rn {:
  2934     COUNT_INST(I_OCBWB);
  2935 :}
  2936 PREF @Rn {:
  2937     COUNT_INST(I_PREF);
  2938     load_reg( REG_EAX, Rn );
  2939     MEM_PREFETCH( REG_EAX );
  2940     sh4_x86.tstate = TSTATE_NONE;
  2941 :}
  2942 SLEEP {: 
  2943     COUNT_INST(I_SLEEP);
  2944     check_priv();
  2945     CALL_ptr( sh4_sleep );
  2946     sh4_x86.tstate = TSTATE_NONE;
  2947     sh4_x86.in_delay_slot = DELAY_NONE;
  2948     return 2;
  2949 :}
  2950 STC SR, Rn {:
  2951     COUNT_INST(I_STCSR);
  2952     check_priv();
  2953     CALL_ptr(sh4_read_sr);
  2954     store_reg( REG_EAX, Rn );
  2955     sh4_x86.tstate = TSTATE_NONE;
  2956 :}
  2957 STC GBR, Rn {:  
  2958     COUNT_INST(I_STC);
  2959     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  2960     store_reg( REG_EAX, Rn );
  2961 :}
  2962 STC VBR, Rn {:  
  2963     COUNT_INST(I_STC);
  2964     check_priv();
  2965     MOVL_rbpdisp_r32( R_VBR, REG_EAX );
  2966     store_reg( REG_EAX, Rn );
  2967     sh4_x86.tstate = TSTATE_NONE;
  2968 :}
  2969 STC SSR, Rn {:  
  2970     COUNT_INST(I_STC);
  2971     check_priv();
  2972     MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2973     store_reg( REG_EAX, Rn );
  2974     sh4_x86.tstate = TSTATE_NONE;
  2975 :}
  2976 STC SPC, Rn {:  
  2977     COUNT_INST(I_STC);
  2978     check_priv();
  2979     MOVL_rbpdisp_r32( R_SPC, REG_EAX );
  2980     store_reg( REG_EAX, Rn );
  2981     sh4_x86.tstate = TSTATE_NONE;
  2982 :}
  2983 STC SGR, Rn {:  
  2984     COUNT_INST(I_STC);
  2985     check_priv();
  2986     MOVL_rbpdisp_r32( R_SGR, REG_EAX );
  2987     store_reg( REG_EAX, Rn );
  2988     sh4_x86.tstate = TSTATE_NONE;
  2989 :}
  2990 STC DBR, Rn {:  
  2991     COUNT_INST(I_STC);
  2992     check_priv();
  2993     MOVL_rbpdisp_r32( R_DBR, REG_EAX );
  2994     store_reg( REG_EAX, Rn );
  2995     sh4_x86.tstate = TSTATE_NONE;
  2996 :}
  2997 STC Rm_BANK, Rn {:
  2998     COUNT_INST(I_STC);
  2999     check_priv();
  3000     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
  3001     store_reg( REG_EAX, Rn );
  3002     sh4_x86.tstate = TSTATE_NONE;
  3003 :}
  3004 STC.L SR, @-Rn {:
  3005     COUNT_INST(I_STCSRM);
  3006     check_priv();
  3007     CALL_ptr( sh4_read_sr );
  3008     MOVL_r32_r32( REG_EAX, REG_EDX );
  3009     load_reg( REG_EAX, Rn );
  3010     check_walign32( REG_EAX );
  3011     LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  3012     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3013     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3014     sh4_x86.tstate = TSTATE_NONE;
  3015 :}
  3016 STC.L VBR, @-Rn {:  
  3017     COUNT_INST(I_STCM);
  3018     check_priv();
  3019     load_reg( REG_EAX, Rn );
  3020     check_walign32( REG_EAX );
  3021     ADDL_imms_r32( -4, REG_EAX );
  3022     MOVL_rbpdisp_r32( R_VBR, REG_EDX );
  3023     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3024     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3025     sh4_x86.tstate = TSTATE_NONE;
  3026 :}
  3027 STC.L SSR, @-Rn {:  
  3028     COUNT_INST(I_STCM);
  3029     check_priv();
  3030     load_reg( REG_EAX, Rn );
  3031     check_walign32( REG_EAX );
  3032     ADDL_imms_r32( -4, REG_EAX );
  3033     MOVL_rbpdisp_r32( R_SSR, REG_EDX );
  3034     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3035     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3036     sh4_x86.tstate = TSTATE_NONE;
  3037 :}
  3038 STC.L SPC, @-Rn {:
  3039     COUNT_INST(I_STCM);
  3040     check_priv();
  3041     load_reg( REG_EAX, Rn );
  3042     check_walign32( REG_EAX );
  3043     ADDL_imms_r32( -4, REG_EAX );
  3044     MOVL_rbpdisp_r32( R_SPC, REG_EDX );
  3045     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3046     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3047     sh4_x86.tstate = TSTATE_NONE;
  3048 :}
  3049 STC.L SGR, @-Rn {:  
  3050     COUNT_INST(I_STCM);
  3051     check_priv();
  3052     load_reg( REG_EAX, Rn );
  3053     check_walign32( REG_EAX );
  3054     ADDL_imms_r32( -4, REG_EAX );
  3055     MOVL_rbpdisp_r32( R_SGR, REG_EDX );
  3056     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3057     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3058     sh4_x86.tstate = TSTATE_NONE;
  3059 :}
  3060 STC.L DBR, @-Rn {:  
  3061     COUNT_INST(I_STCM);
  3062     check_priv();
  3063     load_reg( REG_EAX, Rn );
  3064     check_walign32( REG_EAX );
  3065     ADDL_imms_r32( -4, REG_EAX );
  3066     MOVL_rbpdisp_r32( R_DBR, REG_EDX );
  3067     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3068     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3069     sh4_x86.tstate = TSTATE_NONE;
  3070 :}
  3071 STC.L Rm_BANK, @-Rn {:  
  3072     COUNT_INST(I_STCM);
  3073     check_priv();
  3074     load_reg( REG_EAX, Rn );
  3075     check_walign32( REG_EAX );
  3076     ADDL_imms_r32( -4, REG_EAX );
  3077     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
  3078     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3079     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3080     sh4_x86.tstate = TSTATE_NONE;
  3081 :}
  3082 STC.L GBR, @-Rn {:  
  3083     COUNT_INST(I_STCM);
  3084     load_reg( REG_EAX, Rn );
  3085     check_walign32( REG_EAX );
  3086     ADDL_imms_r32( -4, REG_EAX );
  3087     MOVL_rbpdisp_r32( R_GBR, REG_EDX );
  3088     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3089     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3090     sh4_x86.tstate = TSTATE_NONE;
  3091 :}
  3092 STS FPSCR, Rn {:  
  3093     COUNT_INST(I_STSFPSCR);
  3094     check_fpuen();
  3095     MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
  3096     store_reg( REG_EAX, Rn );
  3097 :}
  3098 STS.L FPSCR, @-Rn {:  
  3099     COUNT_INST(I_STSFPSCRM);
  3100     check_fpuen();
  3101     load_reg( REG_EAX, Rn );
  3102     check_walign32( REG_EAX );
  3103     ADDL_imms_r32( -4, REG_EAX );
  3104     MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
  3105     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3106     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3107     sh4_x86.tstate = TSTATE_NONE;
  3108 :}
  3109 STS FPUL, Rn {:  
  3110     COUNT_INST(I_STS);
  3111     check_fpuen();
  3112     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  3113     store_reg( REG_EAX, Rn );
  3114 :}
  3115 STS.L FPUL, @-Rn {:  
  3116     COUNT_INST(I_STSM);
  3117     check_fpuen();
  3118     load_reg( REG_EAX, Rn );
  3119     check_walign32( REG_EAX );
  3120     ADDL_imms_r32( -4, REG_EAX );
  3121     MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
  3122     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3123     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3124     sh4_x86.tstate = TSTATE_NONE;
  3125 :}
  3126 STS MACH, Rn {:  
  3127     COUNT_INST(I_STS);
  3128     MOVL_rbpdisp_r32( R_MACH, REG_EAX );
  3129     store_reg( REG_EAX, Rn );
  3130 :}
  3131 STS.L MACH, @-Rn {:  
  3132     COUNT_INST(I_STSM);
  3133     load_reg( REG_EAX, Rn );
  3134     check_walign32( REG_EAX );
  3135     ADDL_imms_r32( -4, REG_EAX );
  3136     MOVL_rbpdisp_r32( R_MACH, REG_EDX );
  3137     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3138     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3139     sh4_x86.tstate = TSTATE_NONE;
  3140 :}
  3141 STS MACL, Rn {:  
  3142     COUNT_INST(I_STS);
  3143     MOVL_rbpdisp_r32( R_MACL, REG_EAX );
  3144     store_reg( REG_EAX, Rn );
  3145 :}
  3146 STS.L MACL, @-Rn {:  
  3147     COUNT_INST(I_STSM);
  3148     load_reg( REG_EAX, Rn );
  3149     check_walign32( REG_EAX );
  3150     ADDL_imms_r32( -4, REG_EAX );
  3151     MOVL_rbpdisp_r32( R_MACL, REG_EDX );
  3152     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3153     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3154     sh4_x86.tstate = TSTATE_NONE;
  3155 :}
  3156 STS PR, Rn {:  
  3157     COUNT_INST(I_STS);
  3158     MOVL_rbpdisp_r32( R_PR, REG_EAX );
  3159     store_reg( REG_EAX, Rn );
  3160 :}
  3161 STS.L PR, @-Rn {:  
  3162     COUNT_INST(I_STSM);
  3163     load_reg( REG_EAX, Rn );
  3164     check_walign32( REG_EAX );
  3165     ADDL_imms_r32( -4, REG_EAX );
  3166     MOVL_rbpdisp_r32( R_PR, REG_EDX );
  3167     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3168     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3169     sh4_x86.tstate = TSTATE_NONE;
  3170 :}
  3172 NOP {: 
  3173     COUNT_INST(I_NOP);
  3174     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  3175 :}
  3176 %%
  3177     sh4_x86.in_delay_slot = DELAY_NONE;
  3178     return 0;
  3179 }
  3182 /**
  3183  * The unwind methods only work if we compiled with DWARF2 frame information
  3184  * (ie -fexceptions), otherwise we have to use the direct frame scan.
  3185  */
  3186 #ifdef HAVE_EXCEPTIONS
  3187 #include <unwind.h>
  3189 struct UnwindInfo {
  3190     uintptr_t block_start;
  3191     uintptr_t block_end;
  3192     void *pc;
  3193 };
  3195 static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
  3196 {
  3197     struct UnwindInfo *info = arg;
  3198     void *pc = (void *)_Unwind_GetIP(context);
  3199     if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
  3200         info->pc = pc;
  3201         return _URC_NORMAL_STOP;
  3202     }
  3203     return _URC_NO_REASON;
  3204 }
  3206 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3207 {
  3208     struct UnwindInfo info;
  3210     info.pc = NULL;
  3211     info.block_start = (uintptr_t)code;
  3212     info.block_end = info.block_start + code_size;
  3213     _Unwind_Backtrace( xlat_check_frame, &info );
  3214     return info.pc;
  3215 }
  3216 #else
  3217 /* Assume this is an ia32 build - amd64 should always have dwarf information */
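        /* Translated code runs with %ebp == &sh4r + 128, so scan up to 8 saved
         * frame pointers for that value; the word above the matching slot is
         * the return address within the translated block. */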
  3218 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3219 {
  3220     void *result = NULL;
  3221     __asm__(
  3222         "mov %%ebp, %%eax\n\t"
  3223         "mov $0x8, %%ecx\n\t"
  3224         "mov %1, %%edx\n"
  3225         "frame_loop: test %%eax, %%eax\n\t"
  3226         "je frame_not_found\n\t"
  3227         "cmp (%%eax), %%edx\n\t"
  3228         "je frame_found\n\t"
  3229         "sub $0x1, %%ecx\n\t"
  3230         "je frame_not_found\n\t"
  3231         "movl (%%eax), %%eax\n\t"
  3232         "jmp frame_loop\n"
  3233         "frame_found: movl 0x4(%%eax), %0\n"
  3234         "frame_not_found:"
  3235         : "=r" (result)
  3236         : "r" (((uint8_t *)&sh4r) + 128 )
  3237         : "eax", "ecx", "edx" );
  3238     return result;
  3239 }
  3240 #endif