lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 1196:a14dbddafd13
prev 1194:ee6ce5804608
next 1197:904fba59a705
author Nathan Keynes <nkeynes@lxdream.org>
date Wed Dec 14 21:51:55 2011 +1000
permissions -rw-r--r--
last change Update maximum epilogue size
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "lxdream.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4dasm.h"
    31 #include "sh4/sh4trans.h"
    32 #include "sh4/sh4stat.h"
    33 #include "sh4/sh4mmio.h"
    34 #include "sh4/mmu.h"
    35 #include "xlat/xltcache.h"
    36 #include "xlat/x86/x86op.h"
    37 #include "x86dasm/x86dasm.h"
    38 #include "clock.h"
    40 #define DEFAULT_BACKPATCH_SIZE 4096
    42 /* Offset of a reg relative to the sh4r structure */
    43 #define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)
    45 #define R_T      REG_OFFSET(t)
    46 #define R_Q      REG_OFFSET(q)
    47 #define R_S      REG_OFFSET(s)
    48 #define R_M      REG_OFFSET(m)
    49 #define R_SR     REG_OFFSET(sr)
    50 #define R_GBR    REG_OFFSET(gbr)
    51 #define R_SSR    REG_OFFSET(ssr)
    52 #define R_SPC    REG_OFFSET(spc)
    53 #define R_VBR    REG_OFFSET(vbr)
    54 #define R_MACH   REG_OFFSET(mac)+4
    55 #define R_MACL   REG_OFFSET(mac)
    56 #define R_PC     REG_OFFSET(pc)
    57 #define R_NEW_PC REG_OFFSET(new_pc)
    58 #define R_PR     REG_OFFSET(pr)
    59 #define R_SGR    REG_OFFSET(sgr)
    60 #define R_FPUL   REG_OFFSET(fpul)
    61 #define R_FPSCR  REG_OFFSET(fpscr)
    62 #define R_DBR    REG_OFFSET(dbr)
    63 #define R_R(rn)  REG_OFFSET(r[rn])
    64 #define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
    65 #define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
    66 #define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
    67 #define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
    68 #define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
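/* Editor's note (added sketch, not part of the original source): during
 * execution of translated code the host rbp points at sh4r+128 (see the
 * "sh4r+128" entry in x86_symbol_table below), so REG_OFFSET biases every
 * member offset by -128. Any member within the first 256 bytes of sh4r is
 * then reachable with a signed 8-bit displacement, e.g.:
 *
 *     R_T == offsetof(struct sh4_registers, t) - 128   // fits in an int8_t
 *     MOVL_rbpdisp_r32( R_T, REG_EAX );   // short rbp-relative load of sh4r.t
 *
 * (struct name per sh4/sh4core.h)
 */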
    70 #define DELAY_NONE 0
    71 #define DELAY_PC 1
    72 #define DELAY_PC_PR 2
    74 #define SH4_MODE_UNKNOWN -1
    76 struct backpatch_record {
    77     uint32_t fixup_offset;
    78     uint32_t fixup_icount;
    79     int32_t exc_code;
    80 };
    82 /** 
    83  * Struct to manage internal translation state. This state is not saved -
    84  * it is only valid between calls to sh4_translate_begin_block() and
    85  * sh4_translate_end_block()
    86  */
    87 struct sh4_x86_state {
    88     int in_delay_slot;
    89     uint8_t *code;
    90     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    91     gboolean branch_taken; /* true if we branched unconditionally */
    92     gboolean double_prec; /* true if FPU is in double-precision mode */
    93     gboolean double_size; /* true if FPU is in double-size mode */
    94     gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    95     uint32_t block_start_pc;
    96     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    97     uint32_t sh4_mode;     /* Mirror of sh4r.xlat_sh4_mode */
    98     int tstate;
   100     /* mode settings */
   101     gboolean tlb_on; /* True if tlb translation is active */
   102     struct mem_region_fn **priv_address_space;
   103     struct mem_region_fn **user_address_space;
   105     /* Instrumentation */
   106     xlat_block_begin_callback_t begin_callback;
   107     xlat_block_end_callback_t end_callback;
   108     gboolean fastmem;
   109     gboolean profile_blocks;
   111     /* Allocated memory for the (block-wide) back-patch list */
   112     struct backpatch_record *backpatch_list;
   113     uint32_t backpatch_posn;
   114     uint32_t backpatch_size;
   115 };
   117 static struct sh4_x86_state sh4_x86;
   119 static uint32_t max_int = 0x7FFFFFFF;
   120 static uint32_t min_int = 0x80000000;
   121 static uint32_t save_fcw; /* save value for fpu control word */
   122 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
   124 static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc );
   126 static struct x86_symbol x86_symbol_table[] = {
   127     { "sh4r+128", ((char *)&sh4r)+128 },
   128     { "sh4_cpu_period", &sh4_cpu_period },
   129     { "sh4_address_space", NULL },
   130     { "sh4_user_address_space", NULL },
   131     { "sh4_translate_breakpoint_hit", sh4_translate_breakpoint_hit },
   132     { "sh4_translate_get_code_and_backpatch", sh4_translate_get_code_and_backpatch },
   133     { "sh4_write_fpscr", sh4_write_fpscr },
   134     { "sh4_write_sr", sh4_write_sr },
   135     { "sh4_read_sr", sh4_read_sr },
   136     { "sh4_raise_exception", sh4_raise_exception },
   137     { "sh4_sleep", sh4_sleep },
   138     { "sh4_fsca", sh4_fsca },
   139     { "sh4_ftrv", sh4_ftrv },
   140     { "sh4_switch_fr_banks", sh4_switch_fr_banks },
   141     { "sh4_execute_instruction", sh4_execute_instruction },
   142     { "signsat48", signsat48 },
   143     { "xlat_get_code_by_vma", xlat_get_code_by_vma },
   144     { "xlat_get_code", xlat_get_code }
   145 };
   148 gboolean is_sse3_supported()
   149 {
   150     uint32_t features;
   152     __asm__ __volatile__(
   153         "mov $0x01, %%eax\n\t"
   154         "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
   155     return (features & 1) ? TRUE : FALSE;
   156 }
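/* Editor's note: CPUID leaf 1 reports SSE3 ("PNI") as bit 0 of ECX, which is
 * the bit tested above. A minimal equivalent using GCC's <cpuid.h> helper
 * (an alternative sketch, not what lxdream ships) would be:
 *
 *     #include <cpuid.h>
 *     unsigned int a, b, c, d;
 *     if( !__get_cpuid(1, &a, &b, &c, &d) ) return FALSE;
 *     return (c & bit_SSE3) ? TRUE : FALSE;
 */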
   158 void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
   159 {
   160     sh4_x86.priv_address_space = priv;
   161     sh4_x86.user_address_space = user;
   162     x86_symbol_table[2].ptr = priv;
   163     x86_symbol_table[3].ptr = user;
   164 }
   166 void sh4_translate_init(void)
   167 {
   168     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   169     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   170     sh4_x86.begin_callback = NULL;
   171     sh4_x86.end_callback = NULL;
   172     sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
   173     sh4_x86.fastmem = TRUE;
   174     sh4_x86.profile_blocks = FALSE;
   175     sh4_x86.sse3_enabled = is_sse3_supported();
   176     x86_disasm_init();
   177     x86_set_symtab( x86_symbol_table, sizeof(x86_symbol_table)/sizeof(struct x86_symbol) );
   178 }
   180 void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
   181 {
   182     sh4_x86.begin_callback = begin;
   183     sh4_x86.end_callback = end;
   184 }
   186 void sh4_translate_set_fastmem( gboolean flag )
   187 {
   188     sh4_x86.fastmem = flag;
   189 }
   191 void sh4_translate_set_profile_blocks( gboolean flag )
   192 {
   193     sh4_x86.profile_blocks = flag;
   194 }
   196 gboolean sh4_translate_get_profile_blocks()
   197 {
   198     return sh4_x86.profile_blocks;
   199 }
   201 /**
    202  * Disassemble the given translated code block, and its source SH4 code block
   203  * side-by-side. The current native pc will be marked if non-null.
   204  */
   205 void sh4_translate_disasm_block( FILE *out, void *code, sh4addr_t source_start, void *native_pc )
   206 {
   207     char buf[256];
   208     char op[256];
   210     uintptr_t target_start = (uintptr_t)code, target_pc;
   211     uintptr_t target_end = target_start + xlat_get_code_size(code);
   212     uint32_t source_pc = source_start;
   213     uint32_t source_end = source_pc;
   214     xlat_recovery_record_t source_recov_table = XLAT_RECOVERY_TABLE(code);
   215     xlat_recovery_record_t source_recov_end = source_recov_table + XLAT_BLOCK_FOR_CODE(code)->recover_table_size - 1;
   217     for( target_pc = target_start; target_pc < target_end;  ) {
   218         uintptr_t pc2 = x86_disasm_instruction( target_pc, buf, sizeof(buf), op );
   219 #if SIZEOF_VOID_P == 8
   220         fprintf( out, "%c%016lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
   221                       target_pc, op, buf );
   222 #else
   223         fprintf( out, "%c%08lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
   224                       target_pc, op, buf );
   225 #endif        
   226         if( source_recov_table < source_recov_end && 
   227             target_pc >= (target_start + source_recov_table->xlat_offset) ) {
   228             source_recov_table++;
   229             if( source_end < (source_start + (source_recov_table->sh4_icount)*2) )
   230                 source_end = source_start + (source_recov_table->sh4_icount)*2;
   231         }
   233         if( source_pc < source_end ) {
   234             uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
   235             fprintf( out, " %08X: %s  %s\n", source_pc, op, buf );
   236             source_pc = source_pc2;
   237         } else {
   238             fprintf( out, "\n" );
   239         }
   241         target_pc = pc2;
   242     }
   244     while( source_pc < source_end ) {
   245         uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
   246         fprintf( out, "%*c %08X: %s  %s\n", 72,' ', source_pc, op, buf );
   247         source_pc = source_pc2;
   248     }
   249 }
   251 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   252 {
   253     int reloc_size = 4;
   255     if( exc_code == -2 ) {
   256         reloc_size = sizeof(void *);
   257     }
   259     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   260 	sh4_x86.backpatch_size <<= 1;
   261 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   262 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   263 	assert( sh4_x86.backpatch_list != NULL );
   264     }
   265     if( sh4_x86.in_delay_slot ) {
   266 	fixup_pc -= 2;
   267     }
   269     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   270 	(((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
   271     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   272     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   273     sh4_x86.backpatch_posn++;
   274 }
   276 #define TSTATE_NONE -1
   277 #define TSTATE_O    X86_COND_O
   278 #define TSTATE_C    X86_COND_C
   279 #define TSTATE_E    X86_COND_E
   280 #define TSTATE_NE   X86_COND_NE
   281 #define TSTATE_G    X86_COND_G
   282 #define TSTATE_GE   X86_COND_GE
   283 #define TSTATE_A    X86_COND_A
   284 #define TSTATE_AE   X86_COND_AE
   286 #define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
   287 #define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
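/* Editor's note on the pair above: JCC_cc_rel8(cc,-1) emits a two-byte short
 * jump whose displacement byte is a -1 placeholder; MARK_JMP8 records the
 * address of that byte (xlat_output-1), and JMP_TARGET adds the distance from
 * the byte to the current output point. Because rel8 is measured from the end
 * of the jump (one byte past the displacement), the -1 placeholder makes the
 * arithmetic come out exactly. Worked example:
 *
 *     JE_label(skip);               // emits 74 FF at offsets 0-1
 *     MOVL_imm32_r32( 0, REG_EAX ); // 5 bytes at offsets 2-6
 *     JMP_TARGET(skip);             // disp = -1 + (7-1) = 5; 2+5 = offset 7
 */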
   289 /* Convenience instructions */
   290 #define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
   291 #define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
   292 #define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
   293 #define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
   294 #define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
   295 #define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
   296 #define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
   297 #define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
   298 #define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
   299 #define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
   300 #define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
   301 #define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
   302 #define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
   303 #define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
   304 #define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
   305 #define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
   306 #define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
   307 #define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
   308 #define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
   309 #define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)
   311 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
   312 #define JT_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
   313 	CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
   314     JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)
   316 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
   317 #define JF_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
   318 	CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
   319     JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
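/* Editor's note: LDC_t() above loads sh4r.t into the host carry flag without
 * a branch: CMPB $1,t sets CF = (t < 1) = !t, and CMC then inverts it so
 * CF == T. In the other direction, sh4_x86.tstate records which host
 * condition code currently mirrors T, so a compare/branch pair such as
 *
 *     CMP/EQ Rm, Rn     // leaves tstate = TSTATE_E
 *     BT disp           // JT_label emits a plain JE, no reload of sh4r.t
 *
 * never touches the in-memory T value between the two instructions.
 */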
   322 #define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
   323 #define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )
   325 /**
   326  * Load an FR register (single-precision floating point) into an integer x86
   327  * register (eg for register-to-register moves)
   328  */
   329 #define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
   330 #define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )
   332 /**
   333  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   334  */
   335 #define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
   336 #define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )
   338 /**
    339  * Store an FR register (single-precision floating point) from an integer x86
   340  * register (eg for register-to-register moves)
   341  */
   342 #define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
   343 #define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )
   345 #define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   346 #define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   349 #define push_fpul()  FLDF_rbpdisp(R_FPUL)
   350 #define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
   351 #define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
   352 #define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
   353 #define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
   354 #define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
   355 #define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
   356 #define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
   357 #define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
   358 #define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
   360 #ifdef ENABLE_SH4STATS
   361 #define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
   362 #else
   363 #define COUNT_INST(id)
   364 #endif
   367 /* Exception checks - Note that all exception checks will clobber EAX */
   369 #define check_priv( ) \
   370     if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
   371         if( sh4_x86.in_delay_slot ) { \
   372             exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2), 4 ); \
   373         } else { \
   374             exit_block_exc(EXC_ILLEGAL, pc, 2); \
   375         } \
   376         sh4_x86.branch_taken = TRUE; \
   377         sh4_x86.in_delay_slot = DELAY_NONE; \
   378         return 2; \
   379     }
   381 #define check_fpuen( ) \
   382     if( !sh4_x86.fpuen_checked ) {\
   383 	sh4_x86.fpuen_checked = TRUE;\
   384 	MOVL_rbpdisp_r32( R_SR, REG_EAX );\
   385 	ANDL_imms_r32( SR_FD, REG_EAX );\
   386 	if( sh4_x86.in_delay_slot ) {\
   387 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   388 	} else {\
   389 	    JNE_exc(EXC_FPU_DISABLED);\
   390 	}\
   391 	sh4_x86.tstate = TSTATE_NONE; \
   392     }
   394 #define check_ralign16( x86reg ) \
   395     TESTL_imms_r32( 0x00000001, x86reg ); \
   396     JNE_exc(EXC_DATA_ADDR_READ)
   398 #define check_walign16( x86reg ) \
   399     TESTL_imms_r32( 0x00000001, x86reg ); \
   400     JNE_exc(EXC_DATA_ADDR_WRITE);
   402 #define check_ralign32( x86reg ) \
   403     TESTL_imms_r32( 0x00000003, x86reg ); \
   404     JNE_exc(EXC_DATA_ADDR_READ)
   406 #define check_walign32( x86reg ) \
   407     TESTL_imms_r32( 0x00000003, x86reg ); \
   408     JNE_exc(EXC_DATA_ADDR_WRITE);
   410 #define check_ralign64( x86reg ) \
   411     TESTL_imms_r32( 0x00000007, x86reg ); \
   412     JNE_exc(EXC_DATA_ADDR_READ)
   414 #define check_walign64( x86reg ) \
   415     TESTL_imms_r32( 0x00000007, x86reg ); \
   416     JNE_exc(EXC_DATA_ADDR_WRITE);
   418 #define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)
   420 #define UNDEF(ir)
   421 /* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so 
   422  * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
   423  */
   424 #ifdef HAVE_FRAME_ADDRESS
   425 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
   426 {
   427     decode_address(address_space(), addr_reg);
   428     if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
   429         CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
   430     } else {
   431         if( addr_reg != REG_ARG1 ) {
   432             MOVL_r32_r32( addr_reg, REG_ARG1 );
   433         }
   434         MOVP_immptr_rptr( 0, REG_ARG2 );
   435         sh4_x86_add_backpatch( xlat_output, pc, -2 );
   436         CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
   437     }
   438     if( value_reg != REG_RESULT1 ) { 
   439         MOVL_r32_r32( REG_RESULT1, value_reg );
   440     }
   441 }
   443 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
   444 {
   445     decode_address(address_space(), addr_reg);
   446     if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
   447         CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
   448     } else {
   449         if( value_reg != REG_ARG2 ) {
   450             MOVL_r32_r32( value_reg, REG_ARG2 );
   451 	}        
   452         if( addr_reg != REG_ARG1 ) {
   453             MOVL_r32_r32( addr_reg, REG_ARG1 );
   454         }
   455 #if MAX_REG_ARG > 2        
   456         MOVP_immptr_rptr( 0, REG_ARG3 );
   457         sh4_x86_add_backpatch( xlat_output, pc, -2 );
   458         CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
   459 #else
   460         MOVL_imm32_rspdisp( 0, 0 );
   461         sh4_x86_add_backpatch( xlat_output, pc, -2 );
   462         CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
   463 #endif
   464     }
   465 }
   466 #else
   467 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
   468 {
   469     decode_address(address_space(), addr_reg);
   470     CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
   471     if( value_reg != REG_RESULT1 ) {
   472         MOVL_r32_r32( REG_RESULT1, value_reg );
   473     }
   474 }     
   476 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
   477 {
   478     decode_address(address_space(), addr_reg);
   479     CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
   480 }
   481 #endif
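/* Editor's note (conceptual sketch, not original text): decode_address()
 * leaves a struct mem_region_fn pointer in REG_ECX, picked out of the
 * private or user address-space table by the high bits of the address; the
 * access itself is an indirect call through a fixed slot of that struct. So
 * MEM_READ_LONG(addr,val) below behaves roughly like:
 *
 *     struct mem_region_fn *fn = space[addr >> 12];  // table granularity is
 *     val = fn->read_long(addr);                     // an editor's guess
 *
 * with an exception-return pointer passed as an extra argument whenever the
 * region may raise an MMU exception (the backpatched -2 records above).
 */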
   483 #define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
   484 #define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
   485 #define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc) 
   486 #define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
   487 #define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
   488 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
   489 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
   490 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
   491 #define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)
   493 #define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
   495 /** Offset of xlat_sh4_mode field relative to the code pointer */ 
   496 #define XLAT_SH4_MODE_CODE_OFFSET  (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
   497 #define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
   498 #define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
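/* Editor's note: all three offsets are relative to the code member of
 * struct xlat_cache_block, so generated code holding a block's code pointer
 * can reach its header fields without a separate header pointer; e.g. in
 * jump_next_block() below,
 *
 *     CMPL_imms_r32disp( mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
 *
 * compares against block->xlat_sh4_mode while REG_EAX still addresses
 * block->code.
 */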
   500 void sh4_translate_begin_block( sh4addr_t pc ) 
   501 {
    502     sh4_x86.code = xlat_output;
   503     sh4_x86.in_delay_slot = FALSE;
   504     sh4_x86.fpuen_checked = FALSE;
   505     sh4_x86.branch_taken = FALSE;
   506     sh4_x86.backpatch_posn = 0;
   507     sh4_x86.block_start_pc = pc;
   508     sh4_x86.tlb_on = IS_TLB_ENABLED();
   509     sh4_x86.tstate = TSTATE_NONE;
   510     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   511     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
   512     sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
   513     emit_prologue();
   514     if( sh4_x86.begin_callback ) {
   515         CALL_ptr( sh4_x86.begin_callback );
   516     }
   517     if( sh4_x86.profile_blocks ) {
   518     	MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
   519     	ADDL_imms_r32disp( 1, REG_EAX, 0 );
   520     }  
   521 }
   524 uint32_t sh4_translate_end_block_size()
   525 {
   526 	uint32_t epilogue_size = EPILOGUE_SIZE;
   527 	if( sh4_x86.end_callback ) {
   528 	    epilogue_size += (CALL1_PTR_MIN_SIZE - 1);
   529 	}
   530     if( sh4_x86.backpatch_posn <= 3 ) {
   531         epilogue_size += (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
   532     } else {
   533         epilogue_size += (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
   534     }
   535     return epilogue_size;
   536 }
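/* Editor's note, working the bound above through: each backpatch record
 * costs at most MOVL exc_code (5) + the sh4_raise_exception call + MOVL
 * icount (5) + a trailing JMP (2 bytes short, 5 long), hence
 * 12+CALL1_PTR_MIN_SIZE while the jump back to the cleanup stub is still in
 * rel8 range and 15+CALL1_PTR_MIN_SIZE after that. E.g. with a hypothetical
 * CALL1_PTR_MIN_SIZE of 5 and backpatch_posn == 5:
 *
 *     EPILOGUE_SIZE + 3*(12+5) + 2*(15+5) = EPILOGUE_SIZE + 91 bytes
 */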
   539 /**
   540  * Embed a breakpoint into the generated code
   541  */
   542 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   543 {
   544     MOVL_imm32_r32( pc, REG_EAX );
   545     CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
   546     sh4_x86.tstate = TSTATE_NONE;
   547 }
   550 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
   552 /**
   553  * Test if the loaded target code pointer in %eax is valid, and if so jump
   554  * directly into it, bypassing the normal exit.
   555  */
   556 static void jump_next_block()
   557 {
   558 	uint8_t *ptr = xlat_output;
   559 	TESTP_rptr_rptr(REG_EAX, REG_EAX);
   560 	JE_label(nocode);
   561 	if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
   562 	    /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
   563 	    MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
   564 	    CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
   565 	} else {
   566 	    CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
   567 	}
   568 	JNE_label(wrongmode);
   569 	LEAP_rptrdisp_rptr(REG_EAX, PROLOGUE_SIZE,REG_EAX);
   570 	if( sh4_x86.end_callback ) {
   571 	    /* Note this does leave the stack out of alignment, but doesn't matter
   572 	     * for what we're currently using it for.
   573 	     */
   574 	    PUSH_r32(REG_EAX);
   575 	    MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
   576 	    JMP_rptr(REG_ECX);
   577 	} else {
   578 	    JMP_rptr(REG_EAX);
   579 	}
   580 	JMP_TARGET(wrongmode);
   581 	MOVP_rptrdisp_rptr( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
   582 	int rel = ptr - xlat_output;
   583     JMP_prerel(rel);
   584 	JMP_TARGET(nocode); 
   585 }
   587 /**
    588  * Look up the translated block for the given PC (translating it if none exists yet), then backpatch the calling site so future executions branch directly to the block.
   589  */
   590 static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc )
   591 {
   592     uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
   593     while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
   594         target = XLAT_BLOCK_CHAIN(target);
   595 	}
   596     if( target == NULL ) {
   597         target = sh4_translate_basic_block( pc );
   598     }
   599     uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
   600     *backpatch = 0xE9;
   601     *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)+PROLOGUE_SIZE-5;
   602     *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
   603     XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch; 
   605     uint8_t **retptr = ((uint8_t **)__builtin_frame_address(0))+1;
   606     assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
   607 	*retptr = backpatch;
   608 }
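/* Editor's note: after the patch above, the call site (padded to
 * 5+sizeof(void*) bytes by emit_translate_and_backpatch() below) has become:
 *
 *     byte 0: 0xE9 rel32      ; jmp target+PROLOGUE_SIZE
 *     byte 5: void *next      ; use-list link read by sh4_translate_unlink_block
 *
 * and rewriting the saved return address to point at byte 0 makes this very
 * invocation "return" straight through the freshly written jmp.
 */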
   610 static void emit_translate_and_backpatch()
   611 {
   612     /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
   613     CALL1_ptr_r32(sh4_translate_get_code_and_backpatch, REG_ARG1);
   615     /* When patched, the jmp instruction will be 5 bytes (either platform) -
   616      * we need to reserve sizeof(void*) bytes for the use-list
    617      * pointer
    618      */
   619     if( sizeof(void*) == 8 ) {
   620         NOP();
   621     } else {
   622         NOP2();
   623     }
   624 }
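/* Editor's note, checking the padding arithmetic: the call is 7 bytes on
 * i386 or 12 on x86-64, and the patched form needs 5 (jmp rel32) +
 * sizeof(void*) bytes. That is 9 bytes on i386 (7 + NOP2) and 13 on x86-64
 * (12 + NOP), exactly what the branch above reserves.
 */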
   626 /**
   627  * If we're jumping to a fixed address (or at least fixed relative to the
    628  * current PC), then we can do a direct branch. REG_ARG1 should contain
   629  * the PC at this point.
   630  */
   631 static void jump_next_block_fixed_pc( sh4addr_t pc )
   632 {
   633 	if( IS_IN_ICACHE(pc) ) {
   634 	    if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN && sh4_x86.end_callback == NULL ) {
   635 	        /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
   636 	         * fetch-and-backpatch routine, which will replace the call with a branch */
   637            emit_translate_and_backpatch();	         
   638            return;
   639 		} else {
   640             MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
   641             ANDP_imms_rptr( -4, REG_EAX );
   642         }
   643 	} else if( sh4_x86.tlb_on ) {
   644         CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
   645     } else {
   646         CALL1_ptr_r32(xlat_get_code, REG_ARG1);
   647     }
   648     jump_next_block();
   651 }
   653 void sh4_translate_unlink_block( void *use_list )
   654 {
   655 	uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */
   656 	void *next = use_list;
   657 	while( next != NULL ) {
   658     	xlat_output = (uint8_t *)next;
   659  	    next = *(void **)(xlat_output+5);
   660  		emit_translate_and_backpatch();
   661  	}
   662  	xlat_output = tmp;
   663 }
   667 static void exit_block()
   668 {
   669 	emit_epilogue();
   670 	if( sh4_x86.end_callback ) {
   671 	    MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
   672 	    JMP_rptr(REG_ECX);
   673 	} else {
   674 	    RET();
   675 	}
   676 }
   678 /**
   679  * Exit the block with sh4r.pc already written
   680  */
   681 void exit_block_pcset( sh4addr_t pc )
   682 {
   683     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   684     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   685     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   686     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   687     JBE_label(exitloop);
   688     MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
   689     if( sh4_x86.tlb_on ) {
   690         CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
   691     } else {
   692         CALL1_ptr_r32(xlat_get_code,REG_ARG1);
   693     }
   695     jump_next_block();
   696     JMP_TARGET(exitloop);
   697     exit_block();
   698 }
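/* Editor's note: the cycle accounting above exploits the fixed 2-byte SH4
 * instruction size: ((pc - block_start_pc) >> 1) is the number of
 * instructions executed, and multiplying by sh4_cpu_period converts it to
 * slice cycles. E.g. a 10-instruction block with sh4_cpu_period == 5 adds 50
 * to sh4r.slice_cycle before the event_pending comparison decides whether to
 * keep running or fall back to the epilogue.
 */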
   700 /**
   701  * Exit the block with sh4r.new_pc written with the target pc
   702  */
   703 void exit_block_newpcset( sh4addr_t pc )
   704 {
   705     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   706     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   707     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   708     MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
   709     MOVL_r32_rbpdisp( REG_ARG1, R_PC );
   710     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   711     JBE_label(exitloop);
   712     if( sh4_x86.tlb_on ) {
   713         CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
   714     } else {
   715         CALL1_ptr_r32(xlat_get_code,REG_ARG1);
   716     }
   718 	jump_next_block();
   719     JMP_TARGET(exitloop);
   720     exit_block();
   721 }
   724 /**
   725  * Exit the block to an absolute PC
   726  */
   727 void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
   728 {
   729     MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   730     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   731     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   733     MOVL_imm32_r32( pc, REG_ARG1 );
   734     MOVL_r32_rbpdisp( REG_ARG1, R_PC );
   735     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   736     JBE_label(exitloop);
   737     jump_next_block_fixed_pc(pc);    
   738     JMP_TARGET(exitloop);
   739     exit_block();
   740 }
   742 /**
   743  * Exit the block to a relative PC
   744  */
   745 void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
   746 {
   747     MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   748     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   749     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   751 	if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
   752 	    /* Special case for tight loops - the PC doesn't change, and
   753 	     * we already know the target address. Just check events pending before
   754 	     * looping.
   755 	     */
   756         CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   757         uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output)) + PROLOGUE_SIZE;
   758         JCC_cc_prerel(X86_COND_A, backdisp);
   759 	} else {
   760         MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
   761         ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
   762         MOVL_r32_rbpdisp( REG_ARG1, R_PC );
   763         CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   764         JBE_label(exitloop2);
   766         jump_next_block_fixed_pc(pc);
   767         JMP_TARGET(exitloop2);
   768     }
   769     exit_block();
   770 }
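/* Editor's note on the tight-loop case above: backdisp is the negative
 * distance from the current output position back to the start of the block
 * body (sh4_x86.code + PROLOGUE_SIZE), so the single conditional
 * JCC_cc_prerel(X86_COND_A, backdisp) re-enters the block in place for as
 * long as the next event is still in the future, skipping the block-lookup
 * machinery entirely.
 */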
   772 /**
   773  * Exit unconditionally with a general exception
   774  */
   775 void exit_block_exc( int code, sh4addr_t pc, int inst_adjust )
   776 {
   777     MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
   778     ADDL_r32_rbpdisp( REG_ECX, R_PC );
   779     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc + inst_adjust)>>1)*sh4_cpu_period, REG_ECX );
   780     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   781     MOVL_imm32_r32( code, REG_ARG1 );
   782     CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
   783     exit_block();
   784 }    
   786 /**
   787  * Embed a call to sh4_execute_instruction for situations that we
   788  * can't translate (just page-crossing delay slots at the moment).
   789  * Caller is responsible for setting new_pc before calling this function.
   790  *
   791  * Performs:
   792  *   Set PC = endpc
   793  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   794  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   795  *   Call sh4_execute_instruction
   796  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   797  */
   798 void exit_block_emu( sh4vma_t endpc )
   799 {
   800     MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
   801     ADDL_r32_rbpdisp( REG_ECX, R_PC );
   803     MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
   804     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
   805     MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
   806     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );
   808     CALL_ptr( sh4_execute_instruction );
   809     exit_block();
   810 } 
   812 /**
   813  * Write the block trailer (exception handling block)
   814  */
   815 void sh4_translate_end_block( sh4addr_t pc ) {
   816     if( sh4_x86.branch_taken == FALSE ) {
   817         // Didn't exit unconditionally already, so write the termination here
   818         exit_block_rel( pc, pc );
   819     }
   820     if( sh4_x86.backpatch_posn != 0 ) {
   821         unsigned int i;
   822         // Exception raised - cleanup and exit
   823         uint8_t *end_ptr = xlat_output;
   824         MOVL_r32_r32( REG_EDX, REG_ECX );
   825         ADDL_r32_r32( REG_EDX, REG_ECX );
   826         ADDL_r32_rbpdisp( REG_ECX, R_SPC );
   827         MOVL_moffptr_eax( &sh4_cpu_period );
   828         INC_r32( REG_EDX );  /* Add 1 for the aborting instruction itself */ 
   829         MULL_r32( REG_EDX );
   830         ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
   831         exit_block();
   833         for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
   834             uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
   835             if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
   836                 if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
   837                     *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output; 
   838                 } else {
   839                     *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
   840                 }
   841                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
   842                 int rel = end_ptr - xlat_output;
   843                 JMP_prerel(rel);
   844             } else {
   845                 *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
   846                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
   847                 CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
   848                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
   849                 int rel = end_ptr - xlat_output;
   850                 JMP_prerel(rel);
   851             }
   852         }
   853     }
   854 }
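/* Editor's note on the cleanup stub emitted above: each backpatched
 * exception path arrives with the index of the faulting instruction in
 * REG_EDX, so the stub adds 2*EDX to sh4r.spc (two bytes per instruction;
 * spc was left at the block start by the exception raise, as the editor
 * reads it) and (EDX+1)*sh4_cpu_period to slice_cycle, charging the partial
 * block up to and including the instruction that faulted.
 */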
   856 /**
   857  * Translate a single instruction. Delayed branches are handled specially
    858  * by translating both the branch and the delayed instruction as a single
    859  * unit (the delay slot instruction is translated together with its branch).
    860  * The instruction MUST be in the icache (assert check)
    861  *
    862  * @return true if the instruction marks the end of a basic block
    863  * (eg a branch or an instruction that otherwise ends the block).
    864  */
   865 uint32_t sh4_translate_instruction( sh4vma_t pc )
   866 {
   867     uint32_t ir;
   868     /* Read instruction from icache */
   869     assert( IS_IN_ICACHE(pc) );
   870     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   872     if( !sh4_x86.in_delay_slot ) {
   873 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   874     }
   876     /* check for breakpoints at this pc */
   877     for( int i=0; i<sh4_breakpoint_count; i++ ) {
   878         if( sh4_breakpoints[i].address == pc ) {
   879             sh4_translate_emit_breakpoint(pc);
   880             break;
   881         }
   882     }
   883 %%
   884 /* ALU operations */
   885 ADD Rm, Rn {:
   886     COUNT_INST(I_ADD);
   887     load_reg( REG_EAX, Rm );
   888     load_reg( REG_ECX, Rn );
   889     ADDL_r32_r32( REG_EAX, REG_ECX );
   890     store_reg( REG_ECX, Rn );
   891     sh4_x86.tstate = TSTATE_NONE;
   892 :}
   893 ADD #imm, Rn {:  
   894     COUNT_INST(I_ADDI);
   895     ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
   896     sh4_x86.tstate = TSTATE_NONE;
   897 :}
   898 ADDC Rm, Rn {:
   899     COUNT_INST(I_ADDC);
   900     if( sh4_x86.tstate != TSTATE_C ) {
   901         LDC_t();
   902     }
   903     load_reg( REG_EAX, Rm );
   904     load_reg( REG_ECX, Rn );
   905     ADCL_r32_r32( REG_EAX, REG_ECX );
   906     store_reg( REG_ECX, Rn );
   907     SETC_t();
   908     sh4_x86.tstate = TSTATE_C;
   909 :}
   910 ADDV Rm, Rn {:
   911     COUNT_INST(I_ADDV);
   912     load_reg( REG_EAX, Rm );
   913     load_reg( REG_ECX, Rn );
   914     ADDL_r32_r32( REG_EAX, REG_ECX );
   915     store_reg( REG_ECX, Rn );
   916     SETO_t();
   917     sh4_x86.tstate = TSTATE_O;
   918 :}
   919 AND Rm, Rn {:
   920     COUNT_INST(I_AND);
   921     load_reg( REG_EAX, Rm );
   922     load_reg( REG_ECX, Rn );
   923     ANDL_r32_r32( REG_EAX, REG_ECX );
   924     store_reg( REG_ECX, Rn );
   925     sh4_x86.tstate = TSTATE_NONE;
   926 :}
   927 AND #imm, R0 {:  
   928     COUNT_INST(I_ANDI);
   929     load_reg( REG_EAX, 0 );
   930     ANDL_imms_r32(imm, REG_EAX); 
   931     store_reg( REG_EAX, 0 );
   932     sh4_x86.tstate = TSTATE_NONE;
   933 :}
   934 AND.B #imm, @(R0, GBR) {: 
   935     COUNT_INST(I_ANDB);
   936     load_reg( REG_EAX, 0 );
   937     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
   938     MOVL_r32_rspdisp(REG_EAX, 0);
   939     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
   940     MOVL_rspdisp_r32(0, REG_EAX);
   941     ANDL_imms_r32(imm, REG_EDX );
   942     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
   943     sh4_x86.tstate = TSTATE_NONE;
   944 :}
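/* Editor's note: the stack shuffle above is the read-modify-write pattern
 * shared by the GBR byte ops (AND.B/OR.B/XOR.B here, and TAS.B): the
 * effective address is parked in the stack scratch slot because the
 * MEM_READ_* call clobbers the argument registers, then reloaded for the
 * MEM_WRITE_BYTE. The read_byte_for_write variant exists, as the name
 * suggests, so a region can validate write permission at read time instead
 * of faulting halfway through the update.
 */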
   945 CMP/EQ Rm, Rn {:  
   946     COUNT_INST(I_CMPEQ);
   947     load_reg( REG_EAX, Rm );
   948     load_reg( REG_ECX, Rn );
   949     CMPL_r32_r32( REG_EAX, REG_ECX );
   950     SETE_t();
   951     sh4_x86.tstate = TSTATE_E;
   952 :}
   953 CMP/EQ #imm, R0 {:  
   954     COUNT_INST(I_CMPEQI);
   955     load_reg( REG_EAX, 0 );
   956     CMPL_imms_r32(imm, REG_EAX);
   957     SETE_t();
   958     sh4_x86.tstate = TSTATE_E;
   959 :}
   960 CMP/GE Rm, Rn {:  
   961     COUNT_INST(I_CMPGE);
   962     load_reg( REG_EAX, Rm );
   963     load_reg( REG_ECX, Rn );
   964     CMPL_r32_r32( REG_EAX, REG_ECX );
   965     SETGE_t();
   966     sh4_x86.tstate = TSTATE_GE;
   967 :}
   968 CMP/GT Rm, Rn {: 
   969     COUNT_INST(I_CMPGT);
   970     load_reg( REG_EAX, Rm );
   971     load_reg( REG_ECX, Rn );
   972     CMPL_r32_r32( REG_EAX, REG_ECX );
   973     SETG_t();
   974     sh4_x86.tstate = TSTATE_G;
   975 :}
   976 CMP/HI Rm, Rn {:  
   977     COUNT_INST(I_CMPHI);
   978     load_reg( REG_EAX, Rm );
   979     load_reg( REG_ECX, Rn );
   980     CMPL_r32_r32( REG_EAX, REG_ECX );
   981     SETA_t();
   982     sh4_x86.tstate = TSTATE_A;
   983 :}
   984 CMP/HS Rm, Rn {: 
   985     COUNT_INST(I_CMPHS);
   986     load_reg( REG_EAX, Rm );
   987     load_reg( REG_ECX, Rn );
   988     CMPL_r32_r32( REG_EAX, REG_ECX );
   989     SETAE_t();
   990     sh4_x86.tstate = TSTATE_AE;
    991 :}
   992 CMP/PL Rn {: 
   993     COUNT_INST(I_CMPPL);
   994     load_reg( REG_EAX, Rn );
   995     CMPL_imms_r32( 0, REG_EAX );
   996     SETG_t();
   997     sh4_x86.tstate = TSTATE_G;
   998 :}
   999 CMP/PZ Rn {:  
  1000     COUNT_INST(I_CMPPZ);
  1001     load_reg( REG_EAX, Rn );
  1002     CMPL_imms_r32( 0, REG_EAX );
  1003     SETGE_t();
  1004     sh4_x86.tstate = TSTATE_GE;
  1005 :}
  1006 CMP/STR Rm, Rn {:  
  1007     COUNT_INST(I_CMPSTR);
  1008     load_reg( REG_EAX, Rm );
  1009     load_reg( REG_ECX, Rn );
  1010     XORL_r32_r32( REG_ECX, REG_EAX );
  1011     TESTB_r8_r8( REG_AL, REG_AL );
  1012     JE_label(target1);
  1013     TESTB_r8_r8( REG_AH, REG_AH );
  1014     JE_label(target2);
  1015     SHRL_imm_r32( 16, REG_EAX );
  1016     TESTB_r8_r8( REG_AL, REG_AL );
  1017     JE_label(target3);
  1018     TESTB_r8_r8( REG_AH, REG_AH );
  1019     JMP_TARGET(target1);
  1020     JMP_TARGET(target2);
  1021     JMP_TARGET(target3);
  1022     SETE_t();
  1023     sh4_x86.tstate = TSTATE_E;
  1024 :}
  1025 DIV0S Rm, Rn {:
  1026     COUNT_INST(I_DIV0S);
  1027     load_reg( REG_EAX, Rm );
  1028     load_reg( REG_ECX, Rn );
  1029     SHRL_imm_r32( 31, REG_EAX );
  1030     SHRL_imm_r32( 31, REG_ECX );
  1031     MOVL_r32_rbpdisp( REG_EAX, R_M );
  1032     MOVL_r32_rbpdisp( REG_ECX, R_Q );
  1033     CMPL_r32_r32( REG_EAX, REG_ECX );
  1034     SETNE_t();
  1035     sh4_x86.tstate = TSTATE_NE;
  1036 :}
  1037 DIV0U {:  
  1038     COUNT_INST(I_DIV0U);
  1039     XORL_r32_r32( REG_EAX, REG_EAX );
  1040     MOVL_r32_rbpdisp( REG_EAX, R_Q );
  1041     MOVL_r32_rbpdisp( REG_EAX, R_M );
  1042     MOVL_r32_rbpdisp( REG_EAX, R_T );
  1043     sh4_x86.tstate = TSTATE_C; // works for DIV1
  1044 :}
  1045 DIV1 Rm, Rn {:
  1046     COUNT_INST(I_DIV1);
  1047     MOVL_rbpdisp_r32( R_M, REG_ECX );
  1048     load_reg( REG_EAX, Rn );
  1049     if( sh4_x86.tstate != TSTATE_C ) {
   1050 	LDC_t();
   1051     }
  1052     RCLL_imm_r32( 1, REG_EAX );
  1053     SETC_r8( REG_DL ); // Q'
  1054     CMPL_rbpdisp_r32( R_Q, REG_ECX );
  1055     JE_label(mqequal);
  1056     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1057     JMP_label(end);
  1058     JMP_TARGET(mqequal);
  1059     SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1060     JMP_TARGET(end);
  1061     store_reg( REG_EAX, Rn ); // Done with Rn now
  1062     SETC_r8(REG_AL); // tmp1
  1063     XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
  1064     XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
  1065     MOVL_r32_rbpdisp( REG_ECX, R_Q );
  1066     XORL_imms_r32( 1, REG_AL );   // T = !Q'
  1067     MOVZXL_r8_r32( REG_AL, REG_EAX );
  1068     MOVL_r32_rbpdisp( REG_EAX, R_T );
  1069     sh4_x86.tstate = TSTATE_NONE;
  1070 :}
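/* Editor's note: DIV1 above is one step of the SH4 non-restoring division
 * algorithm. RCL shifts T into the low bit of Rn while the old top bit falls
 * into the carry (saved in DL as Q'); Rm is then subtracted when Q == M and
 * added otherwise, and the chain of XORs folds the add/sub carry, Q' and M
 * into the new Q and T for the next step. The SH-4 programming manual gives
 * the reference algorithm this is matching.
 */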
  1071 DMULS.L Rm, Rn {:  
  1072     COUNT_INST(I_DMULS);
  1073     load_reg( REG_EAX, Rm );
  1074     load_reg( REG_ECX, Rn );
  1075     IMULL_r32(REG_ECX);
  1076     MOVL_r32_rbpdisp( REG_EDX, R_MACH );
  1077     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1078     sh4_x86.tstate = TSTATE_NONE;
  1079 :}
  1080 DMULU.L Rm, Rn {:  
  1081     COUNT_INST(I_DMULU);
  1082     load_reg( REG_EAX, Rm );
  1083     load_reg( REG_ECX, Rn );
  1084     MULL_r32(REG_ECX);
  1085     MOVL_r32_rbpdisp( REG_EDX, R_MACH );
  1086     MOVL_r32_rbpdisp( REG_EAX, R_MACL );    
  1087     sh4_x86.tstate = TSTATE_NONE;
  1088 :}
  1089 DT Rn {:  
  1090     COUNT_INST(I_DT);
  1091     load_reg( REG_EAX, Rn );
  1092     ADDL_imms_r32( -1, REG_EAX );
  1093     store_reg( REG_EAX, Rn );
  1094     SETE_t();
  1095     sh4_x86.tstate = TSTATE_E;
  1096 :}
  1097 EXTS.B Rm, Rn {:  
  1098     COUNT_INST(I_EXTSB);
  1099     load_reg( REG_EAX, Rm );
  1100     MOVSXL_r8_r32( REG_EAX, REG_EAX );
  1101     store_reg( REG_EAX, Rn );
  1102 :}
  1103 EXTS.W Rm, Rn {:  
  1104     COUNT_INST(I_EXTSW);
  1105     load_reg( REG_EAX, Rm );
  1106     MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1107     store_reg( REG_EAX, Rn );
  1108 :}
  1109 EXTU.B Rm, Rn {:  
  1110     COUNT_INST(I_EXTUB);
  1111     load_reg( REG_EAX, Rm );
  1112     MOVZXL_r8_r32( REG_EAX, REG_EAX );
  1113     store_reg( REG_EAX, Rn );
  1114 :}
  1115 EXTU.W Rm, Rn {:  
  1116     COUNT_INST(I_EXTUW);
  1117     load_reg( REG_EAX, Rm );
  1118     MOVZXL_r16_r32( REG_EAX, REG_EAX );
  1119     store_reg( REG_EAX, Rn );
  1120 :}
  1121 MAC.L @Rm+, @Rn+ {:
  1122     COUNT_INST(I_MACL);
  1123     if( Rm == Rn ) {
  1124 	load_reg( REG_EAX, Rm );
  1125 	check_ralign32( REG_EAX );
  1126 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1127 	MOVL_r32_rspdisp(REG_EAX, 0);
  1128 	load_reg( REG_EAX, Rm );
  1129 	LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  1130 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1131         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
  1132     } else {
  1133 	load_reg( REG_EAX, Rm );
  1134 	check_ralign32( REG_EAX );
  1135 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1136 	MOVL_r32_rspdisp( REG_EAX, 0 );
  1137 	load_reg( REG_EAX, Rn );
  1138 	check_ralign32( REG_EAX );
  1139 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1140 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
   1141 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
   1142     }
  1144     IMULL_rspdisp( 0 );
  1145     ADDL_r32_rbpdisp( REG_EAX, R_MACL );
  1146     ADCL_r32_rbpdisp( REG_EDX, R_MACH );
  1148     MOVL_rbpdisp_r32( R_S, REG_ECX );
  1149     TESTL_r32_r32(REG_ECX, REG_ECX);
  1150     JE_label( nosat );
  1151     CALL_ptr( signsat48 );
  1152     JMP_TARGET( nosat );
  1153     sh4_x86.tstate = TSTATE_NONE;
  1154 :}
  1155 MAC.W @Rm+, @Rn+ {:  
  1156     COUNT_INST(I_MACW);
  1157     if( Rm == Rn ) {
  1158 	load_reg( REG_EAX, Rm );
  1159 	check_ralign16( REG_EAX );
  1160 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1161         MOVL_r32_rspdisp( REG_EAX, 0 );
  1162 	load_reg( REG_EAX, Rm );
  1163 	LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
  1164 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1165 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
  1166 	// Note translate twice in case of page boundaries. Maybe worth
  1167 	// adding a page-boundary check to skip the second translation
  1168     } else {
  1169 	load_reg( REG_EAX, Rn );
  1170 	check_ralign16( REG_EAX );
  1171 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1172         MOVL_r32_rspdisp( REG_EAX, 0 );
  1173 	load_reg( REG_EAX, Rm );
  1174 	check_ralign16( REG_EAX );
  1175 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1176 	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
   1177 	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
   1178     }
  1179     IMULL_rspdisp( 0 );
  1180     MOVL_rbpdisp_r32( R_S, REG_ECX );
  1181     TESTL_r32_r32( REG_ECX, REG_ECX );
  1182     JE_label( nosat );
  1184     ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
  1185     JNO_label( end );            // 2
  1186     MOVL_imm32_r32( 1, REG_EDX );         // 5
  1187     MOVL_r32_rbpdisp( REG_EDX, R_MACH );   // 6
  1188     JS_label( positive );        // 2
  1189     MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
  1190     MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
  1191     JMP_label(end2);           // 2
  1193     JMP_TARGET(positive);
  1194     MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
  1195     MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
  1196     JMP_label(end3);            // 2
  1198     JMP_TARGET(nosat);
  1199     ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
  1200     ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
  1201     JMP_TARGET(end);
  1202     JMP_TARGET(end2);
  1203     JMP_TARGET(end3);
  1204     sh4_x86.tstate = TSTATE_NONE;
  1205 :}
  1206 MOVT Rn {:  
  1207     COUNT_INST(I_MOVT);
  1208     MOVL_rbpdisp_r32( R_T, REG_EAX );
  1209     store_reg( REG_EAX, Rn );
  1210 :}
  1211 MUL.L Rm, Rn {:  
  1212     COUNT_INST(I_MULL);
  1213     load_reg( REG_EAX, Rm );
  1214     load_reg( REG_ECX, Rn );
  1215     MULL_r32( REG_ECX );
  1216     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1217     sh4_x86.tstate = TSTATE_NONE;
  1218 :}
  1219 MULS.W Rm, Rn {:
  1220     COUNT_INST(I_MULSW);
  1221     MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
  1222     MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
  1223     MULL_r32( REG_ECX );
  1224     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1225     sh4_x86.tstate = TSTATE_NONE;
  1226 :}
  1227 MULU.W Rm, Rn {:  
  1228     COUNT_INST(I_MULUW);
  1229     MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
  1230     MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
  1231     MULL_r32( REG_ECX );
  1232     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1233     sh4_x86.tstate = TSTATE_NONE;
  1234 :}
  1235 NEG Rm, Rn {:
  1236     COUNT_INST(I_NEG);
  1237     load_reg( REG_EAX, Rm );
  1238     NEGL_r32( REG_EAX );
  1239     store_reg( REG_EAX, Rn );
  1240     sh4_x86.tstate = TSTATE_NONE;
  1241 :}
  1242 NEGC Rm, Rn {:  
  1243     COUNT_INST(I_NEGC);
  1244     load_reg( REG_EAX, Rm );
  1245     XORL_r32_r32( REG_ECX, REG_ECX );
  1246     LDC_t();
  1247     SBBL_r32_r32( REG_EAX, REG_ECX );
  1248     store_reg( REG_ECX, Rn );
  1249     SETC_t();
  1250     sh4_x86.tstate = TSTATE_C;
  1251 :}
  1252 NOT Rm, Rn {:  
  1253     COUNT_INST(I_NOT);
  1254     load_reg( REG_EAX, Rm );
  1255     NOTL_r32( REG_EAX );
  1256     store_reg( REG_EAX, Rn );
  1257     sh4_x86.tstate = TSTATE_NONE;
  1258 :}
  1259 OR Rm, Rn {:  
  1260     COUNT_INST(I_OR);
  1261     load_reg( REG_EAX, Rm );
  1262     load_reg( REG_ECX, Rn );
  1263     ORL_r32_r32( REG_EAX, REG_ECX );
  1264     store_reg( REG_ECX, Rn );
  1265     sh4_x86.tstate = TSTATE_NONE;
  1266 :}
  1267 OR #imm, R0 {:
  1268     COUNT_INST(I_ORI);
  1269     load_reg( REG_EAX, 0 );
  1270     ORL_imms_r32(imm, REG_EAX);
  1271     store_reg( REG_EAX, 0 );
  1272     sh4_x86.tstate = TSTATE_NONE;
  1273 :}
  1274 OR.B #imm, @(R0, GBR) {:  
  1275     COUNT_INST(I_ORB);
  1276     load_reg( REG_EAX, 0 );
  1277     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
  1278     MOVL_r32_rspdisp( REG_EAX, 0 );
  1279     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
  1280     MOVL_rspdisp_r32( 0, REG_EAX );
  1281     ORL_imms_r32(imm, REG_EDX );
  1282     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1283     sh4_x86.tstate = TSTATE_NONE;
  1284 :}
  1285 ROTCL Rn {:
  1286     COUNT_INST(I_ROTCL);
  1287     load_reg( REG_EAX, Rn );
  1288     if( sh4_x86.tstate != TSTATE_C ) {
   1289 	LDC_t();
   1290     }
  1291     RCLL_imm_r32( 1, REG_EAX );
  1292     store_reg( REG_EAX, Rn );
  1293     SETC_t();
  1294     sh4_x86.tstate = TSTATE_C;
  1295 :}
  1296 ROTCR Rn {:  
  1297     COUNT_INST(I_ROTCR);
  1298     load_reg( REG_EAX, Rn );
  1299     if( sh4_x86.tstate != TSTATE_C ) {
   1300 	LDC_t();
   1301     }
  1302     RCRL_imm_r32( 1, REG_EAX );
  1303     store_reg( REG_EAX, Rn );
  1304     SETC_t();
  1305     sh4_x86.tstate = TSTATE_C;
  1306 :}
  1307 ROTL Rn {:  
  1308     COUNT_INST(I_ROTL);
  1309     load_reg( REG_EAX, Rn );
  1310     ROLL_imm_r32( 1, REG_EAX );
  1311     store_reg( REG_EAX, Rn );
  1312     SETC_t();
  1313     sh4_x86.tstate = TSTATE_C;
  1314 :}
  1315 ROTR Rn {:  
  1316     COUNT_INST(I_ROTR);
  1317     load_reg( REG_EAX, Rn );
  1318     RORL_imm_r32( 1, REG_EAX );
  1319     store_reg( REG_EAX, Rn );
  1320     SETC_t();
  1321     sh4_x86.tstate = TSTATE_C;
  1322 :}
  1323 SHAD Rm, Rn {:
  1324     COUNT_INST(I_SHAD);
  1325     /* Annoyingly enough, not directly convertible */
  1326     load_reg( REG_EAX, Rn );
  1327     load_reg( REG_ECX, Rm );
  1328     CMPL_imms_r32( 0, REG_ECX );
  1329     JGE_label(doshl);
  1331     NEGL_r32( REG_ECX );      // 2
  1332     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1333     JE_label(emptysar);     // 2
  1334     SARL_cl_r32( REG_EAX );       // 2
  1335     JMP_label(end);          // 2
  1337     JMP_TARGET(emptysar);
  1338     SARL_imm_r32(31, REG_EAX );  // 3
  1339     JMP_label(end2);
  1341     JMP_TARGET(doshl);
  1342     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1343     SHLL_cl_r32( REG_EAX );       // 2
  1344     JMP_TARGET(end);
  1345     JMP_TARGET(end2);
  1346     store_reg( REG_EAX, Rn );
  1347     sh4_x86.tstate = TSTATE_NONE;
  1348 :}
  1349 SHLD Rm, Rn {:  
  1350     COUNT_INST(I_SHLD);
  1351     load_reg( REG_EAX, Rn );
  1352     load_reg( REG_ECX, Rm );
  1353     CMPL_imms_r32( 0, REG_ECX );
  1354     JGE_label(doshl);
  1356     NEGL_r32( REG_ECX );      // 2
  1357     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1358     JE_label(emptyshr );
  1359     SHRL_cl_r32( REG_EAX );       // 2
  1360     JMP_label(end);          // 2
  1362     JMP_TARGET(emptyshr);
  1363     XORL_r32_r32( REG_EAX, REG_EAX );
  1364     JMP_label(end2);
  1366     JMP_TARGET(doshl);
  1367     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1368     SHLL_cl_r32( REG_EAX );       // 2
  1369     JMP_TARGET(end);
  1370     JMP_TARGET(end2);
  1371     store_reg( REG_EAX, Rn );
  1372     sh4_x86.tstate = TSTATE_NONE;
  1373 :}
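/* Editor's note: SHAD and SHLD above encode the SH4 rule that a negative Rm
 * means "shift right by (-Rm & 0x1F), where 0 means 32". x86 masks shift
 * counts to 5 bits, so the count==0 right-shift case is split out: SHAD
 * substitutes SAR $31 (sign fill) and SHLD simply clears the register, both
 * matching a full 32-bit shift in the SH-4 definition.
 */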
  1374 SHAL Rn {: 
  1375     COUNT_INST(I_SHAL);
  1376     load_reg( REG_EAX, Rn );
  1377     SHLL_imm_r32( 1, REG_EAX );
  1378     SETC_t();
  1379     store_reg( REG_EAX, Rn );
  1380     sh4_x86.tstate = TSTATE_C;
  1381 :}
  1382 SHAR Rn {:  
  1383     COUNT_INST(I_SHAR);
  1384     load_reg( REG_EAX, Rn );
  1385     SARL_imm_r32( 1, REG_EAX );
  1386     SETC_t();
  1387     store_reg( REG_EAX, Rn );
  1388     sh4_x86.tstate = TSTATE_C;
  1389 :}
  1390 SHLL Rn {:  
  1391     COUNT_INST(I_SHLL);
  1392     load_reg( REG_EAX, Rn );
  1393     SHLL_imm_r32( 1, REG_EAX );
  1394     SETC_t();
  1395     store_reg( REG_EAX, Rn );
  1396     sh4_x86.tstate = TSTATE_C;
  1397 :}
  1398 SHLL2 Rn {:
  1399     COUNT_INST(I_SHLL);
  1400     load_reg( REG_EAX, Rn );
  1401     SHLL_imm_r32( 2, REG_EAX );
  1402     store_reg( REG_EAX, Rn );
  1403     sh4_x86.tstate = TSTATE_NONE;
  1404 :}
  1405 SHLL8 Rn {:  
  1406     COUNT_INST(I_SHLL);
  1407     load_reg( REG_EAX, Rn );
  1408     SHLL_imm_r32( 8, REG_EAX );
  1409     store_reg( REG_EAX, Rn );
  1410     sh4_x86.tstate = TSTATE_NONE;
  1411 :}
  1412 SHLL16 Rn {:  
  1413     COUNT_INST(I_SHLL);
  1414     load_reg( REG_EAX, Rn );
  1415     SHLL_imm_r32( 16, REG_EAX );
  1416     store_reg( REG_EAX, Rn );
  1417     sh4_x86.tstate = TSTATE_NONE;
  1418 :}
  1419 SHLR Rn {:  
  1420     COUNT_INST(I_SHLR);
  1421     load_reg( REG_EAX, Rn );
  1422     SHRL_imm_r32( 1, REG_EAX );
  1423     SETC_t();
  1424     store_reg( REG_EAX, Rn );
  1425     sh4_x86.tstate = TSTATE_C;
  1426 :}
  1427 SHLR2 Rn {:  
  1428     COUNT_INST(I_SHLR);
  1429     load_reg( REG_EAX, Rn );
  1430     SHRL_imm_r32( 2, REG_EAX );
  1431     store_reg( REG_EAX, Rn );
  1432     sh4_x86.tstate = TSTATE_NONE;
  1433 :}
  1434 SHLR8 Rn {:  
  1435     COUNT_INST(I_SHLR);
  1436     load_reg( REG_EAX, Rn );
  1437     SHRL_imm_r32( 8, REG_EAX );
  1438     store_reg( REG_EAX, Rn );
  1439     sh4_x86.tstate = TSTATE_NONE;
  1440 :}
  1441 SHLR16 Rn {:  
  1442     COUNT_INST(I_SHLR);
  1443     load_reg( REG_EAX, Rn );
  1444     SHRL_imm_r32( 16, REG_EAX );
  1445     store_reg( REG_EAX, Rn );
  1446     sh4_x86.tstate = TSTATE_NONE;
  1447 :}
  1448 SUB Rm, Rn {:  
  1449     COUNT_INST(I_SUB);
  1450     load_reg( REG_EAX, Rm );
  1451     load_reg( REG_ECX, Rn );
  1452     SUBL_r32_r32( REG_EAX, REG_ECX );
  1453     store_reg( REG_ECX, Rn );
  1454     sh4_x86.tstate = TSTATE_NONE;
  1455 :}
  1456 SUBC Rm, Rn {:  
  1457     COUNT_INST(I_SUBC);
  1458     load_reg( REG_EAX, Rm );
  1459     load_reg( REG_ECX, Rn );
  1460     if( sh4_x86.tstate != TSTATE_C ) {
   1461 	LDC_t();
   1462     }
  1463     SBBL_r32_r32( REG_EAX, REG_ECX );
  1464     store_reg( REG_ECX, Rn );
  1465     SETC_t();
  1466     sh4_x86.tstate = TSTATE_C;
  1467 :}
  1468 SUBV Rm, Rn {:  
  1469     COUNT_INST(I_SUBV);
  1470     load_reg( REG_EAX, Rm );
  1471     load_reg( REG_ECX, Rn );
  1472     SUBL_r32_r32( REG_EAX, REG_ECX );
  1473     store_reg( REG_ECX, Rn );
  1474     SETO_t();
  1475     sh4_x86.tstate = TSTATE_O;
  1476 :}
  1477 SWAP.B Rm, Rn {:  
  1478     COUNT_INST(I_SWAPB);
  1479     load_reg( REG_EAX, Rm );
  1480     XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
  1481     store_reg( REG_EAX, Rn );
  1482 :}
  1483 SWAP.W Rm, Rn {:  
   1484     COUNT_INST(I_SWAPW);
  1485     load_reg( REG_EAX, Rm );
  1486     MOVL_r32_r32( REG_EAX, REG_ECX );
  1487     SHLL_imm_r32( 16, REG_ECX );
  1488     SHRL_imm_r32( 16, REG_EAX );
  1489     ORL_r32_r32( REG_EAX, REG_ECX );
  1490     store_reg( REG_ECX, Rn );
  1491     sh4_x86.tstate = TSTATE_NONE;
  1492 :}
  1493 TAS.B @Rn {:  
  1494     COUNT_INST(I_TASB);
  1495     load_reg( REG_EAX, Rn );
  1496     MOVL_r32_rspdisp( REG_EAX, 0 );
  1497     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
  1498     TESTB_r8_r8( REG_DL, REG_DL );
  1499     SETE_t();
  1500     ORB_imms_r8( 0x80, REG_DL );
  1501     MOVL_rspdisp_r32( 0, REG_EAX );
  1502     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1503     sh4_x86.tstate = TSTATE_NONE;
  1504 :}
  1505 TST Rm, Rn {:  
  1506     COUNT_INST(I_TST);
  1507     load_reg( REG_EAX, Rm );
  1508     load_reg( REG_ECX, Rn );
  1509     TESTL_r32_r32( REG_EAX, REG_ECX );
  1510     SETE_t();
  1511     sh4_x86.tstate = TSTATE_E;
  1512 :}
  1513 TST #imm, R0 {:  
  1514     COUNT_INST(I_TSTI);
  1515     load_reg( REG_EAX, 0 );
  1516     TESTL_imms_r32( imm, REG_EAX );
  1517     SETE_t();
  1518     sh4_x86.tstate = TSTATE_E;
  1519 :}
  1520 TST.B #imm, @(R0, GBR) {:  
  1521     COUNT_INST(I_TSTB);
  1522     load_reg( REG_EAX, 0 );
  1523     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
  1524     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1525     TESTB_imms_r8( imm, REG_AL );
  1526     SETE_t();
  1527     sh4_x86.tstate = TSTATE_E;
  1528 :}
  1529 XOR Rm, Rn {:  
  1530     COUNT_INST(I_XOR);
  1531     load_reg( REG_EAX, Rm );
  1532     load_reg( REG_ECX, Rn );
  1533     XORL_r32_r32( REG_EAX, REG_ECX );
  1534     store_reg( REG_ECX, Rn );
  1535     sh4_x86.tstate = TSTATE_NONE;
  1536 :}
  1537 XOR #imm, R0 {:  
  1538     COUNT_INST(I_XORI);
  1539     load_reg( REG_EAX, 0 );
  1540     XORL_imms_r32( imm, REG_EAX );
  1541     store_reg( REG_EAX, 0 );
  1542     sh4_x86.tstate = TSTATE_NONE;
  1543 :}
  1544 XOR.B #imm, @(R0, GBR) {:  
  1545     COUNT_INST(I_XORB);
  1546     load_reg( REG_EAX, 0 );
  1547     ADDL_rbpdisp_r32( R_GBR, REG_EAX ); 
  1548     MOVL_r32_rspdisp( REG_EAX, 0 );
  1549     MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
  1550     MOVL_rspdisp_r32( 0, REG_EAX );
  1551     XORL_imms_r32( imm, REG_EDX );
  1552     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1553     sh4_x86.tstate = TSTATE_NONE;
  1554 :}
  1555 XTRCT Rm, Rn {:
  1556     COUNT_INST(I_XTRCT);
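            // XTRCT: Rn = (Rm << 16) | (Rn >> 16), i.e. the middle 32 bits
            // of the 64-bit concatenation Rm:Rn.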
  1557     load_reg( REG_EAX, Rm );
  1558     load_reg( REG_ECX, Rn );
  1559     SHLL_imm_r32( 16, REG_EAX );
  1560     SHRL_imm_r32( 16, REG_ECX );
  1561     ORL_r32_r32( REG_EAX, REG_ECX );
  1562     store_reg( REG_ECX, Rn );
  1563     sh4_x86.tstate = TSTATE_NONE;
  1564 :}
  1566 /* Data move instructions */
  1567 MOV Rm, Rn {:  
  1568     COUNT_INST(I_MOV);
  1569     load_reg( REG_EAX, Rm );
  1570     store_reg( REG_EAX, Rn );
  1571 :}
  1572 MOV #imm, Rn {:  
  1573     COUNT_INST(I_MOVI);
  1574     MOVL_imm32_r32( imm, REG_EAX );
  1575     store_reg( REG_EAX, Rn );
  1576 :}
  1577 MOV.B Rm, @Rn {:  
  1578     COUNT_INST(I_MOVB);
  1579     load_reg( REG_EAX, Rn );
  1580     load_reg( REG_EDX, Rm );
  1581     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1582     sh4_x86.tstate = TSTATE_NONE;
  1583 :}
  1584 MOV.B Rm, @-Rn {:  
  1585     COUNT_INST(I_MOVB);
  1586     load_reg( REG_EAX, Rn );
  1587     LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
  1588     load_reg( REG_EDX, Rm );
  1589     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1590     ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
  1591     sh4_x86.tstate = TSTATE_NONE;
  1592 :}
  1593 MOV.B Rm, @(R0, Rn) {:  
  1594     COUNT_INST(I_MOVB);
  1595     load_reg( REG_EAX, 0 );
  1596     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1597     load_reg( REG_EDX, Rm );
  1598     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1599     sh4_x86.tstate = TSTATE_NONE;
  1600 :}
  1601 MOV.B R0, @(disp, GBR) {:  
  1602     COUNT_INST(I_MOVB);
  1603     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1604     ADDL_imms_r32( disp, REG_EAX );
  1605     load_reg( REG_EDX, 0 );
  1606     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1607     sh4_x86.tstate = TSTATE_NONE;
  1608 :}
  1609 MOV.B R0, @(disp, Rn) {:  
  1610     COUNT_INST(I_MOVB);
  1611     load_reg( REG_EAX, Rn );
  1612     ADDL_imms_r32( disp, REG_EAX );
  1613     load_reg( REG_EDX, 0 );
  1614     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1615     sh4_x86.tstate = TSTATE_NONE;
  1616 :}
  1617 MOV.B @Rm, Rn {:  
  1618     COUNT_INST(I_MOVB);
  1619     load_reg( REG_EAX, Rm );
  1620     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1621     store_reg( REG_EAX, Rn );
  1622     sh4_x86.tstate = TSTATE_NONE;
  1623 :}
  1624 MOV.B @Rm+, Rn {:  
  1625     COUNT_INST(I_MOVB);
  1626     load_reg( REG_EAX, Rm );
  1627     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1628     if( Rm != Rn ) {
  1629     	ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
  1630     }
  1631     store_reg( REG_EAX, Rn );
  1632     sh4_x86.tstate = TSTATE_NONE;
  1633 :}
  1634 MOV.B @(R0, Rm), Rn {:  
  1635     COUNT_INST(I_MOVB);
  1636     load_reg( REG_EAX, 0 );
  1637     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1638     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1639     store_reg( REG_EAX, Rn );
  1640     sh4_x86.tstate = TSTATE_NONE;
  1641 :}
  1642 MOV.B @(disp, GBR), R0 {:  
  1643     COUNT_INST(I_MOVB);
  1644     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1645     ADDL_imms_r32( disp, REG_EAX );
  1646     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1647     store_reg( REG_EAX, 0 );
  1648     sh4_x86.tstate = TSTATE_NONE;
  1649 :}
  1650 MOV.B @(disp, Rm), R0 {:  
  1651     COUNT_INST(I_MOVB);
  1652     load_reg( REG_EAX, Rm );
  1653     ADDL_imms_r32( disp, REG_EAX );
  1654     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1655     store_reg( REG_EAX, 0 );
  1656     sh4_x86.tstate = TSTATE_NONE;
  1657 :}
  1658 MOV.L Rm, @Rn {:
  1659     COUNT_INST(I_MOVL);
  1660     load_reg( REG_EAX, Rn );
  1661     check_walign32(REG_EAX);
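            // Fast path for store-queue writes: the SQ region is
            // 0xE0000000-0xE3FFFFFF, so (addr & 0xFC000000) == 0xE0000000.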
  1662     MOVL_r32_r32( REG_EAX, REG_ECX );
  1663     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1664     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1665     JNE_label( notsq );
  1666     ANDL_imms_r32( 0x3C, REG_EAX );
  1667     load_reg( REG_EDX, Rm );
  1668     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1669     JMP_label(end);
  1670     JMP_TARGET(notsq);
  1671     load_reg( REG_EDX, Rm );
  1672     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1673     JMP_TARGET(end);
  1674     sh4_x86.tstate = TSTATE_NONE;
  1675 :}
  1676 MOV.L Rm, @-Rn {:  
  1677     COUNT_INST(I_MOVL);
  1678     load_reg( REG_EAX, Rn );
  1679     ADDL_imms_r32( -4, REG_EAX );
  1680     check_walign32( REG_EAX );
  1681     load_reg( REG_EDX, Rm );
  1682     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1683     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  1684     sh4_x86.tstate = TSTATE_NONE;
  1685 :}
  1686 MOV.L Rm, @(R0, Rn) {:  
  1687     COUNT_INST(I_MOVL);
  1688     load_reg( REG_EAX, 0 );
  1689     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1690     check_walign32( REG_EAX );
  1691     load_reg( REG_EDX, Rm );
  1692     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1693     sh4_x86.tstate = TSTATE_NONE;
  1694 :}
  1695 MOV.L R0, @(disp, GBR) {:  
  1696     COUNT_INST(I_MOVL);
  1697     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1698     ADDL_imms_r32( disp, REG_EAX );
  1699     check_walign32( REG_EAX );
  1700     load_reg( REG_EDX, 0 );
  1701     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1702     sh4_x86.tstate = TSTATE_NONE;
  1703 :}
  1704 MOV.L Rm, @(disp, Rn) {:  
  1705     COUNT_INST(I_MOVL);
  1706     load_reg( REG_EAX, Rn );
  1707     ADDL_imms_r32( disp, REG_EAX );
  1708     check_walign32( REG_EAX );
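            // Same store-queue fast path as MOV.L Rm, @Rn above.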
  1709     MOVL_r32_r32( REG_EAX, REG_ECX );
  1710     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1711     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1712     JNE_label( notsq );
  1713     ANDL_imms_r32( 0x3C, REG_EAX );
  1714     load_reg( REG_EDX, Rm );
  1715     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1716     JMP_label(end);
  1717     JMP_TARGET(notsq);
  1718     load_reg( REG_EDX, Rm );
  1719     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1720     JMP_TARGET(end);
  1721     sh4_x86.tstate = TSTATE_NONE;
  1722 :}
  1723 MOV.L @Rm, Rn {:  
  1724     COUNT_INST(I_MOVL);
  1725     load_reg( REG_EAX, Rm );
  1726     check_ralign32( REG_EAX );
  1727     MEM_READ_LONG( REG_EAX, REG_EAX );
  1728     store_reg( REG_EAX, Rn );
  1729     sh4_x86.tstate = TSTATE_NONE;
  1730 :}
  1731 MOV.L @Rm+, Rn {:  
  1732     COUNT_INST(I_MOVL);
  1733     load_reg( REG_EAX, Rm );
  1734     check_ralign32( REG_EAX );
  1735     MEM_READ_LONG( REG_EAX, REG_EAX );
  1736     if( Rm != Rn ) {
  1737     	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  1738     }
  1739     store_reg( REG_EAX, Rn );
  1740     sh4_x86.tstate = TSTATE_NONE;
  1741 :}
  1742 MOV.L @(R0, Rm), Rn {:  
  1743     COUNT_INST(I_MOVL);
  1744     load_reg( REG_EAX, 0 );
  1745     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1746     check_ralign32( REG_EAX );
  1747     MEM_READ_LONG( REG_EAX, REG_EAX );
  1748     store_reg( REG_EAX, Rn );
  1749     sh4_x86.tstate = TSTATE_NONE;
  1750 :}
  1751 MOV.L @(disp, GBR), R0 {:
  1752     COUNT_INST(I_MOVL);
  1753     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1754     ADDL_imms_r32( disp, REG_EAX );
  1755     check_ralign32( REG_EAX );
  1756     MEM_READ_LONG( REG_EAX, REG_EAX );
  1757     store_reg( REG_EAX, 0 );
  1758     sh4_x86.tstate = TSTATE_NONE;
  1759 :}
  1760 MOV.L @(disp, PC), Rn {:  
  1761     COUNT_INST(I_MOVLPC);
  1762     if( sh4_x86.in_delay_slot ) {
  1763 	SLOTILLEGAL();
  1764     } else {
  1765 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1766 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1767 	    // If the target address is in the same page as the code, it's
  1768 	    // pretty safe to just ref it directly and circumvent the whole
  1769 	    // memory subsystem. (this is a big performance win)
  1771 	    // FIXME: There's a corner-case that's not handled here: when the
  1772 	    // current code-page is in the ITLB but not in the UTLB, this
  1773 	    // should generate a TLB miss (although we'd need to test real SH4
  1774 	    // behaviour to confirm). It's unlikely anything depends on that
  1775 	    // behaviour, though.
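       	    // Worked example (hypothetical values): pc = 0x8C0100F2 and
       	    // disp = 0x18 give target = (pc & ~3) + disp + 4 = 0x8C01010C.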
  1776 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1777 	    MOVL_moffptr_eax( ptr );
  1778 	} else {
  1779 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1780 	    // different virtual address than the translation was done with,
  1781 	    // but we can safely assume that the low bits are the same.
  1782 	    MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
  1783 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1784 	    MEM_READ_LONG( REG_EAX, REG_EAX );
  1785 	    sh4_x86.tstate = TSTATE_NONE;
  1786 	}
  1787 	store_reg( REG_EAX, Rn );
  1788     }
  1789 :}
  1790 MOV.L @(disp, Rm), Rn {:  
  1791     COUNT_INST(I_MOVL);
  1792     load_reg( REG_EAX, Rm );
  1793     ADDL_imms_r32( disp, REG_EAX );
  1794     check_ralign32( REG_EAX );
  1795     MEM_READ_LONG( REG_EAX, REG_EAX );
  1796     store_reg( REG_EAX, Rn );
  1797     sh4_x86.tstate = TSTATE_NONE;
  1798 :}
  1799 MOV.W Rm, @Rn {:  
  1800     COUNT_INST(I_MOVW);
  1801     load_reg( REG_EAX, Rn );
  1802     check_walign16( REG_EAX );
  1803     load_reg( REG_EDX, Rm );
  1804     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1805     sh4_x86.tstate = TSTATE_NONE;
  1806 :}
  1807 MOV.W Rm, @-Rn {:  
  1808     COUNT_INST(I_MOVW);
  1809     load_reg( REG_EAX, Rn );
  1810     check_walign16( REG_EAX );
  1811     LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
  1812     load_reg( REG_EDX, Rm );
  1813     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1814     ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
  1815     sh4_x86.tstate = TSTATE_NONE;
  1816 :}
  1817 MOV.W Rm, @(R0, Rn) {:  
  1818     COUNT_INST(I_MOVW);
  1819     load_reg( REG_EAX, 0 );
  1820     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1821     check_walign16( REG_EAX );
  1822     load_reg( REG_EDX, Rm );
  1823     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1824     sh4_x86.tstate = TSTATE_NONE;
  1825 :}
  1826 MOV.W R0, @(disp, GBR) {:  
  1827     COUNT_INST(I_MOVW);
  1828     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1829     ADDL_imms_r32( disp, REG_EAX );
  1830     check_walign16( REG_EAX );
  1831     load_reg( REG_EDX, 0 );
  1832     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1833     sh4_x86.tstate = TSTATE_NONE;
  1834 :}
  1835 MOV.W R0, @(disp, Rn) {:  
  1836     COUNT_INST(I_MOVW);
  1837     load_reg( REG_EAX, Rn );
  1838     ADDL_imms_r32( disp, REG_EAX );
  1839     check_walign16( REG_EAX );
  1840     load_reg( REG_EDX, 0 );
  1841     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1842     sh4_x86.tstate = TSTATE_NONE;
  1843 :}
  1844 MOV.W @Rm, Rn {:  
  1845     COUNT_INST(I_MOVW);
  1846     load_reg( REG_EAX, Rm );
  1847     check_ralign16( REG_EAX );
  1848     MEM_READ_WORD( REG_EAX, REG_EAX );
  1849     store_reg( REG_EAX, Rn );
  1850     sh4_x86.tstate = TSTATE_NONE;
  1851 :}
  1852 MOV.W @Rm+, Rn {:  
  1853     COUNT_INST(I_MOVW);
  1854     load_reg( REG_EAX, Rm );
  1855     check_ralign16( REG_EAX );
  1856     MEM_READ_WORD( REG_EAX, REG_EAX );
  1857     if( Rm != Rn ) {
  1858         ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
  1859     }
  1860     store_reg( REG_EAX, Rn );
  1861     sh4_x86.tstate = TSTATE_NONE;
  1862 :}
  1863 MOV.W @(R0, Rm), Rn {:  
  1864     COUNT_INST(I_MOVW);
  1865     load_reg( REG_EAX, 0 );
  1866     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1867     check_ralign16( REG_EAX );
  1868     MEM_READ_WORD( REG_EAX, REG_EAX );
  1869     store_reg( REG_EAX, Rn );
  1870     sh4_x86.tstate = TSTATE_NONE;
  1871 :}
  1872 MOV.W @(disp, GBR), R0 {:  
  1873     COUNT_INST(I_MOVW);
  1874     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1875     ADDL_imms_r32( disp, REG_EAX );
  1876     check_ralign16( REG_EAX );
  1877     MEM_READ_WORD( REG_EAX, REG_EAX );
  1878     store_reg( REG_EAX, 0 );
  1879     sh4_x86.tstate = TSTATE_NONE;
  1880 :}
  1881 MOV.W @(disp, PC), Rn {:  
  1882     COUNT_INST(I_MOVW);
  1883     if( sh4_x86.in_delay_slot ) {
  1884 	SLOTILLEGAL();
  1885     } else {
  1886 	// See comments for MOV.L @(disp, PC), Rn
  1887 	uint32_t target = pc + disp + 4;
  1888 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1889 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1890 	    MOVL_moffptr_eax( ptr );
  1891 	    MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1892 	} else {
  1893 	    MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
  1894 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1895 	    MEM_READ_WORD( REG_EAX, REG_EAX );
  1896 	    sh4_x86.tstate = TSTATE_NONE;
  1897 	}
  1898 	store_reg( REG_EAX, Rn );
  1899     }
  1900 :}
  1901 MOV.W @(disp, Rm), R0 {:  
  1902     COUNT_INST(I_MOVW);
  1903     load_reg( REG_EAX, Rm );
  1904     ADDL_imms_r32( disp, REG_EAX );
  1905     check_ralign16( REG_EAX );
  1906     MEM_READ_WORD( REG_EAX, REG_EAX );
  1907     store_reg( REG_EAX, 0 );
  1908     sh4_x86.tstate = TSTATE_NONE;
  1909 :}
  1910 MOVA @(disp, PC), R0 {:  
  1911     COUNT_INST(I_MOVA);
  1912     if( sh4_x86.in_delay_slot ) {
  1913 	SLOTILLEGAL();
  1914     } else {
  1915 	MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
  1916 	ADDL_rbpdisp_r32( R_PC, REG_ECX );
  1917 	store_reg( REG_ECX, 0 );
  1918 	sh4_x86.tstate = TSTATE_NONE;
  1919     }
  1920 :}
  1921 MOVCA.L R0, @Rn {:  
  1922     COUNT_INST(I_MOVCA);
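            // Implemented as a plain long store - the cache-line allocation
            // behaviour of MOVCA.L isn't modelled here.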
  1923     load_reg( REG_EAX, Rn );
  1924     check_walign32( REG_EAX );
  1925     load_reg( REG_EDX, 0 );
  1926     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1927     sh4_x86.tstate = TSTATE_NONE;
  1928 :}
  1930 /* Control transfer instructions */
  1931 BF disp {:
  1932     COUNT_INST(I_BF);
  1933     if( sh4_x86.in_delay_slot ) {
  1934 	SLOTILLEGAL();
  1935     } else {
  1936 	sh4vma_t target = disp + pc + 4;
  1937 	JT_label( nottaken );
  1938 	exit_block_rel(target, pc+2 );
  1939 	JMP_TARGET(nottaken);
  1940 	return 2;
  1941     }
  1942 :}
  1943 BF/S disp {:
  1944     COUNT_INST(I_BFS);
  1945     if( sh4_x86.in_delay_slot ) {
  1946 	SLOTILLEGAL();
  1947     } else {
  1948 	sh4_x86.in_delay_slot = DELAY_PC;
  1949 	if( UNTRANSLATABLE(pc+2) ) {
  1950 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1951 	    JT_label(nottaken);
  1952 	    ADDL_imms_r32( disp, REG_EAX );
  1953 	    JMP_TARGET(nottaken);
  1954 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1955 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1956 	    exit_block_emu(pc+2);
  1957 	    sh4_x86.branch_taken = TRUE;
  1958 	    return 2;
  1959 	} else {
  1960 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1961 		CMPL_imms_rbpdisp( 1, R_T );
  1962 		sh4_x86.tstate = TSTATE_E;
  1963 	    }
  1964 	    sh4vma_t target = disp + pc + 4;
  1965 	    JCC_cc_rel32(sh4_x86.tstate,0);
  1966 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
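       	    // The JCC above is emitted with a zero displacement; once the
       	    // taken path (delay slot + block exit) has been generated, the
       	    // displacement is patched so the not-taken case skips ahead to
       	    // the second copy of the delay slot.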
  1967 	    int save_tstate = sh4_x86.tstate;
  1968 	    sh4_translate_instruction(pc+2);
  1969             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  1970 	    exit_block_rel( target, pc+4 );
  1972 	    // not taken
  1973 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1974 	    sh4_x86.tstate = save_tstate;
  1975 	    sh4_translate_instruction(pc+2);
  1976 	    return 4;
  1977 	}
  1978     }
  1979 :}
  1980 BRA disp {:  
  1981     COUNT_INST(I_BRA);
  1982     if( sh4_x86.in_delay_slot ) {
  1983 	SLOTILLEGAL();
  1984     } else {
  1985 	sh4_x86.in_delay_slot = DELAY_PC;
  1986 	sh4_x86.branch_taken = TRUE;
  1987 	if( UNTRANSLATABLE(pc+2) ) {
  1988 	    MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1989 	    ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
  1990 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1991 	    exit_block_emu(pc+2);
  1992 	    return 2;
  1993 	} else {
  1994 	    sh4_translate_instruction( pc + 2 );
  1995 	    exit_block_rel( disp + pc + 4, pc+4 );
  1996 	    return 4;
  1997 	}
  1998     }
  1999 :}
  2000 BRAF Rn {:  
  2001     COUNT_INST(I_BRAF);
  2002     if( sh4_x86.in_delay_slot ) {
  2003 	SLOTILLEGAL();
  2004     } else {
  2005 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2006 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2007 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  2008 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2009 	sh4_x86.in_delay_slot = DELAY_PC;
  2010 	sh4_x86.tstate = TSTATE_NONE;
  2011 	sh4_x86.branch_taken = TRUE;
  2012 	if( UNTRANSLATABLE(pc+2) ) {
  2013 	    exit_block_emu(pc+2);
  2014 	    return 2;
  2015 	} else {
  2016 	    sh4_translate_instruction( pc + 2 );
  2017 	    exit_block_newpcset(pc+4);
  2018 	    return 4;
  2019 	}
  2020     }
  2021 :}
  2022 BSR disp {:  
  2023     COUNT_INST(I_BSR);
  2024     if( sh4_x86.in_delay_slot ) {
  2025 	SLOTILLEGAL();
  2026     } else {
  2027 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2028 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2029 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2030 	sh4_x86.in_delay_slot = DELAY_PC;
  2031 	sh4_x86.branch_taken = TRUE;
  2032 	sh4_x86.tstate = TSTATE_NONE;
  2033 	if( UNTRANSLATABLE(pc+2) ) {
  2034 	    ADDL_imms_r32( disp, REG_EAX );
  2035 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2036 	    exit_block_emu(pc+2);
  2037 	    return 2;
  2038 	} else {
  2039 	    sh4_translate_instruction( pc + 2 );
  2040 	    exit_block_rel( disp + pc + 4, pc+4 );
  2041 	    return 4;
  2042 	}
  2043     }
  2044 :}
  2045 BSRF Rn {:  
  2046     COUNT_INST(I_BSRF);
  2047     if( sh4_x86.in_delay_slot ) {
  2048 	SLOTILLEGAL();
  2049     } else {
  2050 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2051 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2052 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2053 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  2054 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2056 	sh4_x86.in_delay_slot = DELAY_PC;
  2057 	sh4_x86.tstate = TSTATE_NONE;
  2058 	sh4_x86.branch_taken = TRUE;
  2059 	if( UNTRANSLATABLE(pc+2) ) {
  2060 	    exit_block_emu(pc+2);
  2061 	    return 2;
  2062 	} else {
  2063 	    sh4_translate_instruction( pc + 2 );
  2064 	    exit_block_newpcset(pc+4);
  2065 	    return 4;
  2066 	}
  2067     }
  2068 :}
  2069 BT disp {:
  2070     COUNT_INST(I_BT);
  2071     if( sh4_x86.in_delay_slot ) {
  2072 	SLOTILLEGAL();
  2073     } else {
  2074 	sh4vma_t target = disp + pc + 4;
  2075 	JF_label( nottaken );
  2076 	exit_block_rel(target, pc+2 );
  2077 	JMP_TARGET(nottaken);
  2078 	return 2;
  2079     }
  2080 :}
  2081 BT/S disp {:
  2082     COUNT_INST(I_BTS);
  2083     if( sh4_x86.in_delay_slot ) {
  2084 	SLOTILLEGAL();
  2085     } else {
  2086 	sh4_x86.in_delay_slot = DELAY_PC;
  2087 	if( UNTRANSLATABLE(pc+2) ) {
  2088 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2089 	    JF_label(nottaken);
  2090 	    ADDL_imms_r32( disp, REG_EAX );
  2091 	    JMP_TARGET(nottaken);
  2092 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  2093 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2094 	    exit_block_emu(pc+2);
  2095 	    sh4_x86.branch_taken = TRUE;
  2096 	    return 2;
  2097 	} else {
  2098 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  2099 		CMPL_imms_rbpdisp( 1, R_T );
  2100 		sh4_x86.tstate = TSTATE_E;
  2101 	    }
  2102 	    JCC_cc_rel32(sh4_x86.tstate^1,0);
  2103 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
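       	    // Same zero-displacement JCC + backpatch trick as BF/S above.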
  2105 	    int save_tstate = sh4_x86.tstate;
  2106 	    sh4_translate_instruction(pc+2);
  2107             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  2108 	    exit_block_rel( disp + pc + 4, pc+4 );
  2109 	    // not taken
  2110 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  2111 	    sh4_x86.tstate = save_tstate;
  2112 	    sh4_translate_instruction(pc+2);
  2113 	    return 4;
  2114 	}
  2115     }
  2116 :}
  2117 JMP @Rn {:  
  2118     COUNT_INST(I_JMP);
  2119     if( sh4_x86.in_delay_slot ) {
  2120 	SLOTILLEGAL();
  2121     } else {
  2122 	load_reg( REG_ECX, Rn );
  2123 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2124 	sh4_x86.in_delay_slot = DELAY_PC;
  2125 	sh4_x86.branch_taken = TRUE;
  2126 	if( UNTRANSLATABLE(pc+2) ) {
  2127 	    exit_block_emu(pc+2);
  2128 	    return 2;
  2129 	} else {
  2130 	    sh4_translate_instruction(pc+2);
  2131 	    exit_block_newpcset(pc+4);
  2132 	    return 4;
  2133 	}
  2134     }
  2135 :}
  2136 JSR @Rn {:  
  2137     COUNT_INST(I_JSR);
  2138     if( sh4_x86.in_delay_slot ) {
  2139 	SLOTILLEGAL();
  2140     } else {
  2141 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2142 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2143 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2144 	load_reg( REG_ECX, Rn );
  2145 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2146 	sh4_x86.in_delay_slot = DELAY_PC;
  2147 	sh4_x86.branch_taken = TRUE;
  2148 	sh4_x86.tstate = TSTATE_NONE;
  2149 	if( UNTRANSLATABLE(pc+2) ) {
  2150 	    exit_block_emu(pc+2);
  2151 	    return 2;
  2152 	} else {
  2153 	    sh4_translate_instruction(pc+2);
  2154 	    exit_block_newpcset(pc+4);
  2155 	    return 4;
  2156 	}
  2157     }
  2158 :}
  2159 RTE {:  
  2160     COUNT_INST(I_RTE);
  2161     if( sh4_x86.in_delay_slot ) {
  2162 	SLOTILLEGAL();
  2163     } else {
  2164 	check_priv();
  2165 	MOVL_rbpdisp_r32( R_SPC, REG_ECX );
  2166 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2167 	MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2168 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2169 	sh4_x86.in_delay_slot = DELAY_PC;
  2170 	sh4_x86.fpuen_checked = FALSE;
  2171 	sh4_x86.tstate = TSTATE_NONE;
  2172 	sh4_x86.branch_taken = TRUE;
  2173 	sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2174 	if( UNTRANSLATABLE(pc+2) ) {
  2175 	    exit_block_emu(pc+2);
  2176 	    return 2;
  2177 	} else {
  2178 	    sh4_translate_instruction(pc+2);
  2179 	    exit_block_newpcset(pc+4);
  2180 	    return 4;
  2181 	}
  2182     }
  2183 :}
  2184 RTS {:  
  2185     COUNT_INST(I_RTS);
  2186     if( sh4_x86.in_delay_slot ) {
  2187 	SLOTILLEGAL();
  2188     } else {
  2189 	MOVL_rbpdisp_r32( R_PR, REG_ECX );
  2190 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2191 	sh4_x86.in_delay_slot = DELAY_PC;
  2192 	sh4_x86.branch_taken = TRUE;
  2193 	if( UNTRANSLATABLE(pc+2) ) {
  2194 	    exit_block_emu(pc+2);
  2195 	    return 2;
  2196 	} else {
  2197 	    sh4_translate_instruction(pc+2);
  2198 	    exit_block_newpcset(pc+4);
  2199 	    return 4;
  2200 	}
  2201     }
  2202 :}
  2203 TRAPA #imm {:  
  2204     COUNT_INST(I_TRAPA);
  2205     if( sh4_x86.in_delay_slot ) {
  2206 	SLOTILLEGAL();
  2207     } else {
  2208 	MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX );   // 5
  2209 	ADDL_r32_rbpdisp( REG_ECX, R_PC );
  2210 	MOVL_imm32_r32( imm, REG_EAX );
  2211 	CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
  2212 	sh4_x86.tstate = TSTATE_NONE;
  2213 	exit_block_pcset(pc+2);
  2214 	sh4_x86.branch_taken = TRUE;
  2215 	return 2;
  2216     }
  2217 :}
  2218 UNDEF {:  
  2219     COUNT_INST(I_UNDEF);
  2220     if( sh4_x86.in_delay_slot ) {
  2221 	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4);    
  2222     } else {
  2223 	exit_block_exc(EXC_ILLEGAL, pc, 2);    
  2224 	return 2;
  2225     }
  2226 :}
  2228 CLRMAC {:  
  2229     COUNT_INST(I_CLRMAC);
  2230     XORL_r32_r32(REG_EAX, REG_EAX);
  2231     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2232     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2233     sh4_x86.tstate = TSTATE_NONE;
  2234 :}
  2235 CLRS {:
  2236     COUNT_INST(I_CLRS);
  2237     CLC();
  2238     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2239     sh4_x86.tstate = TSTATE_NONE;
  2240 :}
  2241 CLRT {:  
  2242     COUNT_INST(I_CLRT);
  2243     CLC();
  2244     SETC_t();
  2245     sh4_x86.tstate = TSTATE_C;
  2246 :}
  2247 SETS {:  
  2248     COUNT_INST(I_SETS);
  2249     STC();
  2250     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2251     sh4_x86.tstate = TSTATE_NONE;
  2252 :}
  2253 SETT {:  
  2254     COUNT_INST(I_SETT);
  2255     STC();
  2256     SETC_t();
  2257     sh4_x86.tstate = TSTATE_C;
  2258 :}
  2260 /* Floating point moves */
  2261 FMOV FRm, FRn {:  
  2262     COUNT_INST(I_FMOV1);
  2263     check_fpuen();
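            // With FPSCR.SZ=1 (double_size) FMOV transfers a 64-bit register
            // pair; with SZ=0 it moves a single 32-bit FR register.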
  2264     if( sh4_x86.double_size ) {
  2265         load_dr0( REG_EAX, FRm );
  2266         load_dr1( REG_ECX, FRm );
  2267         store_dr0( REG_EAX, FRn );
  2268         store_dr1( REG_ECX, FRn );
  2269     } else {
  2270         load_fr( REG_EAX, FRm ); // SZ=0 branch
  2271         store_fr( REG_EAX, FRn );
  2272     }
  2273 :}
  2274 FMOV FRm, @Rn {: 
  2275     COUNT_INST(I_FMOV2);
  2276     check_fpuen();
  2277     load_reg( REG_EAX, Rn );
  2278     if( sh4_x86.double_size ) {
  2279         check_walign64( REG_EAX );
  2280         load_dr0( REG_EDX, FRm );
  2281         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2282         load_reg( REG_EAX, Rn );
  2283         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2284         load_dr1( REG_EDX, FRm );
  2285         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2286     } else {
  2287         check_walign32( REG_EAX );
  2288         load_fr( REG_EDX, FRm );
  2289         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2290     }
  2291     sh4_x86.tstate = TSTATE_NONE;
  2292 :}
  2293 FMOV @Rm, FRn {:  
  2294     COUNT_INST(I_FMOV5);
  2295     check_fpuen();
  2296     load_reg( REG_EAX, Rm );
  2297     if( sh4_x86.double_size ) {
  2298         check_ralign64( REG_EAX );
  2299         MEM_READ_LONG( REG_EAX, REG_EAX );
  2300         store_dr0( REG_EAX, FRn );
  2301         load_reg( REG_EAX, Rm );
  2302         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2303         MEM_READ_LONG( REG_EAX, REG_EAX );
  2304         store_dr1( REG_EAX, FRn );
  2305     } else {
  2306         check_ralign32( REG_EAX );
  2307         MEM_READ_LONG( REG_EAX, REG_EAX );
  2308         store_fr( REG_EAX, FRn );
  2309     }
  2310     sh4_x86.tstate = TSTATE_NONE;
  2311 :}
  2312 FMOV FRm, @-Rn {:  
  2313     COUNT_INST(I_FMOV3);
  2314     check_fpuen();
  2315     load_reg( REG_EAX, Rn );
  2316     if( sh4_x86.double_size ) {
  2317         check_walign64( REG_EAX );
  2318         LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
  2319         load_dr0( REG_EDX, FRm );
  2320         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2321         load_reg( REG_EAX, Rn );
  2322         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2323         load_dr1( REG_EDX, FRm );
  2324         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2325         ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
  2326     } else {
  2327         check_walign32( REG_EAX );
  2328         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2329         load_fr( REG_EDX, FRm );
  2330         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2331         ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
  2332     }
  2333     sh4_x86.tstate = TSTATE_NONE;
  2334 :}
  2335 FMOV @Rm+, FRn {:
  2336     COUNT_INST(I_FMOV6);
  2337     check_fpuen();
  2338     load_reg( REG_EAX, Rm );
  2339     if( sh4_x86.double_size ) {
  2340         check_ralign64( REG_EAX );
  2341         MEM_READ_LONG( REG_EAX, REG_EAX );
  2342         store_dr0( REG_EAX, FRn );
  2343         load_reg( REG_EAX, Rm );
  2344         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2345         MEM_READ_LONG( REG_EAX, REG_EAX );
  2346         store_dr1( REG_EAX, FRn );
  2347         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
  2348     } else {
  2349         check_ralign32( REG_EAX );
  2350         MEM_READ_LONG( REG_EAX, REG_EAX );
  2351         store_fr( REG_EAX, FRn );
  2352         ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2353     }
  2354     sh4_x86.tstate = TSTATE_NONE;
  2355 :}
  2356 FMOV FRm, @(R0, Rn) {:  
  2357     COUNT_INST(I_FMOV4);
  2358     check_fpuen();
  2359     load_reg( REG_EAX, Rn );
  2360     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2361     if( sh4_x86.double_size ) {
  2362         check_walign64( REG_EAX );
  2363         load_dr0( REG_EDX, FRm );
  2364         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2365         load_reg( REG_EAX, Rn );
  2366         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2367         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2368         load_dr1( REG_EDX, FRm );
  2369         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2370     } else {
  2371         check_walign32( REG_EAX );
  2372         load_fr( REG_EDX, FRm );
  2373         MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
  2374     }
  2375     sh4_x86.tstate = TSTATE_NONE;
  2376 :}
  2377 FMOV @(R0, Rm), FRn {:  
  2378     COUNT_INST(I_FMOV7);
  2379     check_fpuen();
  2380     load_reg( REG_EAX, Rm );
  2381     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2382     if( sh4_x86.double_size ) {
  2383         check_ralign64( REG_EAX );
  2384         MEM_READ_LONG( REG_EAX, REG_EAX );
  2385         store_dr0( REG_EAX, FRn );
  2386         load_reg( REG_EAX, Rm );
  2387         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2388         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2389         MEM_READ_LONG( REG_EAX, REG_EAX );
  2390         store_dr1( REG_EAX, FRn );
  2391     } else {
  2392         check_ralign32( REG_EAX );
  2393         MEM_READ_LONG( REG_EAX, REG_EAX );
  2394         store_fr( REG_EAX, FRn );
  2395     }
  2396     sh4_x86.tstate = TSTATE_NONE;
  2397 :}
  2398 FLDI0 FRn {:  /* IFF PR=0 */
  2399     COUNT_INST(I_FLDI0);
  2400     check_fpuen();
  2401     if( sh4_x86.double_prec == 0 ) {
  2402         XORL_r32_r32( REG_EAX, REG_EAX );
  2403         store_fr( REG_EAX, FRn );
  2404     }
  2405     sh4_x86.tstate = TSTATE_NONE;
  2406 :}
  2407 FLDI1 FRn {:  /* IFF PR=0 */
  2408     COUNT_INST(I_FLDI1);
  2409     check_fpuen();
  2410     if( sh4_x86.double_prec == 0 ) {
  2411         MOVL_imm32_r32( 0x3F800000, REG_EAX );
  2412         store_fr( REG_EAX, FRn );
  2413     }
  2414 :}
  2416 FLOAT FPUL, FRn {:  
  2417     COUNT_INST(I_FLOAT);
  2418     check_fpuen();
  2419     FILD_rbpdisp(R_FPUL);
  2420     if( sh4_x86.double_prec ) {
  2421         pop_dr( FRn );
  2422     } else {
  2423         pop_fr( FRn );
  2424     }
  2425 :}
  2426 FTRC FRm, FPUL {:  
  2427     COUNT_INST(I_FTRC);
  2428     check_fpuen();
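            // Saturating float->int: values >= max_int or <= min_int clamp
            // to the corresponding bound; in-range values are converted with
            // the x87 control word temporarily set to round-to-zero.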
  2429     if( sh4_x86.double_prec ) {
  2430         push_dr( FRm );
  2431     } else {
  2432         push_fr( FRm );
  2433     }
  2434     MOVP_immptr_rptr( &max_int, REG_ECX );
  2435     FILD_r32disp( REG_ECX, 0 );
  2436     FCOMIP_st(1);
  2437     JNA_label( sat );
  2438     MOVP_immptr_rptr( &min_int, REG_ECX );
  2439     FILD_r32disp( REG_ECX, 0 );
  2440     FCOMIP_st(1);              
  2441     JAE_label( sat2 );            
  2442     MOVP_immptr_rptr( &save_fcw, REG_EAX );
  2443     FNSTCW_r32disp( REG_EAX, 0 );
  2444     MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
  2445     FLDCW_r32disp( REG_EDX, 0 );
  2446     FISTP_rbpdisp(R_FPUL);             
  2447     FLDCW_r32disp( REG_EAX, 0 );
  2448     JMP_label(end);             
  2450     JMP_TARGET(sat);
  2451     JMP_TARGET(sat2);
  2452     MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
  2453     MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
  2454     FPOP_st();
  2455     JMP_TARGET(end);
  2456     sh4_x86.tstate = TSTATE_NONE;
  2457 :}
  2458 FLDS FRm, FPUL {:  
  2459     COUNT_INST(I_FLDS);
  2460     check_fpuen();
  2461     load_fr( REG_EAX, FRm );
  2462     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2463 :}
  2464 FSTS FPUL, FRn {:  
  2465     COUNT_INST(I_FSTS);
  2466     check_fpuen();
  2467     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2468     store_fr( REG_EAX, FRn );
  2469 :}
  2470 FCNVDS FRm, FPUL {:  
  2471     COUNT_INST(I_FCNVDS);
  2472     check_fpuen();
  2473     if( sh4_x86.double_prec ) {
  2474         push_dr( FRm );
  2475         pop_fpul();
  2476     }
  2477 :}
  2478 FCNVSD FPUL, FRn {:  
  2479     COUNT_INST(I_FCNVSD);
  2480     check_fpuen();
  2481     if( sh4_x86.double_prec ) {
  2482         push_fpul();
  2483         pop_dr( FRn );
  2484     }
  2485 :}
  2487 /* Floating point instructions */
  2488 FABS FRn {:  
  2489     COUNT_INST(I_FABS);
  2490     check_fpuen();
  2491     if( sh4_x86.double_prec ) {
  2492         push_dr(FRn);
  2493         FABS_st0();
  2494         pop_dr(FRn);
  2495     } else {
  2496         push_fr(FRn);
  2497         FABS_st0();
  2498         pop_fr(FRn);
  2499     }
  2500 :}
  2501 FADD FRm, FRn {:  
  2502     COUNT_INST(I_FADD);
  2503     check_fpuen();
  2504     if( sh4_x86.double_prec ) {
  2505         push_dr(FRm);
  2506         push_dr(FRn);
  2507         FADDP_st(1);
  2508         pop_dr(FRn);
  2509     } else {
  2510         push_fr(FRm);
  2511         push_fr(FRn);
  2512         FADDP_st(1);
  2513         pop_fr(FRn);
  2514     }
  2515 :}
  2516 FDIV FRm, FRn {:  
  2517     COUNT_INST(I_FDIV);
  2518     check_fpuen();
  2519     if( sh4_x86.double_prec ) {
  2520         push_dr(FRn);
  2521         push_dr(FRm);
  2522         FDIVP_st(1);
  2523         pop_dr(FRn);
  2524     } else {
  2525         push_fr(FRn);
  2526         push_fr(FRm);
  2527         FDIVP_st(1);
  2528         pop_fr(FRn);
  2529     }
  2530 :}
  2531 FMAC FR0, FRm, FRn {:  
  2532     COUNT_INST(I_FMAC);
  2533     check_fpuen();
  2534     if( sh4_x86.double_prec ) {
  2535         push_dr( 0 );
  2536         push_dr( FRm );
  2537         FMULP_st(1);
  2538         push_dr( FRn );
  2539         FADDP_st(1);
  2540         pop_dr( FRn );
  2541     } else {
  2542         push_fr( 0 );
  2543         push_fr( FRm );
  2544         FMULP_st(1);
  2545         push_fr( FRn );
  2546         FADDP_st(1);
  2547         pop_fr( FRn );
  2548     }
  2549 :}
  2551 FMUL FRm, FRn {:  
  2552     COUNT_INST(I_FMUL);
  2553     check_fpuen();
  2554     if( sh4_x86.double_prec ) {
  2555         push_dr(FRm);
  2556         push_dr(FRn);
  2557         FMULP_st(1);
  2558         pop_dr(FRn);
  2559     } else {
  2560         push_fr(FRm);
  2561         push_fr(FRn);
  2562         FMULP_st(1);
  2563         pop_fr(FRn);
  2564     }
  2565 :}
  2566 FNEG FRn {:  
  2567     COUNT_INST(I_FNEG);
  2568     check_fpuen();
  2569     if( sh4_x86.double_prec ) {
  2570         push_dr(FRn);
  2571         FCHS_st0();
  2572         pop_dr(FRn);
  2573     } else {
  2574         push_fr(FRn);
  2575         FCHS_st0();
  2576         pop_fr(FRn);
  2577     }
  2578 :}
  2579 FSRRA FRn {:  
  2580     COUNT_INST(I_FSRRA);
  2581     check_fpuen();
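            // FSRRA: FRn = 1.0 / sqrt(FRn) (defined for single precision only).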
  2582     if( sh4_x86.double_prec == 0 ) {
  2583         FLD1_st0();
  2584         push_fr(FRn);
  2585         FSQRT_st0();
  2586         FDIVP_st(1);
  2587         pop_fr(FRn);
  2588     }
  2589 :}
  2590 FSQRT FRn {:  
  2591     COUNT_INST(I_FSQRT);
  2592     check_fpuen();
  2593     if( sh4_x86.double_prec ) {
  2594         push_dr(FRn);
  2595         FSQRT_st0();
  2596         pop_dr(FRn);
  2597     } else {
  2598         push_fr(FRn);
  2599         FSQRT_st0();
  2600         pop_fr(FRn);
  2601     }
  2602 :}
  2603 FSUB FRm, FRn {:  
  2604     COUNT_INST(I_FSUB);
  2605     check_fpuen();
  2606     if( sh4_x86.double_prec ) {
  2607         push_dr(FRn);
  2608         push_dr(FRm);
  2609         FSUBP_st(1);
  2610         pop_dr(FRn);
  2611     } else {
  2612         push_fr(FRn);
  2613         push_fr(FRm);
  2614         FSUBP_st(1);
  2615         pop_fr(FRn);
  2616     }
  2617 :}
  2619 FCMP/EQ FRm, FRn {:  
  2620     COUNT_INST(I_FCMPEQ);
  2621     check_fpuen();
  2622     if( sh4_x86.double_prec ) {
  2623         push_dr(FRm);
  2624         push_dr(FRn);
  2625     } else {
  2626         push_fr(FRm);
  2627         push_fr(FRn);
  2628     }
  2629     FCOMIP_st(1);
  2630     SETE_t();
  2631     FPOP_st();
  2632     sh4_x86.tstate = TSTATE_E;
  2633 :}
  2634 FCMP/GT FRm, FRn {:  
  2635     COUNT_INST(I_FCMPGT);
  2636     check_fpuen();
  2637     if( sh4_x86.double_prec ) {
  2638         push_dr(FRm);
  2639         push_dr(FRn);
  2640     } else {
  2641         push_fr(FRm);
  2642         push_fr(FRn);
  2643     }
  2644     FCOMIP_st(1);
  2645     SETA_t();
  2646     FPOP_st();
  2647     sh4_x86.tstate = TSTATE_A;
  2648 :}
  2650 FSCA FPUL, FRn {:  
  2651     COUNT_INST(I_FSCA);
  2652     check_fpuen();
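            // FSCA writes sin/cos of the angle in FPUL into the FRn register
            // pair; FPUL is a fixed-point angle (0x10000 == one full turn),
            // evaluated out of line by sh4_fsca.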
  2653     if( sh4_x86.double_prec == 0 ) {
  2654         LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
  2655         MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2656         CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
  2657     }
  2658     sh4_x86.tstate = TSTATE_NONE;
  2659 :}
  2660 FIPR FVm, FVn {:  
  2661     COUNT_INST(I_FIPR);
  2662     check_fpuen();
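            // FIPR: fr[FVn*4+3] = 4-element dot product of vectors FVm and FVn.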
  2663     if( sh4_x86.double_prec == 0 ) {
  2664         if( sh4_x86.sse3_enabled ) {
  2665             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2666             MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2667             HADDPS_xmm_xmm( 4, 4 ); 
  2668             HADDPS_xmm_xmm( 4, 4 );
  2669             MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2670         } else {
  2671             push_fr( FVm<<2 );
  2672             push_fr( FVn<<2 );
  2673             FMULP_st(1);
  2674             push_fr( (FVm<<2)+1);
  2675             push_fr( (FVn<<2)+1);
  2676             FMULP_st(1);
  2677             FADDP_st(1);
  2678             push_fr( (FVm<<2)+2);
  2679             push_fr( (FVn<<2)+2);
  2680             FMULP_st(1);
  2681             FADDP_st(1);
  2682             push_fr( (FVm<<2)+3);
  2683             push_fr( (FVn<<2)+3);
  2684             FMULP_st(1);
  2685             FADDP_st(1);
  2686             pop_fr( (FVn<<2)+3);
  2687         }
  2688     }
  2689 :}
  2690 FTRV XMTRX, FVn {:  
  2691     COUNT_INST(I_FTRV);
  2692     check_fpuen();
  2693     if( sh4_x86.double_prec == 0 ) {
  2694         if( sh4_x86.sse3_enabled && sh4_x86.begin_callback == NULL ) {
  2695         	/* FIXME: For now, disable this inlining when we're running in shadow mode -
  2696         	 * it gives slightly different results from the emu core. Need to
  2697         	 * fix the precision so both give the right results.
  2698         	 */
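            // FTRV computes FVn' = XMTRX * FVn, i.e. result[i] = sum over j
            // of M[i][j] * v[j], here as four broadcast-multiply-add terms.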
  2699             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2700             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2701             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2702             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2704             MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2705             MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2706             MOV_xmm_xmm( 4, 6 );
  2707             MOV_xmm_xmm( 5, 7 );
  2708             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2709             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2710             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2711             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2712             MULPS_xmm_xmm( 0, 4 );
  2713             MULPS_xmm_xmm( 1, 5 );
  2714             MULPS_xmm_xmm( 2, 6 );
  2715             MULPS_xmm_xmm( 3, 7 );
  2716             ADDPS_xmm_xmm( 5, 4 );
  2717             ADDPS_xmm_xmm( 7, 6 );
  2718             ADDPS_xmm_xmm( 6, 4 );
  2719             MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2720         } else {
  2721             LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
  2722             CALL1_ptr_r32( sh4_ftrv, REG_EAX );
  2723         }
  2724     }
  2725     sh4_x86.tstate = TSTATE_NONE;
  2726 :}
  2728 FRCHG {:  
  2729     COUNT_INST(I_FRCHG);
  2730     check_fpuen();
  2731     XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
  2732     CALL_ptr( sh4_switch_fr_banks );
  2733     sh4_x86.tstate = TSTATE_NONE;
  2734 :}
  2735 FSCHG {:  
  2736     COUNT_INST(I_FSCHG);
  2737     check_fpuen();
  2738     XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
  2739     XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
  2740     sh4_x86.tstate = TSTATE_NONE;
  2741     sh4_x86.double_size = !sh4_x86.double_size;
  2742     sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
  2743 :}
  2745 /* Processor control instructions */
  2746 LDC Rm, SR {:
  2747     COUNT_INST(I_LDCSR);
  2748     if( sh4_x86.in_delay_slot ) {
  2749 	SLOTILLEGAL();
  2750     } else {
  2751 	check_priv();
  2752 	load_reg( REG_EAX, Rm );
  2753 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2754 	sh4_x86.fpuen_checked = FALSE;
  2755 	sh4_x86.tstate = TSTATE_NONE;
  2756 	sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2757 	return 2;
  2758     }
  2759 :}
  2760 LDC Rm, GBR {: 
  2761     COUNT_INST(I_LDC);
  2762     load_reg( REG_EAX, Rm );
  2763     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2764 :}
  2765 LDC Rm, VBR {:  
  2766     COUNT_INST(I_LDC);
  2767     check_priv();
  2768     load_reg( REG_EAX, Rm );
  2769     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2770     sh4_x86.tstate = TSTATE_NONE;
  2771 :}
  2772 LDC Rm, SSR {:  
  2773     COUNT_INST(I_LDC);
  2774     check_priv();
  2775     load_reg( REG_EAX, Rm );
  2776     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2777     sh4_x86.tstate = TSTATE_NONE;
  2778 :}
  2779 LDC Rm, SGR {:  
  2780     COUNT_INST(I_LDC);
  2781     check_priv();
  2782     load_reg( REG_EAX, Rm );
  2783     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2784     sh4_x86.tstate = TSTATE_NONE;
  2785 :}
  2786 LDC Rm, SPC {:  
  2787     COUNT_INST(I_LDC);
  2788     check_priv();
  2789     load_reg( REG_EAX, Rm );
  2790     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2791     sh4_x86.tstate = TSTATE_NONE;
  2792 :}
  2793 LDC Rm, DBR {:  
  2794     COUNT_INST(I_LDC);
  2795     check_priv();
  2796     load_reg( REG_EAX, Rm );
  2797     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2798     sh4_x86.tstate = TSTATE_NONE;
  2799 :}
  2800 LDC Rm, Rn_BANK {:  
  2801     COUNT_INST(I_LDC);
  2802     check_priv();
  2803     load_reg( REG_EAX, Rm );
  2804     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2805     sh4_x86.tstate = TSTATE_NONE;
  2806 :}
  2807 LDC.L @Rm+, GBR {:  
  2808     COUNT_INST(I_LDCM);
  2809     load_reg( REG_EAX, Rm );
  2810     check_ralign32( REG_EAX );
  2811     MEM_READ_LONG( REG_EAX, REG_EAX );
  2812     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2813     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2814     sh4_x86.tstate = TSTATE_NONE;
  2815 :}
  2816 LDC.L @Rm+, SR {:
  2817     COUNT_INST(I_LDCSRM);
  2818     if( sh4_x86.in_delay_slot ) {
  2819 	SLOTILLEGAL();
  2820     } else {
  2821 	check_priv();
  2822 	load_reg( REG_EAX, Rm );
  2823 	check_ralign32( REG_EAX );
  2824 	MEM_READ_LONG( REG_EAX, REG_EAX );
  2825 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2826 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2827 	sh4_x86.fpuen_checked = FALSE;
  2828 	sh4_x86.tstate = TSTATE_NONE;
  2829 	sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2830 	return 2;
  2831     }
  2832 :}
  2833 LDC.L @Rm+, VBR {:  
  2834     COUNT_INST(I_LDCM);
  2835     check_priv();
  2836     load_reg( REG_EAX, Rm );
  2837     check_ralign32( REG_EAX );
  2838     MEM_READ_LONG( REG_EAX, REG_EAX );
  2839     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2840     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2841     sh4_x86.tstate = TSTATE_NONE;
  2842 :}
  2843 LDC.L @Rm+, SSR {:
  2844     COUNT_INST(I_LDCM);
  2845     check_priv();
  2846     load_reg( REG_EAX, Rm );
  2847     check_ralign32( REG_EAX );
  2848     MEM_READ_LONG( REG_EAX, REG_EAX );
  2849     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2850     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2851     sh4_x86.tstate = TSTATE_NONE;
  2852 :}
  2853 LDC.L @Rm+, SGR {:  
  2854     COUNT_INST(I_LDCM);
  2855     check_priv();
  2856     load_reg( REG_EAX, Rm );
  2857     check_ralign32( REG_EAX );
  2858     MEM_READ_LONG( REG_EAX, REG_EAX );
  2859     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2860     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2861     sh4_x86.tstate = TSTATE_NONE;
  2862 :}
  2863 LDC.L @Rm+, SPC {:  
  2864     COUNT_INST(I_LDCM);
  2865     check_priv();
  2866     load_reg( REG_EAX, Rm );
  2867     check_ralign32( REG_EAX );
  2868     MEM_READ_LONG( REG_EAX, REG_EAX );
  2869     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2870     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2871     sh4_x86.tstate = TSTATE_NONE;
  2872 :}
  2873 LDC.L @Rm+, DBR {:  
  2874     COUNT_INST(I_LDCM);
  2875     check_priv();
  2876     load_reg( REG_EAX, Rm );
  2877     check_ralign32( REG_EAX );
  2878     MEM_READ_LONG( REG_EAX, REG_EAX );
  2879     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2880     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2881     sh4_x86.tstate = TSTATE_NONE;
  2882 :}
  2883 LDC.L @Rm+, Rn_BANK {:  
  2884     COUNT_INST(I_LDCM);
  2885     check_priv();
  2886     load_reg( REG_EAX, Rm );
  2887     check_ralign32( REG_EAX );
  2888     MEM_READ_LONG( REG_EAX, REG_EAX );
  2889     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2890     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2891     sh4_x86.tstate = TSTATE_NONE;
  2892 :}
  2893 LDS Rm, FPSCR {:
  2894     COUNT_INST(I_LDSFPSCR);
  2895     check_fpuen();
  2896     load_reg( REG_EAX, Rm );
  2897     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2898     sh4_x86.tstate = TSTATE_NONE;
  2899     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2900     return 2;
  2901 :}
  2902 LDS.L @Rm+, FPSCR {:  
  2903     COUNT_INST(I_LDSFPSCRM);
  2904     check_fpuen();
  2905     load_reg( REG_EAX, Rm );
  2906     check_ralign32( REG_EAX );
  2907     MEM_READ_LONG( REG_EAX, REG_EAX );
  2908     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2909     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2910     sh4_x86.tstate = TSTATE_NONE;
  2911     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2912     return 2;
  2913 :}
  2914 LDS Rm, FPUL {:  
  2915     COUNT_INST(I_LDS);
  2916     check_fpuen();
  2917     load_reg( REG_EAX, Rm );
  2918     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2919 :}
  2920 LDS.L @Rm+, FPUL {:  
  2921     COUNT_INST(I_LDSM);
  2922     check_fpuen();
  2923     load_reg( REG_EAX, Rm );
  2924     check_ralign32( REG_EAX );
  2925     MEM_READ_LONG( REG_EAX, REG_EAX );
  2926     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2927     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2928     sh4_x86.tstate = TSTATE_NONE;
  2929 :}
  2930 LDS Rm, MACH {: 
  2931     COUNT_INST(I_LDS);
  2932     load_reg( REG_EAX, Rm );
  2933     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2934 :}
  2935 LDS.L @Rm+, MACH {:  
  2936     COUNT_INST(I_LDSM);
  2937     load_reg( REG_EAX, Rm );
  2938     check_ralign32( REG_EAX );
  2939     MEM_READ_LONG( REG_EAX, REG_EAX );
  2940     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2941     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2942     sh4_x86.tstate = TSTATE_NONE;
  2943 :}
  2944 LDS Rm, MACL {:  
  2945     COUNT_INST(I_LDS);
  2946     load_reg( REG_EAX, Rm );
  2947     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2948 :}
  2949 LDS.L @Rm+, MACL {:  
  2950     COUNT_INST(I_LDSM);
  2951     load_reg( REG_EAX, Rm );
  2952     check_ralign32( REG_EAX );
  2953     MEM_READ_LONG( REG_EAX, REG_EAX );
  2954     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2955     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2956     sh4_x86.tstate = TSTATE_NONE;
  2957 :}
  2958 LDS Rm, PR {:  
  2959     COUNT_INST(I_LDS);
  2960     load_reg( REG_EAX, Rm );
  2961     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2962 :}
  2963 LDS.L @Rm+, PR {:  
  2964     COUNT_INST(I_LDSM);
  2965     load_reg( REG_EAX, Rm );
  2966     check_ralign32( REG_EAX );
  2967     MEM_READ_LONG( REG_EAX, REG_EAX );
  2968     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2969     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2970     sh4_x86.tstate = TSTATE_NONE;
  2971 :}
  2972 LDTLB {:  
  2973     COUNT_INST(I_LDTLB);
  2974     CALL_ptr( MMU_ldtlb );
  2975     sh4_x86.tstate = TSTATE_NONE;
  2976 :}
  2977 OCBI @Rn {:
  2978     COUNT_INST(I_OCBI);
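            // Cache-block invalidate/purge/write-back are no-ops here, as the
            // operand cache isn't modelled (likewise OCBP and OCBWB below).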
  2979 :}
  2980 OCBP @Rn {:
  2981     COUNT_INST(I_OCBP);
  2982 :}
  2983 OCBWB @Rn {:
  2984     COUNT_INST(I_OCBWB);
  2985 :}
  2986 PREF @Rn {:
  2987     COUNT_INST(I_PREF);
  2988     load_reg( REG_EAX, Rn );
  2989     MEM_PREFETCH( REG_EAX );
  2990     sh4_x86.tstate = TSTATE_NONE;
  2991 :}
  2992 SLEEP {: 
  2993     COUNT_INST(I_SLEEP);
  2994     check_priv();
  2995     CALL_ptr( sh4_sleep );
  2996     sh4_x86.tstate = TSTATE_NONE;
  2997     sh4_x86.in_delay_slot = DELAY_NONE;
  2998     return 2;
  2999 :}
  3000 STC SR, Rn {:
  3001     COUNT_INST(I_STCSR);
  3002     check_priv();
  3003     CALL_ptr(sh4_read_sr);
  3004     store_reg( REG_EAX, Rn );
  3005     sh4_x86.tstate = TSTATE_NONE;
  3006 :}
  3007 STC GBR, Rn {:  
  3008     COUNT_INST(I_STC);
  3009     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  3010     store_reg( REG_EAX, Rn );
  3011 :}
  3012 STC VBR, Rn {:  
  3013     COUNT_INST(I_STC);
  3014     check_priv();
  3015     MOVL_rbpdisp_r32( R_VBR, REG_EAX );
  3016     store_reg( REG_EAX, Rn );
  3017     sh4_x86.tstate = TSTATE_NONE;
  3018 :}
  3019 STC SSR, Rn {:  
  3020     COUNT_INST(I_STC);
  3021     check_priv();
  3022     MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  3023     store_reg( REG_EAX, Rn );
  3024     sh4_x86.tstate = TSTATE_NONE;
  3025 :}
  3026 STC SPC, Rn {:  
  3027     COUNT_INST(I_STC);
  3028     check_priv();
  3029     MOVL_rbpdisp_r32( R_SPC, REG_EAX );
  3030     store_reg( REG_EAX, Rn );
  3031     sh4_x86.tstate = TSTATE_NONE;
  3032 :}
  3033 STC SGR, Rn {:  
  3034     COUNT_INST(I_STC);
  3035     check_priv();
  3036     MOVL_rbpdisp_r32( R_SGR, REG_EAX );
  3037     store_reg( REG_EAX, Rn );
  3038     sh4_x86.tstate = TSTATE_NONE;
  3039 :}
  3040 STC DBR, Rn {:  
  3041     COUNT_INST(I_STC);
  3042     check_priv();
  3043     MOVL_rbpdisp_r32( R_DBR, REG_EAX );
  3044     store_reg( REG_EAX, Rn );
  3045     sh4_x86.tstate = TSTATE_NONE;
  3046 :}
  3047 STC Rm_BANK, Rn {:
  3048     COUNT_INST(I_STC);
  3049     check_priv();
  3050     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
  3051     store_reg( REG_EAX, Rn );
  3052     sh4_x86.tstate = TSTATE_NONE;
  3053 :}
  3054 STC.L SR, @-Rn {:
  3055     COUNT_INST(I_STCSRM);
  3056     check_priv();
  3057     CALL_ptr( sh4_read_sr );
  3058     MOVL_r32_r32( REG_EAX, REG_EDX );
  3059     load_reg( REG_EAX, Rn );
  3060     check_walign32( REG_EAX );
  3061     LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  3062     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3063     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3064     sh4_x86.tstate = TSTATE_NONE;
  3065 :}
  3066 STC.L VBR, @-Rn {:  
  3067     COUNT_INST(I_STCM);
  3068     check_priv();
  3069     load_reg( REG_EAX, Rn );
  3070     check_walign32( REG_EAX );
  3071     ADDL_imms_r32( -4, REG_EAX );
  3072     MOVL_rbpdisp_r32( R_VBR, REG_EDX );
  3073     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3074     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3075     sh4_x86.tstate = TSTATE_NONE;
  3076 :}
  3077 STC.L SSR, @-Rn {:  
  3078     COUNT_INST(I_STCM);
  3079     check_priv();
  3080     load_reg( REG_EAX, Rn );
  3081     check_walign32( REG_EAX );
  3082     ADDL_imms_r32( -4, REG_EAX );
  3083     MOVL_rbpdisp_r32( R_SSR, REG_EDX );
  3084     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3085     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3086     sh4_x86.tstate = TSTATE_NONE;
  3087 :}
  3088 STC.L SPC, @-Rn {:
  3089     COUNT_INST(I_STCM);
  3090     check_priv();
  3091     load_reg( REG_EAX, Rn );
  3092     check_walign32( REG_EAX );
  3093     ADDL_imms_r32( -4, REG_EAX );
  3094     MOVL_rbpdisp_r32( R_SPC, REG_EDX );
  3095     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3096     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3097     sh4_x86.tstate = TSTATE_NONE;
  3098 :}
  3099 STC.L SGR, @-Rn {:  
  3100     COUNT_INST(I_STCM);
  3101     check_priv();
  3102     load_reg( REG_EAX, Rn );
  3103     check_walign32( REG_EAX );
  3104     ADDL_imms_r32( -4, REG_EAX );
  3105     MOVL_rbpdisp_r32( R_SGR, REG_EDX );
  3106     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3107     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3108     sh4_x86.tstate = TSTATE_NONE;
  3109 :}
  3110 STC.L DBR, @-Rn {:  
  3111     COUNT_INST(I_STCM);
  3112     check_priv();
  3113     load_reg( REG_EAX, Rn );
  3114     check_walign32( REG_EAX );
  3115     ADDL_imms_r32( -4, REG_EAX );
  3116     MOVL_rbpdisp_r32( R_DBR, REG_EDX );
  3117     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3118     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3119     sh4_x86.tstate = TSTATE_NONE;
  3120 :}
  3121 STC.L Rm_BANK, @-Rn {:  
  3122     COUNT_INST(I_STCM);
  3123     check_priv();
  3124     load_reg( REG_EAX, Rn );
  3125     check_walign32( REG_EAX );
  3126     ADDL_imms_r32( -4, REG_EAX );
  3127     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
  3128     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3129     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3130     sh4_x86.tstate = TSTATE_NONE;
  3131 :}
  3132 STC.L GBR, @-Rn {:  
  3133     COUNT_INST(I_STCM);
  3134     load_reg( REG_EAX, Rn );
  3135     check_walign32( REG_EAX );
  3136     ADDL_imms_r32( -4, REG_EAX );
  3137     MOVL_rbpdisp_r32( R_GBR, REG_EDX );
  3138     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3139     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3140     sh4_x86.tstate = TSTATE_NONE;
  3141 :}
  3142 STS FPSCR, Rn {:  
  3143     COUNT_INST(I_STSFPSCR);
  3144     check_fpuen();
  3145     MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
  3146     store_reg( REG_EAX, Rn );
  3147 :}
  3148 STS.L FPSCR, @-Rn {:  
  3149     COUNT_INST(I_STSFPSCRM);
  3150     check_fpuen();
  3151     load_reg( REG_EAX, Rn );
  3152     check_walign32( REG_EAX );
  3153     ADDL_imms_r32( -4, REG_EAX );
  3154     MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
  3155     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3156     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3157     sh4_x86.tstate = TSTATE_NONE;
  3158 :}
  3159 STS FPUL, Rn {:  
  3160     COUNT_INST(I_STS);
  3161     check_fpuen();
  3162     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  3163     store_reg( REG_EAX, Rn );
  3164 :}
  3165 STS.L FPUL, @-Rn {:  
  3166     COUNT_INST(I_STSM);
  3167     check_fpuen();
  3168     load_reg( REG_EAX, Rn );
  3169     check_walign32( REG_EAX );
  3170     ADDL_imms_r32( -4, REG_EAX );
  3171     MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
  3172     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3173     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3174     sh4_x86.tstate = TSTATE_NONE;
  3175 :}
  3176 STS MACH, Rn {:  
  3177     COUNT_INST(I_STS);
  3178     MOVL_rbpdisp_r32( R_MACH, REG_EAX );
  3179     store_reg( REG_EAX, Rn );
  3180 :}
  3181 STS.L MACH, @-Rn {:  
  3182     COUNT_INST(I_STSM);
  3183     load_reg( REG_EAX, Rn );
  3184     check_walign32( REG_EAX );
  3185     ADDL_imms_r32( -4, REG_EAX );
  3186     MOVL_rbpdisp_r32( R_MACH, REG_EDX );
  3187     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3188     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3189     sh4_x86.tstate = TSTATE_NONE;
  3190 :}
  3191 STS MACL, Rn {:  
  3192     COUNT_INST(I_STS);
  3193     MOVL_rbpdisp_r32( R_MACL, REG_EAX );
  3194     store_reg( REG_EAX, Rn );
  3195 :}
  3196 STS.L MACL, @-Rn {:  
  3197     COUNT_INST(I_STSM);
  3198     load_reg( REG_EAX, Rn );
  3199     check_walign32( REG_EAX );
  3200     ADDL_imms_r32( -4, REG_EAX );
  3201     MOVL_rbpdisp_r32( R_MACL, REG_EDX );
  3202     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3203     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3204     sh4_x86.tstate = TSTATE_NONE;
  3205 :}
  3206 STS PR, Rn {:  
  3207     COUNT_INST(I_STS);
  3208     MOVL_rbpdisp_r32( R_PR, REG_EAX );
  3209     store_reg( REG_EAX, Rn );
  3210 :}
  3211 STS.L PR, @-Rn {:  
  3212     COUNT_INST(I_STSM);
  3213     load_reg( REG_EAX, Rn );
  3214     check_walign32( REG_EAX );
  3215     ADDL_imms_r32( -4, REG_EAX );
  3216     MOVL_rbpdisp_r32( R_PR, REG_EDX );
  3217     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3218     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3219     sh4_x86.tstate = TSTATE_NONE;
  3220 :}
  3222 NOP {: 
  3223     COUNT_INST(I_NOP);
  3224     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  3225 :}
  3226 %%
  3227     sh4_x86.in_delay_slot = DELAY_NONE;
  3228     return 0;
  3229 }
  3232 /**
  3233  * The unwind methods only work if we compiled with DWARF2 frame information
  3234  * (i.e. -fexceptions); otherwise we have to use the direct frame scan.
  3235  */
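       /* xlat_get_native_pc(code, code_size) walks the host call stack and
        * returns the first return address that falls inside
        * [code, code+code_size), or NULL if none is found - presumably the
        * point inside a translated block at which execution stopped. */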
  3236 #ifdef HAVE_EXCEPTIONS
  3237 #include <unwind.h>
  3239 struct UnwindInfo {
  3240     uintptr_t block_start;
  3241     uintptr_t block_end;
  3242     void *pc;
  3243 };
  3245 static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
  3246 {
  3247     struct UnwindInfo *info = arg;
  3248     void *pc = (void *)_Unwind_GetIP(context);
  3249     if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
  3250         info->pc = pc;
  3251         return _URC_NORMAL_STOP;
  3252     }
  3253     return _URC_NO_REASON;
  3254 }
  3256 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3257 {
  3258     struct _Unwind_Exception exc;
  3259     struct UnwindInfo info;
  3261     info.pc = NULL;
  3262     info.block_start = (uintptr_t)code;
  3263     info.block_end = info.block_start + code_size;
  3264     void *result = NULL;
  3265     _Unwind_Backtrace( xlat_check_frame, &info );
  3266     return info.pc;
  3267 }
  3268 #else
  3269 /* Assume this is an ia32 build - amd64 should always have dwarf information */
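       /* Rough C equivalent of the asm below (a sketch; current_ebp is
        * illustrative): translated code runs with EBP == &sh4r + 128, so a
        * frame whose saved EBP equals that value was called directly from a
        * translated block, and its saved return address points into it.
        *
        *     void **fp = current_ebp;
        *     for( int i = 0; i < 8 && fp != NULL; i++, fp = (void **)fp[0] ) {
        *         if( fp[0] == (void *)(((uint8_t *)&sh4r) + 128) )
        *             return fp[1];
        *     }
        *     return NULL;
        */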
  3270 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3271 {
  3272     void *result = NULL;
  3273     __asm__(
  3274         "mov %%ebp, %%eax\n\t"
  3275         "mov $0x8, %%ecx\n\t"
  3276         "mov %1, %%edx\n"
  3277         "frame_loop: test %%eax, %%eax\n\t"
  3278         "je frame_not_found\n\t"
  3279         "cmp (%%eax), %%edx\n\t"
  3280         "je frame_found\n\t"
  3281         "sub $0x1, %%ecx\n\t"
  3282         "je frame_not_found\n\t"
  3283         "movl (%%eax), %%eax\n\t"
  3284         "jmp frame_loop\n"
  3285         "frame_found: movl 0x4(%%eax), %0\n"
  3286         "frame_not_found:"
  3287         : "=r" (result)
  3288         : "r" (((uint8_t *)&sh4r) + 128 )
  3289         : "eax", "ecx", "edx" );
  3290     return result;
  3291 }
  3292 #endif