lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 1186:2dc47c67bb93
prev 1182:b38a327ad8fa
next 1191:12fdf3aafcd4
author nkeynes
date Fri Dec 02 18:14:27 2011 +1000 (12 years ago)
permissions -rw-r--r--
last change Handle calls to sh4_disasm_instruction when the memory isn't mapped
(as can happen if we try to print a translated block that's been unmapped)
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "lxdream.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4dasm.h"
    31 #include "sh4/sh4trans.h"
    32 #include "sh4/sh4stat.h"
    33 #include "sh4/sh4mmio.h"
    34 #include "sh4/mmu.h"
    35 #include "xlat/xltcache.h"
    36 #include "xlat/x86/x86op.h"
    37 #include "x86dasm/x86dasm.h"
    38 #include "clock.h"
    40 #define DEFAULT_BACKPATCH_SIZE 4096
    42 /* Offset of a reg relative to the sh4r structure */
    43 #define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)
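/* Illustrative sketch (not part of the original source): the emitter keeps a
 * host base register pointing at ((char *)&sh4r) + 128 (note the "sh4r+128"
 * symbol-table entry further down), so REG_OFFSET() biases every field offset
 * by -128 and most frequently-used fields stay within a signed 8-bit
 * displacement:
 *
 *     char     *base = ((char *)&sh4r) + 128;     // value held in the base reg
 *     int32_t   disp = REG_OFFSET(pc);            // field offset minus 128
 *     uint32_t *pc   = (uint32_t *)(base + disp); // == &sh4r.pc
 */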
    45 #define R_T      REG_OFFSET(t)
    46 #define R_Q      REG_OFFSET(q)
    47 #define R_S      REG_OFFSET(s)
    48 #define R_M      REG_OFFSET(m)
    49 #define R_SR     REG_OFFSET(sr)
    50 #define R_GBR    REG_OFFSET(gbr)
    51 #define R_SSR    REG_OFFSET(ssr)
    52 #define R_SPC    REG_OFFSET(spc)
    53 #define R_VBR    REG_OFFSET(vbr)
    54 #define R_MACH   REG_OFFSET(mac)+4
    55 #define R_MACL   REG_OFFSET(mac)
    56 #define R_PC     REG_OFFSET(pc)
    57 #define R_NEW_PC REG_OFFSET(new_pc)
    58 #define R_PR     REG_OFFSET(pr)
    59 #define R_SGR    REG_OFFSET(sgr)
    60 #define R_FPUL   REG_OFFSET(fpul)
    61 #define R_FPSCR  REG_OFFSET(fpscr)
    62 #define R_DBR    REG_OFFSET(dbr)
    63 #define R_R(rn)  REG_OFFSET(r[rn])
    64 #define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
    65 #define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
    66 #define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
    67 #define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
    68 #define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
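/* Illustrative note (inferred, not stated in the source): the ^1 swizzle
 * stores each pair of singles swapped, so that on a little-endian host an
 * aligned pair reads back directly as the SH4 double it represents. For f=0:
 *
 *     R_FR(0) -> fr[0][1]   // FR0, the high word of DR0
 *     R_FR(1) -> fr[0][0]   // FR1, the low word of DR0
 *     R_DR(0) -> fr[0][0]   // the pair loads as one little-endian double
 */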
    70 #define DELAY_NONE 0
    71 #define DELAY_PC 1
    72 #define DELAY_PC_PR 2
    74 #define SH4_MODE_UNKNOWN -1
    76 struct backpatch_record {
    77     uint32_t fixup_offset;
    78     uint32_t fixup_icount;
    79     int32_t exc_code;
    80 };
    82 /** 
    83  * Struct to manage internal translation state. This state is not saved -
    84  * it is only valid between calls to sh4_translate_begin_block() and
    85  * sh4_translate_end_block()
    86  */
    87 struct sh4_x86_state {
    88     int in_delay_slot;
    89     uint8_t *code;
    90     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    91     gboolean branch_taken; /* true if we branched unconditionally */
    92     gboolean double_prec; /* true if FPU is in double-precision mode */
    93     gboolean double_size; /* true if FPU is in double-size mode */
    94     gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    95     uint32_t block_start_pc;
    96     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    97     uint32_t sh4_mode;     /* Mirror of sh4r.xlat_sh4_mode */
    98     int tstate;
   100     /* mode settings */
   101     gboolean tlb_on; /* True if tlb translation is active */
   102     struct mem_region_fn **priv_address_space;
   103     struct mem_region_fn **user_address_space;
   105     /* Instrumentation */
   106     xlat_block_begin_callback_t begin_callback;
   107     xlat_block_end_callback_t end_callback;
   108     gboolean fastmem;
   109     gboolean profile_blocks;
   111     /* Allocated memory for the (block-wide) back-patch list */
   112     struct backpatch_record *backpatch_list;
   113     uint32_t backpatch_posn;
   114     uint32_t backpatch_size;
   115 };
   117 static struct sh4_x86_state sh4_x86;
   119 static uint32_t max_int = 0x7FFFFFFF;
   120 static uint32_t min_int = 0x80000000;
   121 static uint32_t save_fcw; /* save value for fpu control word */
   122 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
   124 static struct x86_symbol x86_symbol_table[] = {
   125     { "sh4r+128", ((char *)&sh4r)+128 },
   126     { "sh4_cpu_period", &sh4_cpu_period },
   127     { "sh4_address_space", NULL },
   128     { "sh4_user_address_space", NULL },
   129     { "sh4_translate_breakpoint_hit", sh4_translate_breakpoint_hit },
   130     { "sh4_write_fpscr", sh4_write_fpscr },
   131     { "sh4_write_sr", sh4_write_sr },
   132     { "sh4_read_sr", sh4_read_sr },
   133     { "sh4_sleep", sh4_sleep },
   134     { "sh4_fsca", sh4_fsca },
   135     { "sh4_ftrv", sh4_ftrv },
   136     { "sh4_switch_fr_banks", sh4_switch_fr_banks },
   137     { "sh4_execute_instruction", sh4_execute_instruction },
   138     { "signsat48", signsat48 },
   139     { "xlat_get_code_by_vma", xlat_get_code_by_vma },
   140     { "xlat_get_code", xlat_get_code }
   141 };
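/* CPUID leaf 1 (EAX=1) reports feature flags in ECX; SSE3 is bit 0, which is
 * the bit tested below. */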
   144 gboolean is_sse3_supported()
   145 {
   146     uint32_t features;
   148     __asm__ __volatile__(
   149         "mov $0x01, %%eax\n\t"
   150         "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
   151     return (features & 1) ? TRUE : FALSE;
   152 }
   154 void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
   155 {
   156     sh4_x86.priv_address_space = priv;
   157     sh4_x86.user_address_space = user;
   158     x86_symbol_table[2].ptr = priv;
   159     x86_symbol_table[3].ptr = user;
   160 }
   162 void sh4_translate_init(void)
   163 {
   164     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   165     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   166     sh4_x86.begin_callback = NULL;
   167     sh4_x86.end_callback = NULL;
   168     sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
   169     sh4_x86.fastmem = TRUE;
   170     sh4_x86.profile_blocks = FALSE;
   171     sh4_x86.sse3_enabled = is_sse3_supported();
   172     x86_disasm_init();
   173     x86_set_symtab( x86_symbol_table, sizeof(x86_symbol_table)/sizeof(struct x86_symbol) );
   174 }
   176 void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
   177 {
   178     sh4_x86.begin_callback = begin;
   179     sh4_x86.end_callback = end;
   180 }
   182 void sh4_translate_set_fastmem( gboolean flag )
   183 {
   184     sh4_x86.fastmem = flag;
   185 }
   187 void sh4_translate_set_profile_blocks( gboolean flag )
   188 {
   189     sh4_x86.profile_blocks = flag;
   190 }
   192 gboolean sh4_translate_get_profile_blocks()
   193 {
   194     return sh4_x86.profile_blocks;
   195 }
   197 /**
    198  * Disassemble the given translated code block, and its source SH4 code block
   199  * side-by-side. The current native pc will be marked if non-null.
   200  */
   201 void sh4_translate_disasm_block( FILE *out, void *code, sh4addr_t source_start, void *native_pc )
   202 {
   203     char buf[256];
   204     char op[256];
   206     uintptr_t target_start = (uintptr_t)code, target_pc;
   207     uintptr_t target_end = target_start + xlat_get_code_size(code);
   208     uint32_t source_pc = source_start;
   209     uint32_t source_end = source_pc;
   210     xlat_recovery_record_t source_recov_table = XLAT_RECOVERY_TABLE(code);
   211     xlat_recovery_record_t source_recov_end = source_recov_table + XLAT_BLOCK_FOR_CODE(code)->recover_table_size - 1;
   213     for( target_pc = target_start; target_pc < target_end;  ) {
   214         uintptr_t pc2 = x86_disasm_instruction( target_pc, buf, sizeof(buf), op );
   215 #if SIZEOF_VOID_P == 8
   216         fprintf( out, "%c%016lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
   217                       target_pc, op, buf );
   218 #else
   219         fprintf( out, "%c%08lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
   220                       target_pc, op, buf );
   221 #endif        
   222         if( source_recov_table < source_recov_end && 
   223             target_pc >= (target_start + source_recov_table->xlat_offset) ) {
   224             source_recov_table++;
   225             if( source_end < (source_start + (source_recov_table->sh4_icount)*2) )
   226                 source_end = source_start + (source_recov_table->sh4_icount)*2;
   227         }
   229         if( source_pc < source_end ) {
   230             uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
   231             fprintf( out, " %08X: %s  %s\n", source_pc, op, buf );
   232             source_pc = source_pc2;
   233         } else {
   234             fprintf( out, "\n" );
   235         }
   237         target_pc = pc2;
   238     }
   240     while( source_pc < source_end ) {
   241         uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
   242         fprintf( out, "%*c %08X: %s  %s\n", 72,' ', source_pc, op, buf );
   243         source_pc = source_pc2;
   244     }
   245 }
   247 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   248 {
   249     int reloc_size = 4;
   251     if( exc_code == -2 ) {
   252         reloc_size = sizeof(void *);
   253     }
   255     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   256 	sh4_x86.backpatch_size <<= 1;
   257 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   258 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   259 	assert( sh4_x86.backpatch_list != NULL );
   260     }
   261     if( sh4_x86.in_delay_slot ) {
   262 	fixup_pc -= 2;
   263     }
   265     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   266 	(((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
   267     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   268     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   269     sh4_x86.backpatch_posn++;
   270 }
   272 #define TSTATE_NONE -1
   273 #define TSTATE_O    X86_COND_O
   274 #define TSTATE_C    X86_COND_C
   275 #define TSTATE_E    X86_COND_E
   276 #define TSTATE_NE   X86_COND_NE
   277 #define TSTATE_G    X86_COND_G
   278 #define TSTATE_GE   X86_COND_GE
   279 #define TSTATE_A    X86_COND_A
   280 #define TSTATE_AE   X86_COND_AE
   282 #define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
   283 #define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
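/* Usage sketch (illustrative): the *_label macros below emit a rel8 jump with
 * a -1 placeholder displacement, and MARK_JMP8 records where that byte lives;
 * JMP_TARGET later adds the distance to the current output position:
 *
 *     JE_label(nocode);     // "je ." with rel8 = -1, byte address saved
 *     ...                   // code skipped when the branch is taken
 *     JMP_TARGET(nocode);   // rel8 += (here - byte), landing the jump here
 */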
   285 /* Convenience instructions */
   286 #define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
   287 #define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
   288 #define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
   289 #define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
   290 #define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
   291 #define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
   292 #define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
   293 #define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
   294 #define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
   295 #define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
   296 #define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
   297 #define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
   298 #define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
   299 #define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
   300 #define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
   301 #define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
   302 #define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
   303 #define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
   304 #define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
   305 #define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)
   307 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
   308 #define JT_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
   309 	CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
   310     JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)
   312 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
   313 #define JF_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
   314 	CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
   315     JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
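/* Illustrative note: sh4_x86.tstate tracks which host condition code currently
 * mirrors sh4r.t. With no live flags (TSTATE_NONE) the macros above compare
 * sh4r.t against 1, after which X86_COND_E means "T set"; JF branches on the
 * complementary condition via tstate^1 (x86 condition codes pair up as
 * even/odd complements, e.g. E/NE). */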
   318 #define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
   319 #define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )
   321 /**
   322  * Load an FR register (single-precision floating point) into an integer x86
   323  * register (eg for register-to-register moves)
   324  */
   325 #define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
   326 #define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )
   328 /**
   329  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   330  */
   331 #define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
   332 #define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )
   334 /**
    335  * Store an FR register (single-precision floating point) from an integer x86
   336  * register (eg for register-to-register moves)
   337  */
   338 #define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
   339 #define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )
   341 #define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   342 #define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   345 #define push_fpul()  FLDF_rbpdisp(R_FPUL)
   346 #define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
   347 #define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
   348 #define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
   349 #define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
   350 #define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
   351 #define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
   352 #define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
   353 #define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
   354 #define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
   356 #ifdef ENABLE_SH4STATS
   357 #define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
   358 #else
   359 #define COUNT_INST(id)
   360 #endif
   363 /* Exception checks - Note that all exception checks will clobber EAX */
   365 #define check_priv( ) \
   366     if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
   367         if( sh4_x86.in_delay_slot ) { \
   368             exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2) ); \
   369         } else { \
   370             exit_block_exc(EXC_ILLEGAL, pc); \
   371         } \
   372         sh4_x86.branch_taken = TRUE; \
   373         sh4_x86.in_delay_slot = DELAY_NONE; \
   374         return 2; \
   375     }
   377 #define check_fpuen( ) \
   378     if( !sh4_x86.fpuen_checked ) {\
   379 	sh4_x86.fpuen_checked = TRUE;\
   380 	MOVL_rbpdisp_r32( R_SR, REG_EAX );\
   381 	ANDL_imms_r32( SR_FD, REG_EAX );\
   382 	if( sh4_x86.in_delay_slot ) {\
   383 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   384 	} else {\
   385 	    JNE_exc(EXC_FPU_DISABLED);\
   386 	}\
   387 	sh4_x86.tstate = TSTATE_NONE; \
   388     }
   390 #define check_ralign16( x86reg ) \
   391     TESTL_imms_r32( 0x00000001, x86reg ); \
   392     JNE_exc(EXC_DATA_ADDR_READ)
   394 #define check_walign16( x86reg ) \
   395     TESTL_imms_r32( 0x00000001, x86reg ); \
   396     JNE_exc(EXC_DATA_ADDR_WRITE);
   398 #define check_ralign32( x86reg ) \
   399     TESTL_imms_r32( 0x00000003, x86reg ); \
   400     JNE_exc(EXC_DATA_ADDR_READ)
   402 #define check_walign32( x86reg ) \
   403     TESTL_imms_r32( 0x00000003, x86reg ); \
   404     JNE_exc(EXC_DATA_ADDR_WRITE);
   406 #define check_ralign64( x86reg ) \
   407     TESTL_imms_r32( 0x00000007, x86reg ); \
   408     JNE_exc(EXC_DATA_ADDR_READ)
   410 #define check_walign64( x86reg ) \
   411     TESTL_imms_r32( 0x00000007, x86reg ); \
   412     JNE_exc(EXC_DATA_ADDR_WRITE);
   414 #define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)
   416 #define UNDEF(ir)
   417 /* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so 
   418  * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
   419  */
   420 #ifdef HAVE_FRAME_ADDRESS
   421 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
   422 {
   423     decode_address(address_space(), addr_reg);
   424     if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
   425         CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
   426     } else {
   427         if( addr_reg != REG_ARG1 ) {
   428             MOVL_r32_r32( addr_reg, REG_ARG1 );
   429         }
   430         MOVP_immptr_rptr( 0, REG_ARG2 );
   431         sh4_x86_add_backpatch( xlat_output, pc, -2 );
   432         CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
   433     }
   434     if( value_reg != REG_RESULT1 ) { 
   435         MOVL_r32_r32( REG_RESULT1, value_reg );
   436     }
   437 }
   439 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
   440 {
   441     decode_address(address_space(), addr_reg);
   442     if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
   443         CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
   444     } else {
   445         if( value_reg != REG_ARG2 ) {
   446             MOVL_r32_r32( value_reg, REG_ARG2 );
   447 	}        
   448         if( addr_reg != REG_ARG1 ) {
   449             MOVL_r32_r32( addr_reg, REG_ARG1 );
   450         }
   451 #if MAX_REG_ARG > 2        
   452         MOVP_immptr_rptr( 0, REG_ARG3 );
   453         sh4_x86_add_backpatch( xlat_output, pc, -2 );
   454         CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
   455 #else
   456         MOVL_imm32_rspdisp( 0, 0 );
   457         sh4_x86_add_backpatch( xlat_output, pc, -2 );
   458         CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
   459 #endif
   460     }
   461 }
   462 #else
   463 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
   464 {
   465     decode_address(address_space(), addr_reg);
   466     CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
   467     if( value_reg != REG_RESULT1 ) {
   468         MOVL_r32_r32( REG_RESULT1, value_reg );
   469     }
   470 }     
   472 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
   473 {
   474     decode_address(address_space(), addr_reg);
   475     CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
   476 }
   477 #endif
   479 #define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
   480 #define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
   481 #define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc) 
   482 #define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
   483 #define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
   484 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
   485 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
   486 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
   487 #define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)
   489 #define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
   491 /** Offset of xlat_sh4_mode field relative to the code pointer */ 
   492 #define XLAT_SH4_MODE_CODE_OFFSET  (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
   493 #define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
   494 #define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
   496 void sh4_translate_begin_block( sh4addr_t pc ) 
   497 {
    498     sh4_x86.code = xlat_output;
   499     sh4_x86.in_delay_slot = FALSE;
   500     sh4_x86.fpuen_checked = FALSE;
   501     sh4_x86.branch_taken = FALSE;
   502     sh4_x86.backpatch_posn = 0;
   503     sh4_x86.block_start_pc = pc;
   504     sh4_x86.tlb_on = IS_TLB_ENABLED();
   505     sh4_x86.tstate = TSTATE_NONE;
   506     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   507     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
   508     sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
   509     emit_prologue();
   510     if( sh4_x86.begin_callback ) {
   511         CALL_ptr( sh4_x86.begin_callback );
   512     }
   513     if( sh4_x86.profile_blocks ) {
   514     	MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
   515     	ADDL_imms_r32disp( 1, REG_EAX, 0 );
   516     }  
   517 }
   520 uint32_t sh4_translate_end_block_size()
   521 {
   522     if( sh4_x86.backpatch_posn <= 3 ) {
   523         return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
   524     } else {
   525         return EPILOGUE_SIZE + (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
   526     }
   527 }
   530 /**
   531  * Embed a breakpoint into the generated code
   532  */
   533 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   534 {
   535     MOVL_imm32_r32( pc, REG_EAX );
   536     CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
   537     sh4_x86.tstate = TSTATE_NONE;
   538 }
   541 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
   543 /**
   544  * Test if the loaded target code pointer in %eax is valid, and if so jump
   545  * directly into it, bypassing the normal exit.
   546  */
   547 static void jump_next_block()
   548 {
   549 	uint8_t *ptr = xlat_output;
   550 	TESTP_rptr_rptr(REG_EAX, REG_EAX);
   551 	JE_label(nocode);
   552 	if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
   553 	    /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
   554 	    MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
   555 	    CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
   556 	} else {
   557 	    CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
   558 	}
   559 	JNE_label(wrongmode);
   560 	LEAP_rptrdisp_rptr(REG_EAX, PROLOGUE_SIZE,REG_EAX);
   561 	if( sh4_x86.end_callback ) {
   562 	    /* Note this does leave the stack out of alignment, but doesn't matter
   563 	     * for what we're currently using it for.
   564 	     */
   565 	    PUSH_r32(REG_EAX);
   566 	    MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
   567 	    JMP_rptr(REG_ECX);
   568 	} else {
   569 	    JMP_rptr(REG_EAX);
   570 	}
   571 	JMP_TARGET(wrongmode);
   572 	MOVP_rptrdisp_rptr( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
    573 	int rel = ptr - xlat_output;
    574 	JMP_prerel(rel);
   575 	JMP_TARGET(nocode); 
   576 }
   578 /**
    579  * Find or translate the block for pc, then patch the call site into a direct jump to it.
   580  */
   581 static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc )
   582 {
   583     uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
   584     while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
   585         target = XLAT_BLOCK_CHAIN(target);
   586 	}
   587     if( target == NULL ) {
   588         target = sh4_translate_basic_block( pc );
   589     }
   590     uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
   591     *backpatch = 0xE9;
   592     *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)+PROLOGUE_SIZE-5;
   593     *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
   594     XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch; 
   596     uint8_t **retptr = ((uint8_t **)__builtin_frame_address(0))+1;
   597     assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
   598 	*retptr = backpatch;
   599 }
   601 static void emit_translate_and_backpatch()
   602 {
   603     /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
   604     CALL1_ptr_r32(sh4_translate_get_code_and_backpatch, REG_ARG1);
    606     /* When patched, the jmp instruction will be 5 bytes (either platform) -
    607      * we need to reserve sizeof(void*) bytes for the use-list
    608      * pointer
    609      */
   610     if( sizeof(void*) == 8 ) {
   611         NOP();
   612     } else {
   613         NOP2();
   614     }
   615 }
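/* Illustrative layout, inferred from the two functions above (not part of the
 * original source). The emitted call site is sized so it can later be patched
 * in place into a direct jump plus a use-list link:
 *
 *     before:  call sh4_translate_get_code_and_backpatch  ; 7 (i386) / 12 (x86-64) bytes
 *     after:   e9 <rel32>                                 ; 5-byte jmp to target+PROLOGUE_SIZE
 *              <sizeof(void*) bytes>                      ; next pointer in the block's use-list
 *
 * so the NOP padding brings both layouts to the same length (9 or 13 bytes).
 */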
   617 /**
   618  * If we're jumping to a fixed address (or at least fixed relative to the
    619  * current PC), we can do a direct branch. REG_ARG1 should contain
   620  * the PC at this point.
   621  */
   622 static void jump_next_block_fixed_pc( sh4addr_t pc )
   623 {
   624 	if( IS_IN_ICACHE(pc) ) {
   625 	    if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN ) {
   626 	        /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
   627 	         * fetch-and-backpatch routine, which will replace the call with a branch */
   628            emit_translate_and_backpatch();	         
   629            return;
   630 		} else {
   631             MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
   632             ANDP_imms_rptr( -4, REG_EAX );
   633         }
   634 	} else if( sh4_x86.tlb_on ) {
   635         CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
   636     } else {
   637         CALL1_ptr_r32(xlat_get_code, REG_ARG1);
   638     }
   639     jump_next_block();
   642 }
   644 void sh4_translate_unlink_block( void *use_list )
   645 {
   646 	uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */
   647 	void *next = use_list;
   648 	while( next != NULL ) {
   649     	xlat_output = (uint8_t *)next;
   650  	    next = *(void **)(xlat_output+5);
   651  		emit_translate_and_backpatch();
   652  	}
   653  	xlat_output = tmp;
   654 }
   658 static void exit_block()
   659 {
   660 	emit_epilogue();
   661 	if( sh4_x86.end_callback ) {
   662 	    MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
   663 	    JMP_rptr(REG_ECX);
   664 	} else {
   665 	    RET();
   666 	}
   667 }
   669 /**
   670  * Exit the block with sh4r.pc already written
   671  */
   672 void exit_block_pcset( sh4addr_t pc )
   673 {
   674     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   675     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   676     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   677     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   678     JBE_label(exitloop);
   679     MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
   680     if( sh4_x86.tlb_on ) {
   681         CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
   682     } else {
   683         CALL1_ptr_r32(xlat_get_code,REG_ARG1);
   684     }
   686     jump_next_block();
   687     JMP_TARGET(exitloop);
   688     exit_block();
   689 }
   691 /**
   692  * Exit the block with sh4r.new_pc written with the target pc
   693  */
   694 void exit_block_newpcset( sh4addr_t pc )
   695 {
   696     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   697     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   698     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   699     MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
   700     MOVL_r32_rbpdisp( REG_ARG1, R_PC );
   701     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   702     JBE_label(exitloop);
   703     if( sh4_x86.tlb_on ) {
   704         CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
   705     } else {
   706         CALL1_ptr_r32(xlat_get_code,REG_ARG1);
   707     }
   709 	jump_next_block();
   710     JMP_TARGET(exitloop);
   711     exit_block();
   712 }
   715 /**
   716  * Exit the block to an absolute PC
   717  */
   718 void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
   719 {
   720     MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   721     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   722     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   724     MOVL_imm32_r32( pc, REG_ARG1 );
   725     MOVL_r32_rbpdisp( REG_ARG1, R_PC );
   726     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   727     JBE_label(exitloop);
   728     jump_next_block_fixed_pc(pc);    
   729     JMP_TARGET(exitloop);
   730     exit_block();
   731 }
   733 /**
   734  * Exit the block to a relative PC
   735  */
   736 void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
   737 {
   738     MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   739     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   740     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   742 	if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
   743 	    /* Special case for tight loops - the PC doesn't change, and
   744 	     * we already know the target address. Just check events pending before
   745 	     * looping.
   746 	     */
   747         CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   748         uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output)) + PROLOGUE_SIZE;
   749         JCC_cc_prerel(X86_COND_A, backdisp);
   750 	} else {
   751         MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
   752         ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
   753         MOVL_r32_rbpdisp( REG_ARG1, R_PC );
   754         CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   755         JBE_label(exitloop2);
   757         jump_next_block_fixed_pc(pc);
   758         JMP_TARGET(exitloop2);
   759     }
   760     exit_block();
   761 }
   763 /**
   764  * Exit unconditionally with a general exception
   765  */
   766 void exit_block_exc( int code, sh4addr_t pc )
   767 {
   768     MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
   769     ADDL_r32_rbpdisp( REG_ECX, R_PC );
   770     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   771     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   772     MOVL_imm32_r32( code, REG_ARG1 );
   773     CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
   774     exit_block();
   775 }    
   777 /**
   778  * Embed a call to sh4_execute_instruction for situations that we
   779  * can't translate (just page-crossing delay slots at the moment).
   780  * Caller is responsible for setting new_pc before calling this function.
   781  *
   782  * Performs:
   783  *   Set PC = endpc
   784  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   785  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   786  *   Call sh4_execute_instruction
   787  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   788  */
   789 void exit_block_emu( sh4vma_t endpc )
   790 {
   791     MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
   792     ADDL_r32_rbpdisp( REG_ECX, R_PC );
   794     MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
   795     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
   796     MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
   797     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );
   799     CALL_ptr( sh4_execute_instruction );
   800     exit_block();
   801 } 
   803 /**
   804  * Write the block trailer (exception handling block)
   805  */
   806 void sh4_translate_end_block( sh4addr_t pc ) {
   807     if( sh4_x86.branch_taken == FALSE ) {
   808         // Didn't exit unconditionally already, so write the termination here
   809         exit_block_rel( pc, pc );
   810     }
   811     if( sh4_x86.backpatch_posn != 0 ) {
   812         unsigned int i;
   813         // Exception raised - cleanup and exit
   814         uint8_t *end_ptr = xlat_output;
   815         MOVL_r32_r32( REG_EDX, REG_ECX );
   816         ADDL_r32_r32( REG_EDX, REG_ECX );
   817         ADDL_r32_rbpdisp( REG_ECX, R_SPC );
   818         MOVL_moffptr_eax( &sh4_cpu_period );
   819         MULL_r32( REG_EDX );
   820         ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
   821         exit_block();
   823         for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
   824             uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
   825             if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
   826                 if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
   827                     *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output; 
   828                 } else {
   829                     *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
   830                 }
   831                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
   832                 int rel = end_ptr - xlat_output;
   833                 JMP_prerel(rel);
   834             } else {
   835                 *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
   836                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
   837                 CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
   838                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
   839                 int rel = end_ptr - xlat_output;
   840                 JMP_prerel(rel);
   841             }
   842         }
   843     }
   844 }
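/* Illustrative note on the trailer above: every exception stub loads its
 * fixup_icount into EDX and jumps back to the shared cleanup at end_ptr,
 * which effectively performs
 *
 *     sh4r.spc         += 2 * EDX;               // bytes of SH4 code executed
 *     sh4r.slice_cycle += sh4_cpu_period * EDX;  // cycles actually consumed
 *
 * before exiting the block.
 */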
   846 /**
   847  * Translate a single instruction. Delayed branches are handled specially
    848  * by translating both the branch and its delayed instruction as a single
    849  * unit (the delay slot must be emitted together with its branch).
   850  * The instruction MUST be in the icache (assert check)
   851  *
   852  * @return true if the instruction marks the end of a basic block
    853  * (eg a branch or other instruction that forces the block to end)
   854  */
   855 uint32_t sh4_translate_instruction( sh4vma_t pc )
   856 {
   857     uint32_t ir;
   858     /* Read instruction from icache */
   859     assert( IS_IN_ICACHE(pc) );
   860     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   862     if( !sh4_x86.in_delay_slot ) {
   863 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   864     }
   866     /* check for breakpoints at this pc */
   867     for( int i=0; i<sh4_breakpoint_count; i++ ) {
   868         if( sh4_breakpoints[i].address == pc ) {
   869             sh4_translate_emit_breakpoint(pc);
   870             break;
   871         }
   872     }
   873 %%
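/* Note (assumption about the build): the text below the %% marker is input to
 * lxdream's instruction-decoder generator; each "MNEMONIC operands {: ... :}"
 * action block supplies the C code emitted for one SH4 instruction form, with
 * fields such as Rm, Rn, imm and disp substituted by the generator. */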
   874 /* ALU operations */
   875 ADD Rm, Rn {:
   876     COUNT_INST(I_ADD);
   877     load_reg( REG_EAX, Rm );
   878     load_reg( REG_ECX, Rn );
   879     ADDL_r32_r32( REG_EAX, REG_ECX );
   880     store_reg( REG_ECX, Rn );
   881     sh4_x86.tstate = TSTATE_NONE;
   882 :}
   883 ADD #imm, Rn {:  
   884     COUNT_INST(I_ADDI);
   885     ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
   886     sh4_x86.tstate = TSTATE_NONE;
   887 :}
   888 ADDC Rm, Rn {:
   889     COUNT_INST(I_ADDC);
   890     if( sh4_x86.tstate != TSTATE_C ) {
   891         LDC_t();
   892     }
   893     load_reg( REG_EAX, Rm );
   894     load_reg( REG_ECX, Rn );
   895     ADCL_r32_r32( REG_EAX, REG_ECX );
   896     store_reg( REG_ECX, Rn );
   897     SETC_t();
   898     sh4_x86.tstate = TSTATE_C;
   899 :}
   900 ADDV Rm, Rn {:
   901     COUNT_INST(I_ADDV);
   902     load_reg( REG_EAX, Rm );
   903     load_reg( REG_ECX, Rn );
   904     ADDL_r32_r32( REG_EAX, REG_ECX );
   905     store_reg( REG_ECX, Rn );
   906     SETO_t();
   907     sh4_x86.tstate = TSTATE_O;
   908 :}
   909 AND Rm, Rn {:
   910     COUNT_INST(I_AND);
   911     load_reg( REG_EAX, Rm );
   912     load_reg( REG_ECX, Rn );
   913     ANDL_r32_r32( REG_EAX, REG_ECX );
   914     store_reg( REG_ECX, Rn );
   915     sh4_x86.tstate = TSTATE_NONE;
   916 :}
   917 AND #imm, R0 {:  
   918     COUNT_INST(I_ANDI);
   919     load_reg( REG_EAX, 0 );
   920     ANDL_imms_r32(imm, REG_EAX); 
   921     store_reg( REG_EAX, 0 );
   922     sh4_x86.tstate = TSTATE_NONE;
   923 :}
   924 AND.B #imm, @(R0, GBR) {: 
   925     COUNT_INST(I_ANDB);
   926     load_reg( REG_EAX, 0 );
   927     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
   928     MOVL_r32_rspdisp(REG_EAX, 0);
   929     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
   930     MOVL_rspdisp_r32(0, REG_EAX);
   931     ANDL_imms_r32(imm, REG_EDX );
   932     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
   933     sh4_x86.tstate = TSTATE_NONE;
   934 :}
   935 CMP/EQ Rm, Rn {:  
   936     COUNT_INST(I_CMPEQ);
   937     load_reg( REG_EAX, Rm );
   938     load_reg( REG_ECX, Rn );
   939     CMPL_r32_r32( REG_EAX, REG_ECX );
   940     SETE_t();
   941     sh4_x86.tstate = TSTATE_E;
   942 :}
   943 CMP/EQ #imm, R0 {:  
   944     COUNT_INST(I_CMPEQI);
   945     load_reg( REG_EAX, 0 );
   946     CMPL_imms_r32(imm, REG_EAX);
   947     SETE_t();
   948     sh4_x86.tstate = TSTATE_E;
   949 :}
   950 CMP/GE Rm, Rn {:  
   951     COUNT_INST(I_CMPGE);
   952     load_reg( REG_EAX, Rm );
   953     load_reg( REG_ECX, Rn );
   954     CMPL_r32_r32( REG_EAX, REG_ECX );
   955     SETGE_t();
   956     sh4_x86.tstate = TSTATE_GE;
   957 :}
   958 CMP/GT Rm, Rn {: 
   959     COUNT_INST(I_CMPGT);
   960     load_reg( REG_EAX, Rm );
   961     load_reg( REG_ECX, Rn );
   962     CMPL_r32_r32( REG_EAX, REG_ECX );
   963     SETG_t();
   964     sh4_x86.tstate = TSTATE_G;
   965 :}
   966 CMP/HI Rm, Rn {:  
   967     COUNT_INST(I_CMPHI);
   968     load_reg( REG_EAX, Rm );
   969     load_reg( REG_ECX, Rn );
   970     CMPL_r32_r32( REG_EAX, REG_ECX );
   971     SETA_t();
   972     sh4_x86.tstate = TSTATE_A;
   973 :}
   974 CMP/HS Rm, Rn {: 
   975     COUNT_INST(I_CMPHS);
   976     load_reg( REG_EAX, Rm );
   977     load_reg( REG_ECX, Rn );
   978     CMPL_r32_r32( REG_EAX, REG_ECX );
   979     SETAE_t();
   980     sh4_x86.tstate = TSTATE_AE;
   981  :}
   982 CMP/PL Rn {: 
   983     COUNT_INST(I_CMPPL);
   984     load_reg( REG_EAX, Rn );
   985     CMPL_imms_r32( 0, REG_EAX );
   986     SETG_t();
   987     sh4_x86.tstate = TSTATE_G;
   988 :}
   989 CMP/PZ Rn {:  
   990     COUNT_INST(I_CMPPZ);
   991     load_reg( REG_EAX, Rn );
   992     CMPL_imms_r32( 0, REG_EAX );
   993     SETGE_t();
   994     sh4_x86.tstate = TSTATE_GE;
   995 :}
   996 CMP/STR Rm, Rn {:  
   997     COUNT_INST(I_CMPSTR);
   998     load_reg( REG_EAX, Rm );
   999     load_reg( REG_ECX, Rn );
  1000     XORL_r32_r32( REG_ECX, REG_EAX );
  1001     TESTB_r8_r8( REG_AL, REG_AL );
  1002     JE_label(target1);
  1003     TESTB_r8_r8( REG_AH, REG_AH );
  1004     JE_label(target2);
  1005     SHRL_imm_r32( 16, REG_EAX );
  1006     TESTB_r8_r8( REG_AL, REG_AL );
  1007     JE_label(target3);
  1008     TESTB_r8_r8( REG_AH, REG_AH );
  1009     JMP_TARGET(target1);
  1010     JMP_TARGET(target2);
  1011     JMP_TARGET(target3);
  1012     SETE_t();
  1013     sh4_x86.tstate = TSTATE_E;
  1014 :}
  1015 DIV0S Rm, Rn {:
  1016     COUNT_INST(I_DIV0S);
  1017     load_reg( REG_EAX, Rm );
  1018     load_reg( REG_ECX, Rn );
  1019     SHRL_imm_r32( 31, REG_EAX );
  1020     SHRL_imm_r32( 31, REG_ECX );
  1021     MOVL_r32_rbpdisp( REG_EAX, R_M );
  1022     MOVL_r32_rbpdisp( REG_ECX, R_Q );
  1023     CMPL_r32_r32( REG_EAX, REG_ECX );
  1024     SETNE_t();
  1025     sh4_x86.tstate = TSTATE_NE;
  1026 :}
  1027 DIV0U {:  
  1028     COUNT_INST(I_DIV0U);
  1029     XORL_r32_r32( REG_EAX, REG_EAX );
  1030     MOVL_r32_rbpdisp( REG_EAX, R_Q );
  1031     MOVL_r32_rbpdisp( REG_EAX, R_M );
  1032     MOVL_r32_rbpdisp( REG_EAX, R_T );
  1033     sh4_x86.tstate = TSTATE_C; // works for DIV1
  1034 :}
  1035 DIV1 Rm, Rn {:
  1036     COUNT_INST(I_DIV1);
  1037     MOVL_rbpdisp_r32( R_M, REG_ECX );
  1038     load_reg( REG_EAX, Rn );
  1039     if( sh4_x86.tstate != TSTATE_C ) {
   1040 	LDC_t();
   1041     }
  1042     RCLL_imm_r32( 1, REG_EAX );
  1043     SETC_r8( REG_DL ); // Q'
  1044     CMPL_rbpdisp_r32( R_Q, REG_ECX );
  1045     JE_label(mqequal);
  1046     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1047     JMP_label(end);
  1048     JMP_TARGET(mqequal);
  1049     SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1050     JMP_TARGET(end);
  1051     store_reg( REG_EAX, Rn ); // Done with Rn now
  1052     SETC_r8(REG_AL); // tmp1
  1053     XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
  1054     XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
  1055     MOVL_r32_rbpdisp( REG_ECX, R_Q );
  1056     XORL_imms_r32( 1, REG_AL );   // T = !Q'
  1057     MOVZXL_r8_r32( REG_AL, REG_EAX );
  1058     MOVL_r32_rbpdisp( REG_EAX, R_T );
  1059     sh4_x86.tstate = TSTATE_NONE;
  1060 :}
  1061 DMULS.L Rm, Rn {:  
  1062     COUNT_INST(I_DMULS);
  1063     load_reg( REG_EAX, Rm );
  1064     load_reg( REG_ECX, Rn );
  1065     IMULL_r32(REG_ECX);
  1066     MOVL_r32_rbpdisp( REG_EDX, R_MACH );
  1067     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1068     sh4_x86.tstate = TSTATE_NONE;
  1069 :}
  1070 DMULU.L Rm, Rn {:  
  1071     COUNT_INST(I_DMULU);
  1072     load_reg( REG_EAX, Rm );
  1073     load_reg( REG_ECX, Rn );
  1074     MULL_r32(REG_ECX);
  1075     MOVL_r32_rbpdisp( REG_EDX, R_MACH );
  1076     MOVL_r32_rbpdisp( REG_EAX, R_MACL );    
  1077     sh4_x86.tstate = TSTATE_NONE;
  1078 :}
  1079 DT Rn {:  
  1080     COUNT_INST(I_DT);
  1081     load_reg( REG_EAX, Rn );
  1082     ADDL_imms_r32( -1, REG_EAX );
  1083     store_reg( REG_EAX, Rn );
  1084     SETE_t();
  1085     sh4_x86.tstate = TSTATE_E;
  1086 :}
  1087 EXTS.B Rm, Rn {:  
  1088     COUNT_INST(I_EXTSB);
  1089     load_reg( REG_EAX, Rm );
  1090     MOVSXL_r8_r32( REG_EAX, REG_EAX );
  1091     store_reg( REG_EAX, Rn );
  1092 :}
  1093 EXTS.W Rm, Rn {:  
  1094     COUNT_INST(I_EXTSW);
  1095     load_reg( REG_EAX, Rm );
  1096     MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1097     store_reg( REG_EAX, Rn );
  1098 :}
  1099 EXTU.B Rm, Rn {:  
  1100     COUNT_INST(I_EXTUB);
  1101     load_reg( REG_EAX, Rm );
  1102     MOVZXL_r8_r32( REG_EAX, REG_EAX );
  1103     store_reg( REG_EAX, Rn );
  1104 :}
  1105 EXTU.W Rm, Rn {:  
  1106     COUNT_INST(I_EXTUW);
  1107     load_reg( REG_EAX, Rm );
  1108     MOVZXL_r16_r32( REG_EAX, REG_EAX );
  1109     store_reg( REG_EAX, Rn );
  1110 :}
  1111 MAC.L @Rm+, @Rn+ {:
  1112     COUNT_INST(I_MACL);
  1113     if( Rm == Rn ) {
  1114 	load_reg( REG_EAX, Rm );
  1115 	check_ralign32( REG_EAX );
  1116 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1117 	MOVL_r32_rspdisp(REG_EAX, 0);
  1118 	load_reg( REG_EAX, Rm );
  1119 	LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  1120 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1121         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
  1122     } else {
  1123 	load_reg( REG_EAX, Rm );
  1124 	check_ralign32( REG_EAX );
  1125 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1126 	MOVL_r32_rspdisp( REG_EAX, 0 );
  1127 	load_reg( REG_EAX, Rn );
  1128 	check_ralign32( REG_EAX );
  1129 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1130 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
   1131 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
   1132     }
  1134     IMULL_rspdisp( 0 );
  1135     ADDL_r32_rbpdisp( REG_EAX, R_MACL );
  1136     ADCL_r32_rbpdisp( REG_EDX, R_MACH );
  1138     MOVL_rbpdisp_r32( R_S, REG_ECX );
  1139     TESTL_r32_r32(REG_ECX, REG_ECX);
  1140     JE_label( nosat );
  1141     CALL_ptr( signsat48 );
  1142     JMP_TARGET( nosat );
  1143     sh4_x86.tstate = TSTATE_NONE;
  1144 :}
  1145 MAC.W @Rm+, @Rn+ {:  
  1146     COUNT_INST(I_MACW);
  1147     if( Rm == Rn ) {
  1148 	load_reg( REG_EAX, Rm );
  1149 	check_ralign16( REG_EAX );
  1150 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1151         MOVL_r32_rspdisp( REG_EAX, 0 );
  1152 	load_reg( REG_EAX, Rm );
  1153 	LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
  1154 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1155 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
   1156 	// Note: the address is translated twice in case the pair crosses a page
   1157 	// boundary. Maybe worth adding a check to skip the second translation
  1158     } else {
  1159 	load_reg( REG_EAX, Rm );
  1160 	check_ralign16( REG_EAX );
  1161 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1162         MOVL_r32_rspdisp( REG_EAX, 0 );
  1163 	load_reg( REG_EAX, Rn );
  1164 	check_ralign16( REG_EAX );
  1165 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1166 	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
   1167 	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
   1168     }
  1169     IMULL_rspdisp( 0 );
  1170     MOVL_rbpdisp_r32( R_S, REG_ECX );
  1171     TESTL_r32_r32( REG_ECX, REG_ECX );
  1172     JE_label( nosat );
  1174     ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
  1175     JNO_label( end );            // 2
  1176     MOVL_imm32_r32( 1, REG_EDX );         // 5
  1177     MOVL_r32_rbpdisp( REG_EDX, R_MACH );   // 6
  1178     JS_label( positive );        // 2
  1179     MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
  1180     MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
  1181     JMP_label(end2);           // 2
  1183     JMP_TARGET(positive);
  1184     MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
  1185     MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
  1186     JMP_label(end3);            // 2
  1188     JMP_TARGET(nosat);
  1189     ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
  1190     ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
  1191     JMP_TARGET(end);
  1192     JMP_TARGET(end2);
  1193     JMP_TARGET(end3);
  1194     sh4_x86.tstate = TSTATE_NONE;
  1195 :}
  1196 MOVT Rn {:  
  1197     COUNT_INST(I_MOVT);
  1198     MOVL_rbpdisp_r32( R_T, REG_EAX );
  1199     store_reg( REG_EAX, Rn );
  1200 :}
  1201 MUL.L Rm, Rn {:  
  1202     COUNT_INST(I_MULL);
  1203     load_reg( REG_EAX, Rm );
  1204     load_reg( REG_ECX, Rn );
  1205     MULL_r32( REG_ECX );
  1206     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1207     sh4_x86.tstate = TSTATE_NONE;
  1208 :}
  1209 MULS.W Rm, Rn {:
  1210     COUNT_INST(I_MULSW);
  1211     MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
  1212     MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
  1213     MULL_r32( REG_ECX );
  1214     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1215     sh4_x86.tstate = TSTATE_NONE;
  1216 :}
  1217 MULU.W Rm, Rn {:  
  1218     COUNT_INST(I_MULUW);
  1219     MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
  1220     MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
  1221     MULL_r32( REG_ECX );
  1222     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1223     sh4_x86.tstate = TSTATE_NONE;
  1224 :}
  1225 NEG Rm, Rn {:
  1226     COUNT_INST(I_NEG);
  1227     load_reg( REG_EAX, Rm );
  1228     NEGL_r32( REG_EAX );
  1229     store_reg( REG_EAX, Rn );
  1230     sh4_x86.tstate = TSTATE_NONE;
  1231 :}
  1232 NEGC Rm, Rn {:  
  1233     COUNT_INST(I_NEGC);
  1234     load_reg( REG_EAX, Rm );
  1235     XORL_r32_r32( REG_ECX, REG_ECX );
  1236     LDC_t();
  1237     SBBL_r32_r32( REG_EAX, REG_ECX );
  1238     store_reg( REG_ECX, Rn );
  1239     SETC_t();
  1240     sh4_x86.tstate = TSTATE_C;
  1241 :}
  1242 NOT Rm, Rn {:  
  1243     COUNT_INST(I_NOT);
  1244     load_reg( REG_EAX, Rm );
  1245     NOTL_r32( REG_EAX );
  1246     store_reg( REG_EAX, Rn );
  1247     sh4_x86.tstate = TSTATE_NONE;
  1248 :}
  1249 OR Rm, Rn {:  
  1250     COUNT_INST(I_OR);
  1251     load_reg( REG_EAX, Rm );
  1252     load_reg( REG_ECX, Rn );
  1253     ORL_r32_r32( REG_EAX, REG_ECX );
  1254     store_reg( REG_ECX, Rn );
  1255     sh4_x86.tstate = TSTATE_NONE;
  1256 :}
  1257 OR #imm, R0 {:
  1258     COUNT_INST(I_ORI);
  1259     load_reg( REG_EAX, 0 );
  1260     ORL_imms_r32(imm, REG_EAX);
  1261     store_reg( REG_EAX, 0 );
  1262     sh4_x86.tstate = TSTATE_NONE;
  1263 :}
  1264 OR.B #imm, @(R0, GBR) {:  
  1265     COUNT_INST(I_ORB);
  1266     load_reg( REG_EAX, 0 );
  1267     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
  1268     MOVL_r32_rspdisp( REG_EAX, 0 );
  1269     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
  1270     MOVL_rspdisp_r32( 0, REG_EAX );
  1271     ORL_imms_r32(imm, REG_EDX );
  1272     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1273     sh4_x86.tstate = TSTATE_NONE;
  1274 :}
  1275 ROTCL Rn {:
  1276     COUNT_INST(I_ROTCL);
  1277     load_reg( REG_EAX, Rn );
  1278     if( sh4_x86.tstate != TSTATE_C ) {
   1279 	LDC_t();
   1280     }
  1281     RCLL_imm_r32( 1, REG_EAX );
  1282     store_reg( REG_EAX, Rn );
  1283     SETC_t();
  1284     sh4_x86.tstate = TSTATE_C;
  1285 :}
  1286 ROTCR Rn {:  
  1287     COUNT_INST(I_ROTCR);
  1288     load_reg( REG_EAX, Rn );
  1289     if( sh4_x86.tstate != TSTATE_C ) {
   1290 	LDC_t();
   1291     }
  1292     RCRL_imm_r32( 1, REG_EAX );
  1293     store_reg( REG_EAX, Rn );
  1294     SETC_t();
  1295     sh4_x86.tstate = TSTATE_C;
  1296 :}
  1297 ROTL Rn {:  
  1298     COUNT_INST(I_ROTL);
  1299     load_reg( REG_EAX, Rn );
  1300     ROLL_imm_r32( 1, REG_EAX );
  1301     store_reg( REG_EAX, Rn );
  1302     SETC_t();
  1303     sh4_x86.tstate = TSTATE_C;
  1304 :}
  1305 ROTR Rn {:  
  1306     COUNT_INST(I_ROTR);
  1307     load_reg( REG_EAX, Rn );
  1308     RORL_imm_r32( 1, REG_EAX );
  1309     store_reg( REG_EAX, Rn );
  1310     SETC_t();
  1311     sh4_x86.tstate = TSTATE_C;
  1312 :}
  1313 SHAD Rm, Rn {:
  1314     COUNT_INST(I_SHAD);
  1315     /* Annoyingly enough, not directly convertible */
  1316     load_reg( REG_EAX, Rn );
  1317     load_reg( REG_ECX, Rm );
  1318     CMPL_imms_r32( 0, REG_ECX );
  1319     JGE_label(doshl);
  1321     NEGL_r32( REG_ECX );      // 2
  1322     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1323     JE_label(emptysar);     // 2
  1324     SARL_cl_r32( REG_EAX );       // 2
  1325     JMP_label(end);          // 2
  1327     JMP_TARGET(emptysar);
  1328     SARL_imm_r32(31, REG_EAX );  // 3
  1329     JMP_label(end2);
  1331     JMP_TARGET(doshl);
  1332     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1333     SHLL_cl_r32( REG_EAX );       // 2
  1334     JMP_TARGET(end);
  1335     JMP_TARGET(end2);
  1336     store_reg( REG_EAX, Rn );
  1337     sh4_x86.tstate = TSTATE_NONE;
  1338 :}
  1339 SHLD Rm, Rn {:  
  1340     COUNT_INST(I_SHLD);
  1341     load_reg( REG_EAX, Rn );
  1342     load_reg( REG_ECX, Rm );
  1343     CMPL_imms_r32( 0, REG_ECX );
  1344     JGE_label(doshl);
  1346     NEGL_r32( REG_ECX );      // 2
  1347     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1348     JE_label(emptyshr );
  1349     SHRL_cl_r32( REG_EAX );       // 2
  1350     JMP_label(end);          // 2
  1352     JMP_TARGET(emptyshr);
  1353     XORL_r32_r32( REG_EAX, REG_EAX );
  1354     JMP_label(end2);
  1356     JMP_TARGET(doshl);
  1357     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1358     SHLL_cl_r32( REG_EAX );       // 2
  1359     JMP_TARGET(end);
  1360     JMP_TARGET(end2);
  1361     store_reg( REG_EAX, Rn );
  1362     sh4_x86.tstate = TSTATE_NONE;
  1363 :}
  1364 SHAL Rn {: 
  1365     COUNT_INST(I_SHAL);
  1366     load_reg( REG_EAX, Rn );
  1367     SHLL_imm_r32( 1, REG_EAX );
  1368     SETC_t();
  1369     store_reg( REG_EAX, Rn );
  1370     sh4_x86.tstate = TSTATE_C;
  1371 :}
  1372 SHAR Rn {:  
  1373     COUNT_INST(I_SHAR);
  1374     load_reg( REG_EAX, Rn );
  1375     SARL_imm_r32( 1, REG_EAX );
  1376     SETC_t();
  1377     store_reg( REG_EAX, Rn );
  1378     sh4_x86.tstate = TSTATE_C;
  1379 :}
  1380 SHLL Rn {:  
  1381     COUNT_INST(I_SHLL);
  1382     load_reg( REG_EAX, Rn );
  1383     SHLL_imm_r32( 1, REG_EAX );
  1384     SETC_t();
  1385     store_reg( REG_EAX, Rn );
  1386     sh4_x86.tstate = TSTATE_C;
  1387 :}
  1388 SHLL2 Rn {:
  1389     COUNT_INST(I_SHLL);
  1390     load_reg( REG_EAX, Rn );
  1391     SHLL_imm_r32( 2, REG_EAX );
  1392     store_reg( REG_EAX, Rn );
  1393     sh4_x86.tstate = TSTATE_NONE;
  1394 :}
  1395 SHLL8 Rn {:  
  1396     COUNT_INST(I_SHLL);
  1397     load_reg( REG_EAX, Rn );
  1398     SHLL_imm_r32( 8, REG_EAX );
  1399     store_reg( REG_EAX, Rn );
  1400     sh4_x86.tstate = TSTATE_NONE;
  1401 :}
  1402 SHLL16 Rn {:  
  1403     COUNT_INST(I_SHLL);
  1404     load_reg( REG_EAX, Rn );
  1405     SHLL_imm_r32( 16, REG_EAX );
  1406     store_reg( REG_EAX, Rn );
  1407     sh4_x86.tstate = TSTATE_NONE;
  1408 :}
  1409 SHLR Rn {:  
  1410     COUNT_INST(I_SHLR);
  1411     load_reg( REG_EAX, Rn );
  1412     SHRL_imm_r32( 1, REG_EAX );
  1413     SETC_t();
  1414     store_reg( REG_EAX, Rn );
  1415     sh4_x86.tstate = TSTATE_C;
  1416 :}
  1417 SHLR2 Rn {:  
  1418     COUNT_INST(I_SHLR);
  1419     load_reg( REG_EAX, Rn );
  1420     SHRL_imm_r32( 2, REG_EAX );
  1421     store_reg( REG_EAX, Rn );
  1422     sh4_x86.tstate = TSTATE_NONE;
  1423 :}
  1424 SHLR8 Rn {:  
  1425     COUNT_INST(I_SHLR);
  1426     load_reg( REG_EAX, Rn );
  1427     SHRL_imm_r32( 8, REG_EAX );
  1428     store_reg( REG_EAX, Rn );
  1429     sh4_x86.tstate = TSTATE_NONE;
  1430 :}
  1431 SHLR16 Rn {:  
  1432     COUNT_INST(I_SHLR);
  1433     load_reg( REG_EAX, Rn );
  1434     SHRL_imm_r32( 16, REG_EAX );
  1435     store_reg( REG_EAX, Rn );
  1436     sh4_x86.tstate = TSTATE_NONE;
  1437 :}
  1438 SUB Rm, Rn {:  
  1439     COUNT_INST(I_SUB);
  1440     load_reg( REG_EAX, Rm );
  1441     load_reg( REG_ECX, Rn );
  1442     SUBL_r32_r32( REG_EAX, REG_ECX );
  1443     store_reg( REG_ECX, Rn );
  1444     sh4_x86.tstate = TSTATE_NONE;
  1445 :}
  1446 SUBC Rm, Rn {:  
  1447     COUNT_INST(I_SUBC);
  1448     load_reg( REG_EAX, Rm );
  1449     load_reg( REG_ECX, Rn );
  1450     if( sh4_x86.tstate != TSTATE_C ) {
   1451 	LDC_t();
   1452     }
  1453     SBBL_r32_r32( REG_EAX, REG_ECX );
  1454     store_reg( REG_ECX, Rn );
  1455     SETC_t();
  1456     sh4_x86.tstate = TSTATE_C;
  1457 :}
  1458 SUBV Rm, Rn {:  
  1459     COUNT_INST(I_SUBV);
  1460     load_reg( REG_EAX, Rm );
  1461     load_reg( REG_ECX, Rn );
  1462     SUBL_r32_r32( REG_EAX, REG_ECX );
  1463     store_reg( REG_ECX, Rn );
  1464     SETO_t();
  1465     sh4_x86.tstate = TSTATE_O;
  1466 :}
  1467 SWAP.B Rm, Rn {:  
  1468     COUNT_INST(I_SWAPB);
  1469     load_reg( REG_EAX, Rm );
  1470     XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
  1471     store_reg( REG_EAX, Rn );
  1472 :}
  1473 SWAP.W Rm, Rn {:  
  1474     COUNT_INST(I_SWAPB);
  1475     load_reg( REG_EAX, Rm );
  1476     MOVL_r32_r32( REG_EAX, REG_ECX );
  1477     SHLL_imm_r32( 16, REG_ECX );
  1478     SHRL_imm_r32( 16, REG_EAX );
  1479     ORL_r32_r32( REG_EAX, REG_ECX );
  1480     store_reg( REG_ECX, Rn );
  1481     sh4_x86.tstate = TSTATE_NONE;
  1482 :}
  1483 TAS.B @Rn {:  
  1484     COUNT_INST(I_TASB);
  1485     load_reg( REG_EAX, Rn );
  1486     MOVL_r32_rspdisp( REG_EAX, 0 );
  1487     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
  1488     TESTB_r8_r8( REG_DL, REG_DL );
  1489     SETE_t();
  1490     ORB_imms_r8( 0x80, REG_DL );
  1491     MOVL_rspdisp_r32( 0, REG_EAX );
  1492     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1493     sh4_x86.tstate = TSTATE_NONE;
  1494 :}
  1495 TST Rm, Rn {:  
  1496     COUNT_INST(I_TST);
  1497     load_reg( REG_EAX, Rm );
  1498     load_reg( REG_ECX, Rn );
  1499     TESTL_r32_r32( REG_EAX, REG_ECX );
  1500     SETE_t();
  1501     sh4_x86.tstate = TSTATE_E;
  1502 :}
  1503 TST #imm, R0 {:  
  1504     COUNT_INST(I_TSTI);
  1505     load_reg( REG_EAX, 0 );
  1506     TESTL_imms_r32( imm, REG_EAX );
  1507     SETE_t();
  1508     sh4_x86.tstate = TSTATE_E;
  1509 :}
  1510 TST.B #imm, @(R0, GBR) {:  
  1511     COUNT_INST(I_TSTB);
  1512     load_reg( REG_EAX, 0);
  1513     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
  1514     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1515     TESTB_imms_r8( imm, REG_AL );
  1516     SETE_t();
  1517     sh4_x86.tstate = TSTATE_E;
  1518 :}
  1519 XOR Rm, Rn {:  
  1520     COUNT_INST(I_XOR);
  1521     load_reg( REG_EAX, Rm );
  1522     load_reg( REG_ECX, Rn );
  1523     XORL_r32_r32( REG_EAX, REG_ECX );
  1524     store_reg( REG_ECX, Rn );
  1525     sh4_x86.tstate = TSTATE_NONE;
  1526 :}
  1527 XOR #imm, R0 {:  
  1528     COUNT_INST(I_XORI);
  1529     load_reg( REG_EAX, 0 );
  1530     XORL_imms_r32( imm, REG_EAX );
  1531     store_reg( REG_EAX, 0 );
  1532     sh4_x86.tstate = TSTATE_NONE;
  1533 :}
  1534 XOR.B #imm, @(R0, GBR) {:  
  1535     COUNT_INST(I_XORB);
  1536     load_reg( REG_EAX, 0 );
  1537     ADDL_rbpdisp_r32( R_GBR, REG_EAX ); 
  1538     MOVL_r32_rspdisp( REG_EAX, 0 );
  1539     MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
  1540     MOVL_rspdisp_r32( 0, REG_EAX );
  1541     XORL_imms_r32( imm, REG_EDX );
  1542     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1543     sh4_x86.tstate = TSTATE_NONE;
  1544 :}
  1545 XTRCT Rm, Rn {:
  1546     COUNT_INST(I_XTRCT);
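            // XTRCT: Rn = (Rm << 16) | (Rn >> 16), i.e. the middle 32 bits
            // of the 64-bit concatenation Rm:Rn.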
  1547     load_reg( REG_EAX, Rm );
  1548     load_reg( REG_ECX, Rn );
  1549     SHLL_imm_r32( 16, REG_EAX );
  1550     SHRL_imm_r32( 16, REG_ECX );
  1551     ORL_r32_r32( REG_EAX, REG_ECX );
  1552     store_reg( REG_ECX, Rn );
  1553     sh4_x86.tstate = TSTATE_NONE;
  1554 :}
  1556 /* Data move instructions */
  1557 MOV Rm, Rn {:  
  1558     COUNT_INST(I_MOV);
  1559     load_reg( REG_EAX, Rm );
  1560     store_reg( REG_EAX, Rn );
  1561 :}
  1562 MOV #imm, Rn {:  
  1563     COUNT_INST(I_MOVI);
  1564     MOVL_imm32_r32( imm, REG_EAX );
  1565     store_reg( REG_EAX, Rn );
  1566 :}
  1567 MOV.B Rm, @Rn {:  
  1568     COUNT_INST(I_MOVB);
  1569     load_reg( REG_EAX, Rn );
  1570     load_reg( REG_EDX, Rm );
  1571     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1572     sh4_x86.tstate = TSTATE_NONE;
  1573 :}
  1574 MOV.B Rm, @-Rn {:  
  1575     COUNT_INST(I_MOVB);
  1576     load_reg( REG_EAX, Rn );
  1577     LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
  1578     load_reg( REG_EDX, Rm );
  1579     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1580     ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
  1581     sh4_x86.tstate = TSTATE_NONE;
  1582 :}
  1583 MOV.B Rm, @(R0, Rn) {:  
  1584     COUNT_INST(I_MOVB);
  1585     load_reg( REG_EAX, 0 );
  1586     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1587     load_reg( REG_EDX, Rm );
  1588     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1589     sh4_x86.tstate = TSTATE_NONE;
  1590 :}
  1591 MOV.B R0, @(disp, GBR) {:  
  1592     COUNT_INST(I_MOVB);
  1593     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1594     ADDL_imms_r32( disp, REG_EAX );
  1595     load_reg( REG_EDX, 0 );
  1596     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1597     sh4_x86.tstate = TSTATE_NONE;
  1598 :}
  1599 MOV.B R0, @(disp, Rn) {:  
  1600     COUNT_INST(I_MOVB);
  1601     load_reg( REG_EAX, Rn );
  1602     ADDL_imms_r32( disp, REG_EAX );
  1603     load_reg( REG_EDX, 0 );
  1604     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1605     sh4_x86.tstate = TSTATE_NONE;
  1606 :}
  1607 MOV.B @Rm, Rn {:  
  1608     COUNT_INST(I_MOVB);
  1609     load_reg( REG_EAX, Rm );
  1610     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1611     store_reg( REG_EAX, Rn );
  1612     sh4_x86.tstate = TSTATE_NONE;
  1613 :}
  1614 MOV.B @Rm+, Rn {:  
  1615     COUNT_INST(I_MOVB);
  1616     load_reg( REG_EAX, Rm );
  1617     MEM_READ_BYTE( REG_EAX, REG_EAX );
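            // Skip the post-increment when Rm == Rn: the loaded value
            // overwrites Rn, so the increment must not be applied on top.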
  1618     if( Rm != Rn ) {
  1619     	ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
   1620     }
  1621     store_reg( REG_EAX, Rn );
  1622     sh4_x86.tstate = TSTATE_NONE;
  1623 :}
  1624 MOV.B @(R0, Rm), Rn {:  
  1625     COUNT_INST(I_MOVB);
  1626     load_reg( REG_EAX, 0 );
  1627     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1628     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1629     store_reg( REG_EAX, Rn );
  1630     sh4_x86.tstate = TSTATE_NONE;
  1631 :}
  1632 MOV.B @(disp, GBR), R0 {:  
  1633     COUNT_INST(I_MOVB);
  1634     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1635     ADDL_imms_r32( disp, REG_EAX );
  1636     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1637     store_reg( REG_EAX, 0 );
  1638     sh4_x86.tstate = TSTATE_NONE;
  1639 :}
  1640 MOV.B @(disp, Rm), R0 {:  
  1641     COUNT_INST(I_MOVB);
  1642     load_reg( REG_EAX, Rm );
  1643     ADDL_imms_r32( disp, REG_EAX );
  1644     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1645     store_reg( REG_EAX, 0 );
  1646     sh4_x86.tstate = TSTATE_NONE;
  1647 :}
  1648 MOV.L Rm, @Rn {:
  1649     COUNT_INST(I_MOVL);
  1650     load_reg( REG_EAX, Rn );
  1651     check_walign32(REG_EAX);
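            // Fast path for the store queues: addresses in
            // 0xE0000000-0xE3FFFFFF bypass the memory subsystem and write
            // straight into the 64-byte sh4r.store_queue buffer (& 0x3C
            // selects one of its 16 longword slots).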
  1652     MOVL_r32_r32( REG_EAX, REG_ECX );
  1653     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1654     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1655     JNE_label( notsq );
  1656     ANDL_imms_r32( 0x3C, REG_EAX );
  1657     load_reg( REG_EDX, Rm );
  1658     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1659     JMP_label(end);
  1660     JMP_TARGET(notsq);
  1661     load_reg( REG_EDX, Rm );
  1662     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1663     JMP_TARGET(end);
  1664     sh4_x86.tstate = TSTATE_NONE;
  1665 :}
  1666 MOV.L Rm, @-Rn {:  
  1667     COUNT_INST(I_MOVL);
  1668     load_reg( REG_EAX, Rn );
  1669     ADDL_imms_r32( -4, REG_EAX );
  1670     check_walign32( REG_EAX );
  1671     load_reg( REG_EDX, Rm );
  1672     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1673     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  1674     sh4_x86.tstate = TSTATE_NONE;
  1675 :}
  1676 MOV.L Rm, @(R0, Rn) {:  
  1677     COUNT_INST(I_MOVL);
  1678     load_reg( REG_EAX, 0 );
  1679     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1680     check_walign32( REG_EAX );
  1681     load_reg( REG_EDX, Rm );
  1682     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1683     sh4_x86.tstate = TSTATE_NONE;
  1684 :}
  1685 MOV.L R0, @(disp, GBR) {:  
  1686     COUNT_INST(I_MOVL);
  1687     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1688     ADDL_imms_r32( disp, REG_EAX );
  1689     check_walign32( REG_EAX );
  1690     load_reg( REG_EDX, 0 );
  1691     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1692     sh4_x86.tstate = TSTATE_NONE;
  1693 :}
  1694 MOV.L Rm, @(disp, Rn) {:  
  1695     COUNT_INST(I_MOVL);
  1696     load_reg( REG_EAX, Rn );
  1697     ADDL_imms_r32( disp, REG_EAX );
  1698     check_walign32( REG_EAX );
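            // Same store-queue fast path as MOV.L Rm, @Rn above.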
  1699     MOVL_r32_r32( REG_EAX, REG_ECX );
  1700     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1701     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1702     JNE_label( notsq );
  1703     ANDL_imms_r32( 0x3C, REG_EAX );
  1704     load_reg( REG_EDX, Rm );
  1705     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1706     JMP_label(end);
  1707     JMP_TARGET(notsq);
  1708     load_reg( REG_EDX, Rm );
  1709     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1710     JMP_TARGET(end);
  1711     sh4_x86.tstate = TSTATE_NONE;
  1712 :}
  1713 MOV.L @Rm, Rn {:  
  1714     COUNT_INST(I_MOVL);
  1715     load_reg( REG_EAX, Rm );
  1716     check_ralign32( REG_EAX );
  1717     MEM_READ_LONG( REG_EAX, REG_EAX );
  1718     store_reg( REG_EAX, Rn );
  1719     sh4_x86.tstate = TSTATE_NONE;
  1720 :}
  1721 MOV.L @Rm+, Rn {:  
  1722     COUNT_INST(I_MOVL);
  1723     load_reg( REG_EAX, Rm );
  1724     check_ralign32( REG_EAX );
  1725     MEM_READ_LONG( REG_EAX, REG_EAX );
  1726     if( Rm != Rn ) {
  1727     	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
   1728     }
  1729     store_reg( REG_EAX, Rn );
  1730     sh4_x86.tstate = TSTATE_NONE;
  1731 :}
  1732 MOV.L @(R0, Rm), Rn {:  
  1733     COUNT_INST(I_MOVL);
  1734     load_reg( REG_EAX, 0 );
  1735     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1736     check_ralign32( REG_EAX );
  1737     MEM_READ_LONG( REG_EAX, REG_EAX );
  1738     store_reg( REG_EAX, Rn );
  1739     sh4_x86.tstate = TSTATE_NONE;
  1740 :}
  1741 MOV.L @(disp, GBR), R0 {:
  1742     COUNT_INST(I_MOVL);
  1743     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1744     ADDL_imms_r32( disp, REG_EAX );
  1745     check_ralign32( REG_EAX );
  1746     MEM_READ_LONG( REG_EAX, REG_EAX );
  1747     store_reg( REG_EAX, 0 );
  1748     sh4_x86.tstate = TSTATE_NONE;
  1749 :}
  1750 MOV.L @(disp, PC), Rn {:  
  1751     COUNT_INST(I_MOVLPC);
  1752     if( sh4_x86.in_delay_slot ) {
  1753 	SLOTILLEGAL();
  1754     } else {
  1755 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1756 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1757 	    // If the target address is in the same page as the code, it's
  1758 	    // pretty safe to just ref it directly and circumvent the whole
  1759 	    // memory subsystem. (this is a big performance win)
  1761 	    // FIXME: There's a corner-case that's not handled here when
  1762 	    // the current code-page is in the ITLB but not in the UTLB.
   1763 	    // (this should generate a TLB miss, although SH4 behaviour needs
   1764 	    // testing to confirm). It's unlikely that anything depends on
   1765 	    // this behaviour, though.
  1766 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1767 	    MOVL_moffptr_eax( ptr );
  1768 	} else {
  1769 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1770 	    // different virtual address than the translation was done with,
  1771 	    // but we can safely assume that the low bits are the same.
  1772 	    MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
  1773 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1774 	    MEM_READ_LONG( REG_EAX, REG_EAX );
  1775 	    sh4_x86.tstate = TSTATE_NONE;
   1776 	}
  1777 	store_reg( REG_EAX, Rn );
   1778     }
  1779 :}
  1780 MOV.L @(disp, Rm), Rn {:  
  1781     COUNT_INST(I_MOVL);
  1782     load_reg( REG_EAX, Rm );
  1783     ADDL_imms_r32( disp, REG_EAX );
  1784     check_ralign32( REG_EAX );
  1785     MEM_READ_LONG( REG_EAX, REG_EAX );
  1786     store_reg( REG_EAX, Rn );
  1787     sh4_x86.tstate = TSTATE_NONE;
  1788 :}
  1789 MOV.W Rm, @Rn {:  
  1790     COUNT_INST(I_MOVW);
  1791     load_reg( REG_EAX, Rn );
  1792     check_walign16( REG_EAX );
  1793     load_reg( REG_EDX, Rm );
  1794     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1795     sh4_x86.tstate = TSTATE_NONE;
  1796 :}
  1797 MOV.W Rm, @-Rn {:  
  1798     COUNT_INST(I_MOVW);
  1799     load_reg( REG_EAX, Rn );
  1800     check_walign16( REG_EAX );
  1801     LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
  1802     load_reg( REG_EDX, Rm );
  1803     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1804     ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
  1805     sh4_x86.tstate = TSTATE_NONE;
  1806 :}
  1807 MOV.W Rm, @(R0, Rn) {:  
  1808     COUNT_INST(I_MOVW);
  1809     load_reg( REG_EAX, 0 );
  1810     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1811     check_walign16( REG_EAX );
  1812     load_reg( REG_EDX, Rm );
  1813     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1814     sh4_x86.tstate = TSTATE_NONE;
  1815 :}
  1816 MOV.W R0, @(disp, GBR) {:  
  1817     COUNT_INST(I_MOVW);
  1818     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1819     ADDL_imms_r32( disp, REG_EAX );
  1820     check_walign16( REG_EAX );
  1821     load_reg( REG_EDX, 0 );
  1822     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1823     sh4_x86.tstate = TSTATE_NONE;
  1824 :}
  1825 MOV.W R0, @(disp, Rn) {:  
  1826     COUNT_INST(I_MOVW);
  1827     load_reg( REG_EAX, Rn );
  1828     ADDL_imms_r32( disp, REG_EAX );
  1829     check_walign16( REG_EAX );
  1830     load_reg( REG_EDX, 0 );
  1831     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1832     sh4_x86.tstate = TSTATE_NONE;
  1833 :}
  1834 MOV.W @Rm, Rn {:  
  1835     COUNT_INST(I_MOVW);
  1836     load_reg( REG_EAX, Rm );
  1837     check_ralign16( REG_EAX );
  1838     MEM_READ_WORD( REG_EAX, REG_EAX );
  1839     store_reg( REG_EAX, Rn );
  1840     sh4_x86.tstate = TSTATE_NONE;
  1841 :}
  1842 MOV.W @Rm+, Rn {:  
  1843     COUNT_INST(I_MOVW);
  1844     load_reg( REG_EAX, Rm );
  1845     check_ralign16( REG_EAX );
  1846     MEM_READ_WORD( REG_EAX, REG_EAX );
  1847     if( Rm != Rn ) {
  1848         ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
   1849     }
  1850     store_reg( REG_EAX, Rn );
  1851     sh4_x86.tstate = TSTATE_NONE;
  1852 :}
  1853 MOV.W @(R0, Rm), Rn {:  
  1854     COUNT_INST(I_MOVW);
  1855     load_reg( REG_EAX, 0 );
  1856     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1857     check_ralign16( REG_EAX );
  1858     MEM_READ_WORD( REG_EAX, REG_EAX );
  1859     store_reg( REG_EAX, Rn );
  1860     sh4_x86.tstate = TSTATE_NONE;
  1861 :}
  1862 MOV.W @(disp, GBR), R0 {:  
  1863     COUNT_INST(I_MOVW);
  1864     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1865     ADDL_imms_r32( disp, REG_EAX );
  1866     check_ralign16( REG_EAX );
  1867     MEM_READ_WORD( REG_EAX, REG_EAX );
  1868     store_reg( REG_EAX, 0 );
  1869     sh4_x86.tstate = TSTATE_NONE;
  1870 :}
  1871 MOV.W @(disp, PC), Rn {:  
  1872     COUNT_INST(I_MOVW);
  1873     if( sh4_x86.in_delay_slot ) {
  1874 	SLOTILLEGAL();
  1875     } else {
  1876 	// See comments for MOV.L @(disp, PC), Rn
  1877 	uint32_t target = pc + disp + 4;
  1878 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1879 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1880 	    MOVL_moffptr_eax( ptr );
  1881 	    MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1882 	} else {
  1883 	    MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
  1884 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1885 	    MEM_READ_WORD( REG_EAX, REG_EAX );
  1886 	    sh4_x86.tstate = TSTATE_NONE;
   1887 	}
  1888 	store_reg( REG_EAX, Rn );
   1889     }
  1890 :}
  1891 MOV.W @(disp, Rm), R0 {:  
  1892     COUNT_INST(I_MOVW);
  1893     load_reg( REG_EAX, Rm );
  1894     ADDL_imms_r32( disp, REG_EAX );
  1895     check_ralign16( REG_EAX );
  1896     MEM_READ_WORD( REG_EAX, REG_EAX );
  1897     store_reg( REG_EAX, 0 );
  1898     sh4_x86.tstate = TSTATE_NONE;
  1899 :}
  1900 MOVA @(disp, PC), R0 {:  
  1901     COUNT_INST(I_MOVA);
  1902     if( sh4_x86.in_delay_slot ) {
  1903 	SLOTILLEGAL();
  1904     } else {
  1905 	MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
  1906 	ADDL_rbpdisp_r32( R_PC, REG_ECX );
  1907 	store_reg( REG_ECX, 0 );
  1908 	sh4_x86.tstate = TSTATE_NONE;
   1909     }
  1910 :}
  1911 MOVCA.L R0, @Rn {:  
  1912     COUNT_INST(I_MOVCA);
  1913     load_reg( REG_EAX, Rn );
  1914     check_walign32( REG_EAX );
  1915     load_reg( REG_EDX, 0 );
  1916     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1917     sh4_x86.tstate = TSTATE_NONE;
  1918 :}
  1920 /* Control transfer instructions */
  1921 BF disp {:
  1922     COUNT_INST(I_BF);
  1923     if( sh4_x86.in_delay_slot ) {
  1924 	SLOTILLEGAL();
  1925     } else {
  1926 	sh4vma_t target = disp + pc + 4;
  1927 	JT_label( nottaken );
  1928 	exit_block_rel(target, pc+2 );
  1929 	JMP_TARGET(nottaken);
  1930 	return 2;
   1931     }
  1932 :}
  1933 BF/S disp {:
  1934     COUNT_INST(I_BFS);
  1935     if( sh4_x86.in_delay_slot ) {
  1936 	SLOTILLEGAL();
  1937     } else {
  1938 	sh4_x86.in_delay_slot = DELAY_PC;
  1939 	if( UNTRANSLATABLE(pc+2) ) {
  1940 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1941 	    JT_label(nottaken);
  1942 	    ADDL_imms_r32( disp, REG_EAX );
  1943 	    JMP_TARGET(nottaken);
  1944 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1945 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1946 	    exit_block_emu(pc+2);
  1947 	    sh4_x86.branch_taken = TRUE;
  1948 	    return 2;
  1949 	} else {
  1950 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1951 		CMPL_imms_rbpdisp( 1, R_T );
  1952 		sh4_x86.tstate = TSTATE_E;
   1953 	    }
  1954 	    sh4vma_t target = disp + pc + 4;
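            // Emit the conditional jump with a zero rel32 placeholder, then
            // translate the delay slot twice: once on the branch-taken path
            // (ending with exit_block_rel), and once on the fall-through
            // path after back-patching the placeholder to skip the first copy.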
  1955 	    JCC_cc_rel32(sh4_x86.tstate,0);
  1956 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  1957 	    int save_tstate = sh4_x86.tstate;
  1958 	    sh4_translate_instruction(pc+2);
   1959 	    sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  1960 	    exit_block_rel( target, pc+4 );
  1962 	    // not taken
  1963 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1964 	    sh4_x86.tstate = save_tstate;
  1965 	    sh4_translate_instruction(pc+2);
  1966 	    return 4;
   1967 	}
   1968     }
  1969 :}
  1970 BRA disp {:  
  1971     COUNT_INST(I_BRA);
  1972     if( sh4_x86.in_delay_slot ) {
  1973 	SLOTILLEGAL();
  1974     } else {
  1975 	sh4_x86.in_delay_slot = DELAY_PC;
  1976 	sh4_x86.branch_taken = TRUE;
  1977 	if( UNTRANSLATABLE(pc+2) ) {
  1978 	    MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1979 	    ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
  1980 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1981 	    exit_block_emu(pc+2);
  1982 	    return 2;
  1983 	} else {
  1984 	    sh4_translate_instruction( pc + 2 );
  1985 	    exit_block_rel( disp + pc + 4, pc+4 );
  1986 	    return 4;
   1987 	}
   1988     }
  1989 :}
  1990 BRAF Rn {:  
  1991     COUNT_INST(I_BRAF);
  1992     if( sh4_x86.in_delay_slot ) {
  1993 	SLOTILLEGAL();
  1994     } else {
  1995 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1996 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1997 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1998 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1999 	sh4_x86.in_delay_slot = DELAY_PC;
  2000 	sh4_x86.tstate = TSTATE_NONE;
  2001 	sh4_x86.branch_taken = TRUE;
  2002 	if( UNTRANSLATABLE(pc+2) ) {
  2003 	    exit_block_emu(pc+2);
  2004 	    return 2;
  2005 	} else {
  2006 	    sh4_translate_instruction( pc + 2 );
  2007 	    exit_block_newpcset(pc+4);
  2008 	    return 4;
   2009 	}
   2010     }
  2011 :}
  2012 BSR disp {:  
  2013     COUNT_INST(I_BSR);
  2014     if( sh4_x86.in_delay_slot ) {
  2015 	SLOTILLEGAL();
  2016     } else {
  2017 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2018 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2019 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2020 	sh4_x86.in_delay_slot = DELAY_PC;
  2021 	sh4_x86.branch_taken = TRUE;
  2022 	sh4_x86.tstate = TSTATE_NONE;
  2023 	if( UNTRANSLATABLE(pc+2) ) {
  2024 	    ADDL_imms_r32( disp, REG_EAX );
  2025 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2026 	    exit_block_emu(pc+2);
  2027 	    return 2;
  2028 	} else {
  2029 	    sh4_translate_instruction( pc + 2 );
  2030 	    exit_block_rel( disp + pc + 4, pc+4 );
  2031 	    return 4;
   2032 	}
   2033     }
  2034 :}
  2035 BSRF Rn {:  
  2036     COUNT_INST(I_BSRF);
  2037     if( sh4_x86.in_delay_slot ) {
  2038 	SLOTILLEGAL();
  2039     } else {
  2040 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2041 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2042 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2043 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  2044 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2046 	sh4_x86.in_delay_slot = DELAY_PC;
  2047 	sh4_x86.tstate = TSTATE_NONE;
  2048 	sh4_x86.branch_taken = TRUE;
  2049 	if( UNTRANSLATABLE(pc+2) ) {
  2050 	    exit_block_emu(pc+2);
  2051 	    return 2;
  2052 	} else {
  2053 	    sh4_translate_instruction( pc + 2 );
  2054 	    exit_block_newpcset(pc+4);
  2055 	    return 4;
   2056 	}
   2057     }
  2058 :}
  2059 BT disp {:
  2060     COUNT_INST(I_BT);
  2061     if( sh4_x86.in_delay_slot ) {
  2062 	SLOTILLEGAL();
  2063     } else {
  2064 	sh4vma_t target = disp + pc + 4;
  2065 	JF_label( nottaken );
  2066 	exit_block_rel(target, pc+2 );
  2067 	JMP_TARGET(nottaken);
  2068 	return 2;
   2069     }
  2070 :}
  2071 BT/S disp {:
  2072     COUNT_INST(I_BTS);
  2073     if( sh4_x86.in_delay_slot ) {
  2074 	SLOTILLEGAL();
  2075     } else {
  2076 	sh4_x86.in_delay_slot = DELAY_PC;
  2077 	if( UNTRANSLATABLE(pc+2) ) {
  2078 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2079 	    JF_label(nottaken);
  2080 	    ADDL_imms_r32( disp, REG_EAX );
  2081 	    JMP_TARGET(nottaken);
  2082 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  2083 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2084 	    exit_block_emu(pc+2);
  2085 	    sh4_x86.branch_taken = TRUE;
  2086 	    return 2;
  2087 	} else {
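            // Same two-pass delay-slot scheme as BF/S above, with the branch
            // sense inverted (BT/S branches when T is set).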
  2088 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  2089 		CMPL_imms_rbpdisp( 1, R_T );
  2090 		sh4_x86.tstate = TSTATE_E;
   2091 	    }
  2092 	    JCC_cc_rel32(sh4_x86.tstate^1,0);
  2093 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  2095 	    int save_tstate = sh4_x86.tstate;
  2096 	    sh4_translate_instruction(pc+2);
   2097 	    sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  2098 	    exit_block_rel( disp + pc + 4, pc+4 );
  2099 	    // not taken
  2100 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  2101 	    sh4_x86.tstate = save_tstate;
  2102 	    sh4_translate_instruction(pc+2);
  2103 	    return 4;
   2104 	}
   2105     }
  2106 :}
  2107 JMP @Rn {:  
  2108     COUNT_INST(I_JMP);
  2109     if( sh4_x86.in_delay_slot ) {
  2110 	SLOTILLEGAL();
  2111     } else {
  2112 	load_reg( REG_ECX, Rn );
  2113 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2114 	sh4_x86.in_delay_slot = DELAY_PC;
  2115 	sh4_x86.branch_taken = TRUE;
  2116 	if( UNTRANSLATABLE(pc+2) ) {
  2117 	    exit_block_emu(pc+2);
  2118 	    return 2;
  2119 	} else {
  2120 	    sh4_translate_instruction(pc+2);
  2121 	    exit_block_newpcset(pc+4);
  2122 	    return 4;
   2123 	}
   2124     }
  2125 :}
  2126 JSR @Rn {:  
  2127     COUNT_INST(I_JSR);
  2128     if( sh4_x86.in_delay_slot ) {
  2129 	SLOTILLEGAL();
  2130     } else {
  2131 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2132 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2133 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2134 	load_reg( REG_ECX, Rn );
  2135 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2136 	sh4_x86.in_delay_slot = DELAY_PC;
  2137 	sh4_x86.branch_taken = TRUE;
  2138 	sh4_x86.tstate = TSTATE_NONE;
  2139 	if( UNTRANSLATABLE(pc+2) ) {
  2140 	    exit_block_emu(pc+2);
  2141 	    return 2;
  2142 	} else {
  2143 	    sh4_translate_instruction(pc+2);
  2144 	    exit_block_newpcset(pc+4);
  2145 	    return 4;
   2146 	}
   2147     }
  2148 :}
  2149 RTE {:  
  2150     COUNT_INST(I_RTE);
  2151     if( sh4_x86.in_delay_slot ) {
  2152 	SLOTILLEGAL();
  2153     } else {
  2154 	check_priv();
  2155 	MOVL_rbpdisp_r32( R_SPC, REG_ECX );
  2156 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2157 	MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2158 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2159 	sh4_x86.in_delay_slot = DELAY_PC;
  2160 	sh4_x86.fpuen_checked = FALSE;
  2161 	sh4_x86.tstate = TSTATE_NONE;
  2162 	sh4_x86.branch_taken = TRUE;
   2163 	sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2164 	if( UNTRANSLATABLE(pc+2) ) {
  2165 	    exit_block_emu(pc+2);
  2166 	    return 2;
  2167 	} else {
  2168 	    sh4_translate_instruction(pc+2);
  2169 	    exit_block_newpcset(pc+4);
  2170 	    return 4;
   2171 	}
   2172     }
  2173 :}
  2174 RTS {:  
  2175     COUNT_INST(I_RTS);
  2176     if( sh4_x86.in_delay_slot ) {
  2177 	SLOTILLEGAL();
  2178     } else {
  2179 	MOVL_rbpdisp_r32( R_PR, REG_ECX );
  2180 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2181 	sh4_x86.in_delay_slot = DELAY_PC;
  2182 	sh4_x86.branch_taken = TRUE;
  2183 	if( UNTRANSLATABLE(pc+2) ) {
  2184 	    exit_block_emu(pc+2);
  2185 	    return 2;
  2186 	} else {
  2187 	    sh4_translate_instruction(pc+2);
  2188 	    exit_block_newpcset(pc+4);
  2189 	    return 4;
   2190 	}
   2191     }
  2192 :}
  2193 TRAPA #imm {:  
  2194     COUNT_INST(I_TRAPA);
  2195     if( sh4_x86.in_delay_slot ) {
  2196 	SLOTILLEGAL();
  2197     } else {
  2198 	MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX );   // 5
  2199 	ADDL_r32_rbpdisp( REG_ECX, R_PC );
  2200 	MOVL_imm32_r32( imm, REG_EAX );
  2201 	CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
  2202 	sh4_x86.tstate = TSTATE_NONE;
  2203 	exit_block_pcset(pc+2);
  2204 	sh4_x86.branch_taken = TRUE;
  2205 	return 2;
   2206     }
  2207 :}
  2208 UNDEF {:  
  2209     COUNT_INST(I_UNDEF);
  2210     if( sh4_x86.in_delay_slot ) {
  2211 	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2);    
  2212     } else {
  2213 	exit_block_exc(EXC_ILLEGAL, pc);    
  2214 	return 2;
   2215     }
  2216 :}
  2218 CLRMAC {:  
  2219     COUNT_INST(I_CLRMAC);
  2220     XORL_r32_r32(REG_EAX, REG_EAX);
  2221     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2222     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2223     sh4_x86.tstate = TSTATE_NONE;
  2224 :}
  2225 CLRS {:
  2226     COUNT_INST(I_CLRS);
  2227     CLC();
  2228     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2229     sh4_x86.tstate = TSTATE_NONE;
  2230 :}
  2231 CLRT {:  
  2232     COUNT_INST(I_CLRT);
  2233     CLC();
  2234     SETC_t();
  2235     sh4_x86.tstate = TSTATE_C;
  2236 :}
  2237 SETS {:  
  2238     COUNT_INST(I_SETS);
  2239     STC();
  2240     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2241     sh4_x86.tstate = TSTATE_NONE;
  2242 :}
  2243 SETT {:  
  2244     COUNT_INST(I_SETT);
  2245     STC();
  2246     SETC_t();
  2247     sh4_x86.tstate = TSTATE_C;
  2248 :}
  2250 /* Floating point moves */
  2251 FMOV FRm, FRn {:  
  2252     COUNT_INST(I_FMOV1);
  2253     check_fpuen();
  2254     if( sh4_x86.double_size ) {
  2255         load_dr0( REG_EAX, FRm );
  2256         load_dr1( REG_ECX, FRm );
  2257         store_dr0( REG_EAX, FRn );
  2258         store_dr1( REG_ECX, FRn );
  2259     } else {
  2260         load_fr( REG_EAX, FRm ); // SZ=0 branch
  2261         store_fr( REG_EAX, FRn );
   2262     }
  2263 :}
  2264 FMOV FRm, @Rn {: 
  2265     COUNT_INST(I_FMOV2);
  2266     check_fpuen();
  2267     load_reg( REG_EAX, Rn );
  2268     if( sh4_x86.double_size ) {
  2269         check_walign64( REG_EAX );
  2270         load_dr0( REG_EDX, FRm );
  2271         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2272         load_reg( REG_EAX, Rn );
  2273         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2274         load_dr1( REG_EDX, FRm );
  2275         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2276     } else {
  2277         check_walign32( REG_EAX );
  2278         load_fr( REG_EDX, FRm );
  2279         MEM_WRITE_LONG( REG_EAX, REG_EDX );
   2280     }
  2281     sh4_x86.tstate = TSTATE_NONE;
  2282 :}
  2283 FMOV @Rm, FRn {:  
  2284     COUNT_INST(I_FMOV5);
  2285     check_fpuen();
  2286     load_reg( REG_EAX, Rm );
  2287     if( sh4_x86.double_size ) {
  2288         check_ralign64( REG_EAX );
  2289         MEM_READ_LONG( REG_EAX, REG_EAX );
  2290         store_dr0( REG_EAX, FRn );
  2291         load_reg( REG_EAX, Rm );
  2292         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2293         MEM_READ_LONG( REG_EAX, REG_EAX );
  2294         store_dr1( REG_EAX, FRn );
  2295     } else {
  2296         check_ralign32( REG_EAX );
  2297         MEM_READ_LONG( REG_EAX, REG_EAX );
  2298         store_fr( REG_EAX, FRn );
   2299     }
  2300     sh4_x86.tstate = TSTATE_NONE;
  2301 :}
  2302 FMOV FRm, @-Rn {:  
  2303     COUNT_INST(I_FMOV3);
  2304     check_fpuen();
  2305     load_reg( REG_EAX, Rn );
  2306     if( sh4_x86.double_size ) {
  2307         check_walign64( REG_EAX );
  2308         LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
  2309         load_dr0( REG_EDX, FRm );
  2310         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2311         load_reg( REG_EAX, Rn );
  2312         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2313         load_dr1( REG_EDX, FRm );
  2314         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2315         ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
  2316     } else {
  2317         check_walign32( REG_EAX );
  2318         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2319         load_fr( REG_EDX, FRm );
  2320         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2321         ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
   2322     }
  2323     sh4_x86.tstate = TSTATE_NONE;
  2324 :}
  2325 FMOV @Rm+, FRn {:
  2326     COUNT_INST(I_FMOV6);
  2327     check_fpuen();
  2328     load_reg( REG_EAX, Rm );
  2329     if( sh4_x86.double_size ) {
  2330         check_ralign64( REG_EAX );
  2331         MEM_READ_LONG( REG_EAX, REG_EAX );
  2332         store_dr0( REG_EAX, FRn );
  2333         load_reg( REG_EAX, Rm );
  2334         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2335         MEM_READ_LONG( REG_EAX, REG_EAX );
  2336         store_dr1( REG_EAX, FRn );
  2337         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
  2338     } else {
  2339         check_ralign32( REG_EAX );
  2340         MEM_READ_LONG( REG_EAX, REG_EAX );
  2341         store_fr( REG_EAX, FRn );
  2342         ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
   2343     }
  2344     sh4_x86.tstate = TSTATE_NONE;
  2345 :}
  2346 FMOV FRm, @(R0, Rn) {:  
  2347     COUNT_INST(I_FMOV4);
  2348     check_fpuen();
  2349     load_reg( REG_EAX, Rn );
  2350     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2351     if( sh4_x86.double_size ) {
  2352         check_walign64( REG_EAX );
  2353         load_dr0( REG_EDX, FRm );
  2354         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2355         load_reg( REG_EAX, Rn );
  2356         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2357         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2358         load_dr1( REG_EDX, FRm );
  2359         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2360     } else {
  2361         check_walign32( REG_EAX );
  2362         load_fr( REG_EDX, FRm );
  2363         MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
   2364     }
  2365     sh4_x86.tstate = TSTATE_NONE;
  2366 :}
  2367 FMOV @(R0, Rm), FRn {:  
  2368     COUNT_INST(I_FMOV7);
  2369     check_fpuen();
  2370     load_reg( REG_EAX, Rm );
  2371     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2372     if( sh4_x86.double_size ) {
  2373         check_ralign64( REG_EAX );
  2374         MEM_READ_LONG( REG_EAX, REG_EAX );
  2375         store_dr0( REG_EAX, FRn );
  2376         load_reg( REG_EAX, Rm );
  2377         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2378         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2379         MEM_READ_LONG( REG_EAX, REG_EAX );
  2380         store_dr1( REG_EAX, FRn );
  2381     } else {
  2382         check_ralign32( REG_EAX );
  2383         MEM_READ_LONG( REG_EAX, REG_EAX );
  2384         store_fr( REG_EAX, FRn );
   2385     }
  2386     sh4_x86.tstate = TSTATE_NONE;
  2387 :}
  2388 FLDI0 FRn {:  /* IFF PR=0 */
  2389     COUNT_INST(I_FLDI0);
  2390     check_fpuen();
  2391     if( sh4_x86.double_prec == 0 ) {
  2392         XORL_r32_r32( REG_EAX, REG_EAX );
  2393         store_fr( REG_EAX, FRn );
   2394     }
  2395     sh4_x86.tstate = TSTATE_NONE;
  2396 :}
  2397 FLDI1 FRn {:  /* IFF PR=0 */
  2398     COUNT_INST(I_FLDI1);
  2399     check_fpuen();
  2400     if( sh4_x86.double_prec == 0 ) {
  2401         MOVL_imm32_r32( 0x3F800000, REG_EAX );
  2402         store_fr( REG_EAX, FRn );
   2403     }
  2404 :}
  2406 FLOAT FPUL, FRn {:  
  2407     COUNT_INST(I_FLOAT);
  2408     check_fpuen();
  2409     FILD_rbpdisp(R_FPUL);
  2410     if( sh4_x86.double_prec ) {
  2411         pop_dr( FRn );
  2412     } else {
  2413         pop_fr( FRn );
   2414     }
  2415 :}
  2416 FTRC FRm, FPUL {:  
  2417     COUNT_INST(I_FTRC);
  2418     check_fpuen();
  2419     if( sh4_x86.double_prec ) {
  2420         push_dr( FRm );
  2421     } else {
   2422         push_fr( FRm );
   2423     }
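            // Saturating float->int: compare against max_int/min_int first
            // and store the clamp constant directly when out of range;
            // otherwise switch the x87 control word to round-toward-zero
            // around the FISTP and restore it afterwards.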
  2424     MOVP_immptr_rptr( &max_int, REG_ECX );
  2425     FILD_r32disp( REG_ECX, 0 );
  2426     FCOMIP_st(1);
  2427     JNA_label( sat );
  2428     MOVP_immptr_rptr( &min_int, REG_ECX );
  2429     FILD_r32disp( REG_ECX, 0 );
  2430     FCOMIP_st(1);              
  2431     JAE_label( sat2 );            
  2432     MOVP_immptr_rptr( &save_fcw, REG_EAX );
  2433     FNSTCW_r32disp( REG_EAX, 0 );
  2434     MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
  2435     FLDCW_r32disp( REG_EDX, 0 );
  2436     FISTP_rbpdisp(R_FPUL);             
  2437     FLDCW_r32disp( REG_EAX, 0 );
  2438     JMP_label(end);             
  2440     JMP_TARGET(sat);
  2441     JMP_TARGET(sat2);
  2442     MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
  2443     MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
  2444     FPOP_st();
  2445     JMP_TARGET(end);
  2446     sh4_x86.tstate = TSTATE_NONE;
  2447 :}
  2448 FLDS FRm, FPUL {:  
  2449     COUNT_INST(I_FLDS);
  2450     check_fpuen();
  2451     load_fr( REG_EAX, FRm );
  2452     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2453 :}
  2454 FSTS FPUL, FRn {:  
  2455     COUNT_INST(I_FSTS);
  2456     check_fpuen();
  2457     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2458     store_fr( REG_EAX, FRn );
  2459 :}
  2460 FCNVDS FRm, FPUL {:  
  2461     COUNT_INST(I_FCNVDS);
  2462     check_fpuen();
  2463     if( sh4_x86.double_prec ) {
  2464         push_dr( FRm );
  2465         pop_fpul();
   2466     }
  2467 :}
  2468 FCNVSD FPUL, FRn {:  
  2469     COUNT_INST(I_FCNVSD);
  2470     check_fpuen();
  2471     if( sh4_x86.double_prec ) {
  2472         push_fpul();
  2473         pop_dr( FRn );
   2474     }
  2475 :}
  2477 /* Floating point instructions */
  2478 FABS FRn {:  
  2479     COUNT_INST(I_FABS);
  2480     check_fpuen();
  2481     if( sh4_x86.double_prec ) {
  2482         push_dr(FRn);
  2483         FABS_st0();
  2484         pop_dr(FRn);
  2485     } else {
  2486         push_fr(FRn);
  2487         FABS_st0();
  2488         pop_fr(FRn);
   2489     }
  2490 :}
  2491 FADD FRm, FRn {:  
  2492     COUNT_INST(I_FADD);
  2493     check_fpuen();
  2494     if( sh4_x86.double_prec ) {
  2495         push_dr(FRm);
  2496         push_dr(FRn);
  2497         FADDP_st(1);
  2498         pop_dr(FRn);
  2499     } else {
  2500         push_fr(FRm);
  2501         push_fr(FRn);
  2502         FADDP_st(1);
  2503         pop_fr(FRn);
   2504     }
  2505 :}
  2506 FDIV FRm, FRn {:  
  2507     COUNT_INST(I_FDIV);
  2508     check_fpuen();
  2509     if( sh4_x86.double_prec ) {
  2510         push_dr(FRn);
  2511         push_dr(FRm);
  2512         FDIVP_st(1);
  2513         pop_dr(FRn);
  2514     } else {
  2515         push_fr(FRn);
  2516         push_fr(FRm);
  2517         FDIVP_st(1);
  2518         pop_fr(FRn);
   2519     }
  2520 :}
  2521 FMAC FR0, FRm, FRn {:  
  2522     COUNT_INST(I_FMAC);
  2523     check_fpuen();
  2524     if( sh4_x86.double_prec ) {
  2525         push_dr( 0 );
  2526         push_dr( FRm );
  2527         FMULP_st(1);
  2528         push_dr( FRn );
  2529         FADDP_st(1);
  2530         pop_dr( FRn );
  2531     } else {
  2532         push_fr( 0 );
  2533         push_fr( FRm );
  2534         FMULP_st(1);
  2535         push_fr( FRn );
  2536         FADDP_st(1);
  2537         pop_fr( FRn );
   2538     }
  2539 :}
  2541 FMUL FRm, FRn {:  
  2542     COUNT_INST(I_FMUL);
  2543     check_fpuen();
  2544     if( sh4_x86.double_prec ) {
  2545         push_dr(FRm);
  2546         push_dr(FRn);
  2547         FMULP_st(1);
  2548         pop_dr(FRn);
  2549     } else {
  2550         push_fr(FRm);
  2551         push_fr(FRn);
  2552         FMULP_st(1);
  2553         pop_fr(FRn);
   2554     }
  2555 :}
  2556 FNEG FRn {:  
  2557     COUNT_INST(I_FNEG);
  2558     check_fpuen();
  2559     if( sh4_x86.double_prec ) {
  2560         push_dr(FRn);
  2561         FCHS_st0();
  2562         pop_dr(FRn);
  2563     } else {
  2564         push_fr(FRn);
  2565         FCHS_st0();
  2566         pop_fr(FRn);
   2567     }
  2568 :}
  2569 FSRRA FRn {:  
  2570     COUNT_INST(I_FSRRA);
  2571     check_fpuen();
  2572     if( sh4_x86.double_prec == 0 ) {
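                // 1.0 / sqrt(FRn) on the x87 stack: FDIVP computes
                // st(1)/st(0) here, i.e. 1.0 divided by the square root.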
  2573         FLD1_st0();
  2574         push_fr(FRn);
  2575         FSQRT_st0();
  2576         FDIVP_st(1);
  2577         pop_fr(FRn);
   2578     }
  2579 :}
  2580 FSQRT FRn {:  
  2581     COUNT_INST(I_FSQRT);
  2582     check_fpuen();
  2583     if( sh4_x86.double_prec ) {
  2584         push_dr(FRn);
  2585         FSQRT_st0();
  2586         pop_dr(FRn);
  2587     } else {
  2588         push_fr(FRn);
  2589         FSQRT_st0();
  2590         pop_fr(FRn);
   2591     }
  2592 :}
  2593 FSUB FRm, FRn {:  
  2594     COUNT_INST(I_FSUB);
  2595     check_fpuen();
  2596     if( sh4_x86.double_prec ) {
  2597         push_dr(FRn);
  2598         push_dr(FRm);
  2599         FSUBP_st(1);
  2600         pop_dr(FRn);
  2601     } else {
  2602         push_fr(FRn);
  2603         push_fr(FRm);
  2604         FSUBP_st(1);
  2605         pop_fr(FRn);
   2606     }
  2607 :}
  2609 FCMP/EQ FRm, FRn {:  
  2610     COUNT_INST(I_FCMPEQ);
  2611     check_fpuen();
  2612     if( sh4_x86.double_prec ) {
  2613         push_dr(FRm);
  2614         push_dr(FRn);
  2615     } else {
  2616         push_fr(FRm);
  2617         push_fr(FRn);
   2618     }
  2619     FCOMIP_st(1);
  2620     SETE_t();
  2621     FPOP_st();
  2622     sh4_x86.tstate = TSTATE_E;
  2623 :}
  2624 FCMP/GT FRm, FRn {:  
  2625     COUNT_INST(I_FCMPGT);
  2626     check_fpuen();
  2627     if( sh4_x86.double_prec ) {
  2628         push_dr(FRm);
  2629         push_dr(FRn);
  2630     } else {
  2631         push_fr(FRm);
  2632         push_fr(FRn);
   2633     }
  2634     FCOMIP_st(1);
  2635     SETA_t();
  2636     FPOP_st();
  2637     sh4_x86.tstate = TSTATE_A;
  2638 :}
  2640 FSCA FPUL, FRn {:  
  2641     COUNT_INST(I_FSCA);
  2642     check_fpuen();
  2643     if( sh4_x86.double_prec == 0 ) {
  2644         LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
  2645         MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2646         CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
   2647     }
  2648     sh4_x86.tstate = TSTATE_NONE;
  2649 :}
  2650 FIPR FVm, FVn {:  
  2651     COUNT_INST(I_FIPR);
  2652     check_fpuen();
  2653     if( sh4_x86.double_prec == 0 ) {
  2654         if( sh4_x86.sse3_enabled ) {
  2655             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2656             MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2657             HADDPS_xmm_xmm( 4, 4 ); 
  2658             HADDPS_xmm_xmm( 4, 4 );
  2659             MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2660         } else {
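                    // x87 fallback: accumulate the four-term dot product
                    // FVm . FVn and store it into the last element of FVn,
                    // per the SH4 FIPR definition.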
  2661             push_fr( FVm<<2 );
  2662             push_fr( FVn<<2 );
  2663             FMULP_st(1);
  2664             push_fr( (FVm<<2)+1);
  2665             push_fr( (FVn<<2)+1);
  2666             FMULP_st(1);
  2667             FADDP_st(1);
  2668             push_fr( (FVm<<2)+2);
  2669             push_fr( (FVn<<2)+2);
  2670             FMULP_st(1);
  2671             FADDP_st(1);
  2672             push_fr( (FVm<<2)+3);
  2673             push_fr( (FVn<<2)+3);
  2674             FMULP_st(1);
  2675             FADDP_st(1);
  2676             pop_fr( (FVn<<2)+3);
   2677         }
   2678     }
  2679 :}
  2680 FTRV XMTRX, FVn {:  
  2681     COUNT_INST(I_FTRV);
  2682     check_fpuen();
  2683     if( sh4_x86.double_prec == 0 ) {
  2684         if( sh4_x86.sse3_enabled ) {
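                    // SSE3 4x4 matrix * vector: xmm0-3 hold the XMTRX quads,
                    // each element of FVn is broadcast into one of xmm4-7,
                    // and the partial products are summed into FVn.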
  2685             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2686             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2687             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2688             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2690             MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2691             MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2692             MOV_xmm_xmm( 4, 6 );
  2693             MOV_xmm_xmm( 5, 7 );
  2694             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2695             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2696             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2697             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2698             MULPS_xmm_xmm( 0, 4 );
  2699             MULPS_xmm_xmm( 1, 5 );
  2700             MULPS_xmm_xmm( 2, 6 );
  2701             MULPS_xmm_xmm( 3, 7 );
  2702             ADDPS_xmm_xmm( 5, 4 );
  2703             ADDPS_xmm_xmm( 7, 6 );
  2704             ADDPS_xmm_xmm( 6, 4 );
  2705             MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2706         } else {
  2707             LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
  2708             CALL1_ptr_r32( sh4_ftrv, REG_EAX );
   2709         }
   2710     }
  2711     sh4_x86.tstate = TSTATE_NONE;
  2712 :}
  2714 FRCHG {:  
  2715     COUNT_INST(I_FRCHG);
  2716     check_fpuen();
  2717     XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
  2718     CALL_ptr( sh4_switch_fr_banks );
  2719     sh4_x86.tstate = TSTATE_NONE;
  2720 :}
  2721 FSCHG {:  
  2722     COUNT_INST(I_FSCHG);
  2723     check_fpuen();
  2724     XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
  2725     XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
  2726     sh4_x86.tstate = TSTATE_NONE;
  2727     sh4_x86.double_size = !sh4_x86.double_size;
  2728     sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
  2729 :}
  2731 /* Processor control instructions */
  2732 LDC Rm, SR {:
  2733     COUNT_INST(I_LDCSR);
  2734     if( sh4_x86.in_delay_slot ) {
  2735 	SLOTILLEGAL();
  2736     } else {
  2737 	check_priv();
  2738 	load_reg( REG_EAX, Rm );
  2739 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2740 	sh4_x86.fpuen_checked = FALSE;
  2741 	sh4_x86.tstate = TSTATE_NONE;
   2742 	sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2743 	return 2;
   2744     }
  2745 :}
  2746 LDC Rm, GBR {: 
  2747     COUNT_INST(I_LDC);
  2748     load_reg( REG_EAX, Rm );
  2749     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2750 :}
  2751 LDC Rm, VBR {:  
  2752     COUNT_INST(I_LDC);
  2753     check_priv();
  2754     load_reg( REG_EAX, Rm );
  2755     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2756     sh4_x86.tstate = TSTATE_NONE;
  2757 :}
  2758 LDC Rm, SSR {:  
  2759     COUNT_INST(I_LDC);
  2760     check_priv();
  2761     load_reg( REG_EAX, Rm );
  2762     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2763     sh4_x86.tstate = TSTATE_NONE;
  2764 :}
  2765 LDC Rm, SGR {:  
  2766     COUNT_INST(I_LDC);
  2767     check_priv();
  2768     load_reg( REG_EAX, Rm );
  2769     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2770     sh4_x86.tstate = TSTATE_NONE;
  2771 :}
  2772 LDC Rm, SPC {:  
  2773     COUNT_INST(I_LDC);
  2774     check_priv();
  2775     load_reg( REG_EAX, Rm );
  2776     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2777     sh4_x86.tstate = TSTATE_NONE;
  2778 :}
  2779 LDC Rm, DBR {:  
  2780     COUNT_INST(I_LDC);
  2781     check_priv();
  2782     load_reg( REG_EAX, Rm );
  2783     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2784     sh4_x86.tstate = TSTATE_NONE;
  2785 :}
  2786 LDC Rm, Rn_BANK {:  
  2787     COUNT_INST(I_LDC);
  2788     check_priv();
  2789     load_reg( REG_EAX, Rm );
  2790     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2791     sh4_x86.tstate = TSTATE_NONE;
  2792 :}
  2793 LDC.L @Rm+, GBR {:  
  2794     COUNT_INST(I_LDCM);
  2795     load_reg( REG_EAX, Rm );
  2796     check_ralign32( REG_EAX );
  2797     MEM_READ_LONG( REG_EAX, REG_EAX );
  2798     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2799     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2800     sh4_x86.tstate = TSTATE_NONE;
  2801 :}
  2802 LDC.L @Rm+, SR {:
  2803     COUNT_INST(I_LDCSRM);
  2804     if( sh4_x86.in_delay_slot ) {
  2805 	SLOTILLEGAL();
  2806     } else {
  2807 	check_priv();
  2808 	load_reg( REG_EAX, Rm );
  2809 	check_ralign32( REG_EAX );
  2810 	MEM_READ_LONG( REG_EAX, REG_EAX );
  2811 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2812 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2813 	sh4_x86.fpuen_checked = FALSE;
  2814 	sh4_x86.tstate = TSTATE_NONE;
   2815 	sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2816 	return 2;
   2817     }
  2818 :}
  2819 LDC.L @Rm+, VBR {:  
  2820     COUNT_INST(I_LDCM);
  2821     check_priv();
  2822     load_reg( REG_EAX, Rm );
  2823     check_ralign32( REG_EAX );
  2824     MEM_READ_LONG( REG_EAX, REG_EAX );
  2825     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2826     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2827     sh4_x86.tstate = TSTATE_NONE;
  2828 :}
  2829 LDC.L @Rm+, SSR {:
  2830     COUNT_INST(I_LDCM);
  2831     check_priv();
  2832     load_reg( REG_EAX, Rm );
  2833     check_ralign32( REG_EAX );
  2834     MEM_READ_LONG( REG_EAX, REG_EAX );
  2835     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2836     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2837     sh4_x86.tstate = TSTATE_NONE;
  2838 :}
  2839 LDC.L @Rm+, SGR {:  
  2840     COUNT_INST(I_LDCM);
  2841     check_priv();
  2842     load_reg( REG_EAX, Rm );
  2843     check_ralign32( REG_EAX );
  2844     MEM_READ_LONG( REG_EAX, REG_EAX );
  2845     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2846     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2847     sh4_x86.tstate = TSTATE_NONE;
  2848 :}
  2849 LDC.L @Rm+, SPC {:  
  2850     COUNT_INST(I_LDCM);
  2851     check_priv();
  2852     load_reg( REG_EAX, Rm );
  2853     check_ralign32( REG_EAX );
  2854     MEM_READ_LONG( REG_EAX, REG_EAX );
  2855     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2856     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2857     sh4_x86.tstate = TSTATE_NONE;
  2858 :}
  2859 LDC.L @Rm+, DBR {:  
  2860     COUNT_INST(I_LDCM);
  2861     check_priv();
  2862     load_reg( REG_EAX, Rm );
  2863     check_ralign32( REG_EAX );
  2864     MEM_READ_LONG( REG_EAX, REG_EAX );
  2865     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2866     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2867     sh4_x86.tstate = TSTATE_NONE;
  2868 :}
  2869 LDC.L @Rm+, Rn_BANK {:  
  2870     COUNT_INST(I_LDCM);
  2871     check_priv();
  2872     load_reg( REG_EAX, Rm );
  2873     check_ralign32( REG_EAX );
  2874     MEM_READ_LONG( REG_EAX, REG_EAX );
  2875     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2876     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2877     sh4_x86.tstate = TSTATE_NONE;
  2878 :}
  2879 LDS Rm, FPSCR {:
  2880     COUNT_INST(I_LDSFPSCR);
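            // An FPSCR write can change the FR/SZ/PR bits that translation
            // depends on, so mark the SH4 mode unknown and end the block here.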
  2881     check_fpuen();
  2882     load_reg( REG_EAX, Rm );
  2883     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2884     sh4_x86.tstate = TSTATE_NONE;
  2885     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2886     return 2;
  2887 :}
  2888 LDS.L @Rm+, FPSCR {:  
  2889     COUNT_INST(I_LDSFPSCRM);
  2890     check_fpuen();
  2891     load_reg( REG_EAX, Rm );
  2892     check_ralign32( REG_EAX );
  2893     MEM_READ_LONG( REG_EAX, REG_EAX );
  2894     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2895     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2896     sh4_x86.tstate = TSTATE_NONE;
  2897     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2898     return 2;
  2899 :}
  2900 LDS Rm, FPUL {:  
  2901     COUNT_INST(I_LDS);
  2902     check_fpuen();
  2903     load_reg( REG_EAX, Rm );
  2904     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2905 :}
  2906 LDS.L @Rm+, FPUL {:  
  2907     COUNT_INST(I_LDSM);
  2908     check_fpuen();
  2909     load_reg( REG_EAX, Rm );
  2910     check_ralign32( REG_EAX );
  2911     MEM_READ_LONG( REG_EAX, REG_EAX );
  2912     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2913     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2914     sh4_x86.tstate = TSTATE_NONE;
  2915 :}
  2916 LDS Rm, MACH {: 
  2917     COUNT_INST(I_LDS);
  2918     load_reg( REG_EAX, Rm );
  2919     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2920 :}
  2921 LDS.L @Rm+, MACH {:  
  2922     COUNT_INST(I_LDSM);
  2923     load_reg( REG_EAX, Rm );
  2924     check_ralign32( REG_EAX );
  2925     MEM_READ_LONG( REG_EAX, REG_EAX );
  2926     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2927     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2928     sh4_x86.tstate = TSTATE_NONE;
  2929 :}
  2930 LDS Rm, MACL {:  
  2931     COUNT_INST(I_LDS);
  2932     load_reg( REG_EAX, Rm );
  2933     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2934 :}
  2935 LDS.L @Rm+, MACL {:  
  2936     COUNT_INST(I_LDSM);
  2937     load_reg( REG_EAX, Rm );
  2938     check_ralign32( REG_EAX );
  2939     MEM_READ_LONG( REG_EAX, REG_EAX );
  2940     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2941     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2942     sh4_x86.tstate = TSTATE_NONE;
  2943 :}
  2944 LDS Rm, PR {:  
  2945     COUNT_INST(I_LDS);
  2946     load_reg( REG_EAX, Rm );
  2947     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2948 :}
  2949 LDS.L @Rm+, PR {:  
  2950     COUNT_INST(I_LDSM);
  2951     load_reg( REG_EAX, Rm );
  2952     check_ralign32( REG_EAX );
  2953     MEM_READ_LONG( REG_EAX, REG_EAX );
  2954     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2955     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2956     sh4_x86.tstate = TSTATE_NONE;
  2957 :}
  2958 LDTLB {:  
  2959     COUNT_INST(I_LDTLB);
  2960     CALL_ptr( MMU_ldtlb );
  2961     sh4_x86.tstate = TSTATE_NONE;
  2962 :}
  2963 OCBI @Rn {:
  2964     COUNT_INST(I_OCBI);
  2965 :}
  2966 OCBP @Rn {:
  2967     COUNT_INST(I_OCBP);
  2968 :}
  2969 OCBWB @Rn {:
  2970     COUNT_INST(I_OCBWB);
  2971 :}
  2972 PREF @Rn {:
  2973     COUNT_INST(I_PREF);
  2974     load_reg( REG_EAX, Rn );
  2975     MEM_PREFETCH( REG_EAX );
  2976     sh4_x86.tstate = TSTATE_NONE;
  2977 :}
  2978 SLEEP {: 
  2979     COUNT_INST(I_SLEEP);
  2980     check_priv();
  2981     CALL_ptr( sh4_sleep );
  2982     sh4_x86.tstate = TSTATE_NONE;
  2983     sh4_x86.in_delay_slot = DELAY_NONE;
  2984     return 2;
  2985 :}
  2986 STC SR, Rn {:
  2987     COUNT_INST(I_STCSR);
  2988     check_priv();
  2989     CALL_ptr(sh4_read_sr);
  2990     store_reg( REG_EAX, Rn );
  2991     sh4_x86.tstate = TSTATE_NONE;
  2992 :}
  2993 STC GBR, Rn {:  
  2994     COUNT_INST(I_STC);
  2995     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  2996     store_reg( REG_EAX, Rn );
  2997 :}
  2998 STC VBR, Rn {:  
  2999     COUNT_INST(I_STC);
  3000     check_priv();
  3001     MOVL_rbpdisp_r32( R_VBR, REG_EAX );
  3002     store_reg( REG_EAX, Rn );
  3003     sh4_x86.tstate = TSTATE_NONE;
  3004 :}
  3005 STC SSR, Rn {:  
  3006     COUNT_INST(I_STC);
  3007     check_priv();
  3008     MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  3009     store_reg( REG_EAX, Rn );
  3010     sh4_x86.tstate = TSTATE_NONE;
  3011 :}
  3012 STC SPC, Rn {:  
  3013     COUNT_INST(I_STC);
  3014     check_priv();
  3015     MOVL_rbpdisp_r32( R_SPC, REG_EAX );
  3016     store_reg( REG_EAX, Rn );
  3017     sh4_x86.tstate = TSTATE_NONE;
  3018 :}
  3019 STC SGR, Rn {:  
  3020     COUNT_INST(I_STC);
  3021     check_priv();
  3022     MOVL_rbpdisp_r32( R_SGR, REG_EAX );
  3023     store_reg( REG_EAX, Rn );
  3024     sh4_x86.tstate = TSTATE_NONE;
  3025 :}
  3026 STC DBR, Rn {:  
  3027     COUNT_INST(I_STC);
  3028     check_priv();
  3029     MOVL_rbpdisp_r32( R_DBR, REG_EAX );
  3030     store_reg( REG_EAX, Rn );
  3031     sh4_x86.tstate = TSTATE_NONE;
  3032 :}
  3033 STC Rm_BANK, Rn {:
  3034     COUNT_INST(I_STC);
  3035     check_priv();
  3036     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
  3037     store_reg( REG_EAX, Rn );
  3038     sh4_x86.tstate = TSTATE_NONE;
  3039 :}
  3040 STC.L SR, @-Rn {:
  3041     COUNT_INST(I_STCSRM);
  3042     check_priv();
  3043     CALL_ptr( sh4_read_sr );
  3044     MOVL_r32_r32( REG_EAX, REG_EDX );
  3045     load_reg( REG_EAX, Rn );
  3046     check_walign32( REG_EAX );
  3047     LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  3048     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3049     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3050     sh4_x86.tstate = TSTATE_NONE;
  3051 :}
  3052 STC.L VBR, @-Rn {:  
  3053     COUNT_INST(I_STCM);
  3054     check_priv();
  3055     load_reg( REG_EAX, Rn );
  3056     check_walign32( REG_EAX );
  3057     ADDL_imms_r32( -4, REG_EAX );
  3058     MOVL_rbpdisp_r32( R_VBR, REG_EDX );
  3059     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3060     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3061     sh4_x86.tstate = TSTATE_NONE;
  3062 :}
  3063 STC.L SSR, @-Rn {:  
  3064     COUNT_INST(I_STCM);
  3065     check_priv();
  3066     load_reg( REG_EAX, Rn );
  3067     check_walign32( REG_EAX );
  3068     ADDL_imms_r32( -4, REG_EAX );
  3069     MOVL_rbpdisp_r32( R_SSR, REG_EDX );
  3070     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3071     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3072     sh4_x86.tstate = TSTATE_NONE;
  3073 :}
  3074 STC.L SPC, @-Rn {:
  3075     COUNT_INST(I_STCM);
  3076     check_priv();
  3077     load_reg( REG_EAX, Rn );
  3078     check_walign32( REG_EAX );
  3079     ADDL_imms_r32( -4, REG_EAX );
  3080     MOVL_rbpdisp_r32( R_SPC, REG_EDX );
  3081     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3082     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3083     sh4_x86.tstate = TSTATE_NONE;
  3084 :}
  3085 STC.L SGR, @-Rn {:  
  3086     COUNT_INST(I_STCM);
  3087     check_priv();
  3088     load_reg( REG_EAX, Rn );
  3089     check_walign32( REG_EAX );
  3090     ADDL_imms_r32( -4, REG_EAX );
  3091     MOVL_rbpdisp_r32( R_SGR, REG_EDX );
  3092     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3093     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3094     sh4_x86.tstate = TSTATE_NONE;
  3095 :}
  3096 STC.L DBR, @-Rn {:  
  3097     COUNT_INST(I_STCM);
  3098     check_priv();
  3099     load_reg( REG_EAX, Rn );
  3100     check_walign32( REG_EAX );
  3101     ADDL_imms_r32( -4, REG_EAX );
  3102     MOVL_rbpdisp_r32( R_DBR, REG_EDX );
  3103     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3104     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3105     sh4_x86.tstate = TSTATE_NONE;
  3106 :}
  3107 STC.L Rm_BANK, @-Rn {:  
  3108     COUNT_INST(I_STCM);
  3109     check_priv();
  3110     load_reg( REG_EAX, Rn );
  3111     check_walign32( REG_EAX );
  3112     ADDL_imms_r32( -4, REG_EAX );
  3113     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
  3114     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3115     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3116     sh4_x86.tstate = TSTATE_NONE;
  3117 :}
  3118 STC.L GBR, @-Rn {:  
  3119     COUNT_INST(I_STCM);
  3120     load_reg( REG_EAX, Rn );
  3121     check_walign32( REG_EAX );
  3122     ADDL_imms_r32( -4, REG_EAX );
  3123     MOVL_rbpdisp_r32( R_GBR, REG_EDX );
  3124     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3125     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3126     sh4_x86.tstate = TSTATE_NONE;
  3127 :}
  3128 STS FPSCR, Rn {:  
  3129     COUNT_INST(I_STSFPSCR);
  3130     check_fpuen();
  3131     MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
  3132     store_reg( REG_EAX, Rn );
  3133 :}
  3134 STS.L FPSCR, @-Rn {:  
  3135     COUNT_INST(I_STSFPSCRM);
  3136     check_fpuen();
  3137     load_reg( REG_EAX, Rn );
  3138     check_walign32( REG_EAX );
  3139     ADDL_imms_r32( -4, REG_EAX );
  3140     MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
  3141     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3142     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3143     sh4_x86.tstate = TSTATE_NONE;
  3144 :}
  3145 STS FPUL, Rn {:  
  3146     COUNT_INST(I_STS);
  3147     check_fpuen();
  3148     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  3149     store_reg( REG_EAX, Rn );
  3150 :}
  3151 STS.L FPUL, @-Rn {:  
  3152     COUNT_INST(I_STSM);
  3153     check_fpuen();
  3154     load_reg( REG_EAX, Rn );
  3155     check_walign32( REG_EAX );
  3156     ADDL_imms_r32( -4, REG_EAX );
  3157     MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
  3158     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3159     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3160     sh4_x86.tstate = TSTATE_NONE;
  3161 :}
  3162 STS MACH, Rn {:  
  3163     COUNT_INST(I_STS);
  3164     MOVL_rbpdisp_r32( R_MACH, REG_EAX );
  3165     store_reg( REG_EAX, Rn );
  3166 :}
  3167 STS.L MACH, @-Rn {:  
  3168     COUNT_INST(I_STSM);
  3169     load_reg( REG_EAX, Rn );
  3170     check_walign32( REG_EAX );
  3171     ADDL_imms_r32( -4, REG_EAX );
  3172     MOVL_rbpdisp_r32( R_MACH, REG_EDX );
  3173     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3174     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3175     sh4_x86.tstate = TSTATE_NONE;
  3176 :}
  3177 STS MACL, Rn {:  
  3178     COUNT_INST(I_STS);
  3179     MOVL_rbpdisp_r32( R_MACL, REG_EAX );
  3180     store_reg( REG_EAX, Rn );
  3181 :}
  3182 STS.L MACL, @-Rn {:  
  3183     COUNT_INST(I_STSM);
  3184     load_reg( REG_EAX, Rn );
  3185     check_walign32( REG_EAX );
  3186     ADDL_imms_r32( -4, REG_EAX );
  3187     MOVL_rbpdisp_r32( R_MACL, REG_EDX );
  3188     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3189     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3190     sh4_x86.tstate = TSTATE_NONE;
  3191 :}
  3192 STS PR, Rn {:  
  3193     COUNT_INST(I_STS);
  3194     MOVL_rbpdisp_r32( R_PR, REG_EAX );
  3195     store_reg( REG_EAX, Rn );
  3196 :}
  3197 STS.L PR, @-Rn {:  
  3198     COUNT_INST(I_STSM);
  3199     load_reg( REG_EAX, Rn );
  3200     check_walign32( REG_EAX );
  3201     ADDL_imms_r32( -4, REG_EAX );
  3202     MOVL_rbpdisp_r32( R_PR, REG_EDX );
  3203     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3204     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3205     sh4_x86.tstate = TSTATE_NONE;
  3206 :}
  3208 NOP {: 
  3209     COUNT_INST(I_NOP);
  3210     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  3211 :}
  3212 %%
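        /* Epilogue of the generated decode function: clear any delay-slot
           state and return 0. */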
  3213     sh4_x86.in_delay_slot = DELAY_NONE;
  3214     return 0;
   3215 }
  3218 /**
  3219  * The unwind methods only work if we compiled with DWARF2 frame information
   3220  * (i.e. -fexceptions), otherwise we have to use the direct frame scan.
  3221  */
  3222 #ifdef HAVE_EXCEPTIONS
  3223 #include <unwind.h>
  3225 struct UnwindInfo {
  3226     uintptr_t block_start;
  3227     uintptr_t block_end;
  3228     void *pc;
  3229 };
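        /* _Unwind_Backtrace callback: record the first frame whose PC lies
         * inside the translated block and stop the walk there. */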
  3231 static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
   3232 {
  3233     struct UnwindInfo *info = arg;
  3234     void *pc = (void *)_Unwind_GetIP(context);
  3235     if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
  3236         info->pc = pc;
  3237         return _URC_NORMAL_STOP;
   3238     }
  3239     return _URC_NO_REASON;
   3240 }
  3242 void *xlat_get_native_pc( void *code, uint32_t code_size )
   3243 {
  3244     struct _Unwind_Exception exc;
  3245     struct UnwindInfo info;
  3247     info.pc = NULL;
  3248     info.block_start = (uintptr_t)code;
  3249     info.block_end = info.block_start + code_size;
  3250     void *result = NULL;
  3251     _Unwind_Backtrace( xlat_check_frame, &info );
  3252     return info.pc;
   3253 }
  3254 #else
  3255 /* Assume this is an ia32 build - amd64 should always have dwarf information */
  3256 void *xlat_get_native_pc( void *code, uint32_t code_size )
   3257 {
  3258     void *result = NULL;
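            /* Scan up to 8 saved-%ebp frames for one whose saved %ebp equals
             * &sh4r + 128 (the value translated code keeps in %ebp); the word
             * above that slot is the return address within the block. */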
  3259     __asm__(
  3260         "mov %%ebp, %%eax\n\t"
  3261         "mov $0x8, %%ecx\n\t"
  3262         "mov %1, %%edx\n"
  3263         "frame_loop: test %%eax, %%eax\n\t"
  3264         "je frame_not_found\n\t"
  3265         "cmp (%%eax), %%edx\n\t"
  3266         "je frame_found\n\t"
  3267         "sub $0x1, %%ecx\n\t"
  3268         "je frame_not_found\n\t"
  3269         "movl (%%eax), %%eax\n\t"
  3270         "jmp frame_loop\n"
  3271         "frame_found: movl 0x4(%%eax), %0\n"
  3272         "frame_not_found:"
  3273         : "=r" (result)
  3274         : "r" (((uint8_t *)&sh4r) + 128 )
  3275         : "eax", "ecx", "edx" );
  3276     return result;
   3277 }
  3278 #endif