lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 1092:7c4ffe27e7b5
prev 1091:186558374345
next 1112:4cac5e474d4c
author nkeynes
date Sun Dec 20 21:01:03 2009 +1000
permissions -rw-r--r--
last change Fix 64-bit x86 disassembly
Add crash-report hook to SIGILL and SIGBUS
/**
 * $Id$
 *
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif

#include "lxdream.h"
#include "sh4/sh4core.h"
#include "sh4/sh4dasm.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4stat.h"
#include "sh4/sh4mmio.h"
#include "sh4/mmu.h"
#include "xlat/xltcache.h"
#include "xlat/x86/x86op.h"
#include "x86dasm/x86dasm.h"
#include "clock.h"

#define DEFAULT_BACKPATCH_SIZE 4096
/* Offset of a reg relative to the sh4r structure */
#define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)
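
/* The -128 bias pairs with sh4_translate_begin_block(), which points REG_EBP
 * at &sh4r + 128: frequently-used fields then fall within a signed 8-bit
 * displacement of %ebp, keeping the emitted addressing modes short. */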
#define R_T      REG_OFFSET(t)
#define R_Q      REG_OFFSET(q)
#define R_S      REG_OFFSET(s)
#define R_M      REG_OFFSET(m)
#define R_SR     REG_OFFSET(sr)
#define R_GBR    REG_OFFSET(gbr)
#define R_SSR    REG_OFFSET(ssr)
#define R_SPC    REG_OFFSET(spc)
#define R_VBR    REG_OFFSET(vbr)
#define R_MACH   REG_OFFSET(mac)+4
#define R_MACL   REG_OFFSET(mac)
#define R_PC     REG_OFFSET(pc)
#define R_NEW_PC REG_OFFSET(new_pc)
#define R_PR     REG_OFFSET(pr)
#define R_SGR    REG_OFFSET(sgr)
#define R_FPUL   REG_OFFSET(fpul)
#define R_FPSCR  REG_OFFSET(fpscr)
#define R_DBR    REG_OFFSET(dbr)
#define R_R(rn)  REG_OFFSET(r[rn])
#define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
#define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
#define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
#define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
#define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
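
/* The (f)^1 index swap stores each pair of single-precision registers
 * word-swapped within its 64-bit slot, so that a DR register occupies eight
 * contiguous bytes in host (little-endian) order and can be pushed or popped
 * with a single FLDD/FSTPD (see push_dr/pop_dr below). */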
#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2

struct backpatch_record {
    uint32_t fixup_offset;
    uint32_t fixup_icount;
    int32_t exc_code;
};

/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc;
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    int tstate;

    /* mode flags */
    gboolean tlb_on; /* True if tlb translation is active */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};

static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
static struct x86_symbol x86_symbol_table[] = {
    { "sh4r+128", ((char *)&sh4r)+128 },
    { "sh4_cpu_period", &sh4_cpu_period },
    { "sh4_address_space", NULL },
    { "sh4_user_address_space", NULL },
    { "sh4_write_fpscr", sh4_write_fpscr },
    { "sh4_write_sr", sh4_write_sr },
    { "sh4_read_sr", sh4_read_sr },
    { "sh4_sleep", sh4_sleep },
    { "sh4_fsca", sh4_fsca },
    { "sh4_ftrv", sh4_ftrv },
    { "sh4_switch_fr_banks", sh4_switch_fr_banks },
    { "sh4_execute_instruction", sh4_execute_instruction },
    { "signsat48", signsat48 },
    { "xlat_get_code_by_vma", xlat_get_code_by_vma },
    { "xlat_get_code", xlat_get_code }
};
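
/* CPUID with EAX=1 returns the feature flags in ECX; bit 0 indicates SSE3. */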
gboolean is_sse3_supported()
{
    uint32_t features;

    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE;
}

void sh4_translate_init(void)
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
    sh4_x86.sse3_enabled = is_sse3_supported();
    x86_symbol_table[2].ptr = sh4_address_space;
    x86_symbol_table[3].ptr = sh4_user_address_space;
    x86_disasm_init();
    x86_set_symtab( x86_symbol_table, sizeof(x86_symbol_table)/sizeof(struct x86_symbol) );
}
/**
 * Disassemble the given translated code block, and its source SH4 code block
 * side-by-side. The current native pc will be marked if non-null.
 */
void sh4_translate_disasm_block( FILE *out, void *code, sh4addr_t source_start, void *native_pc )
{
    char buf[256];
    char op[256];

    uintptr_t target_start = (uintptr_t)code, target_pc;
    uintptr_t target_end = target_start + xlat_get_code_size(code);
    uint32_t source_pc = source_start;
    uint32_t source_end = source_pc;
    xlat_recovery_record_t source_recov_table = XLAT_RECOVERY_TABLE(code);
    xlat_recovery_record_t source_recov_end = source_recov_table + XLAT_BLOCK_FOR_CODE(code)->recover_table_size - 1;

    for( target_pc = target_start; target_pc < target_end;  ) {
        uintptr_t pc2 = x86_disasm_instruction( target_pc, buf, sizeof(buf), op );
#if SIZEOF_VOID_P == 8
        fprintf( out, "%c%016lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                      target_pc, op, buf );
#else
        fprintf( out, "%c%08x: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                      target_pc, op, buf );
#endif
        if( source_recov_table < source_recov_end &&
            target_pc >= (target_start + source_recov_table->xlat_offset) ) {
            source_recov_table++;
            if( source_end < (source_start + (source_recov_table->sh4_icount)*2) )
                source_end = source_start + (source_recov_table->sh4_icount)*2;
        }

        if( source_pc < source_end ) {
            uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
            fprintf( out, " %08X: %s  %s\n", source_pc, op, buf );
            source_pc = source_pc2;
        } else {
            fprintf( out, "\n" );
        }

        target_pc = pc2;
    }

    while( source_pc < source_end ) {
        uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
        fprintf( out, "%*c %08X: %s  %s\n", 72,' ', source_pc, op, buf );
        source_pc = source_pc2;
    }
}
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    int reloc_size = 4;

    if( exc_code == -2 ) {
        reloc_size = sizeof(void *);
    }

    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                                          sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }

    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
        (((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}
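
/* Each backpatch record remembers a spot in the emitted code that can only
 * be resolved once sh4_translate_end_block() writes the exception trailer:
 * either a 32-bit jump displacement to an exception stub, or (exc_code == -2)
 * a full pointer to the memory-exception unwind code. fixup_icount is the
 * instruction's offset into the block, used by the trailer for cycle and
 * SPC accounting. */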
#define TSTATE_NONE -1
#define TSTATE_O    X86_COND_O
#define TSTATE_C    X86_COND_C
#define TSTATE_E    X86_COND_E
#define TSTATE_NE   X86_COND_NE
#define TSTATE_G    X86_COND_G
#define TSTATE_GE   X86_COND_GE
#define TSTATE_A    X86_COND_A
#define TSTATE_AE   X86_COND_AE
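
/* sh4_x86.tstate caches which x86 condition code (if any) currently mirrors
 * the SH4 T flag. While a mapping is live, conditional branches can test the
 * host flags directly; TSTATE_NONE forces a reload from sh4r.t (see JT_label
 * and JF_label below). */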
#define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
#define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)

/* Convenience instructions */
#define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
#define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
#define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
#define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
#define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
#define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
#define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
#define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
#define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
#define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
#define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
#define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
#define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
#define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
#define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
#define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
#define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
#define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
#define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)

#define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
#define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )
/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
#define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
#define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register
 */
#define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
#define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

#define push_fpul()  FLDF_rbpdisp(R_FPUL)
#define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
#define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif

/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( (sh4r.xlat_sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2) ); \
        } else { \
            exit_block_exc(EXC_ILLEGAL, pc); \
        } \
        sh4_x86.branch_taken = TRUE; \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        MOVL_rbpdisp_r32( R_SR, REG_EAX );\
        ANDL_imms_r32( SR_FD, REG_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
        sh4_x86.tstate = TSTATE_NONE; \
    }

#define check_ralign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define address_space() ((sh4r.xlat_sh4_mode&SR_MD) ? (uintptr_t)sh4_address_space : (uintptr_t)sh4_user_address_space)

#define UNDEF(ir)
/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
 */
#ifdef HAVE_FRAME_ADDRESS
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) {
        CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    } else {
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
        MOVP_immptr_rptr( 0, REG_ARG2 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
    }
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) {
        CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
    } else {
        if( value_reg != REG_ARG2 ) {
            MOVL_r32_r32( value_reg, REG_ARG2 );
        }
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
#if MAX_REG_ARG > 2
        MOVP_immptr_rptr( 0, REG_ARG3 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
#else
        MOVL_imm32_rspdisp( 0, 0 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
#endif
    }
}
#else
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
}
#endif
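
/* In the TLB/user-mode paths above, MOVP_immptr_rptr( 0, ... ) emits a
 * placeholder "exception return" pointer as an extra argument; the -2
 * backpatch record later fills it in with the address of the unwind stub
 * emitted by sh4_translate_end_block(), so the memory handler can recover
 * if the access raises an MMU exception. */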
#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
#define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
#define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc)
#define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
#define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
#define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)

#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;

void sh4_translate_begin_block( sh4addr_t pc )
{
    enter_block();
    MOVP_immptr_rptr( ((uint8_t *)&sh4r) + 128, REG_EBP );
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_TLB_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
}
uint32_t sh4_translate_end_block_size()
{
    if( sh4_x86.backpatch_posn <= 3 ) {
        return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*24);
    } else {
        return EPILOGUE_SIZE + 72 + (sh4_x86.backpatch_posn-3)*27;
    }
}
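
/* These constants are conservative upper bounds on the code emitted by
 * sh4_translate_end_block(): roughly 24 bytes per backpatch stub for the
 * first three records and 27 bytes thereafter, plus the fixed epilogue. */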
/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    MOVL_imm32_r32( pc, REG_EAX );
    CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
}

#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)

/**
 * Exit the block with sh4r.pc already written
 */
void exit_block_pcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }
    exit_block();
}
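
/* All the exit paths below follow the same pattern: charge elapsed cycles to
 * sh4r.slice_cycle (one sh4_cpu_period per 16-bit instruction word executed),
 * load the continuation point, and chain to the next translated block via
 * xlat_get_code / xlat_get_code_by_vma. */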
/**
 * Exit the block with sh4r.new_pc written with the target pc
 */
void exit_block_newpcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }
    exit_block();
}

/**
 * Exit the block to an absolute PC
 */
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( pc, REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, R_PC );
    if( IS_IN_ICACHE(pc) ) {
        MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
        ANDP_imms_rptr( -4, REG_EAX );
    } else if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ECX);
    } else {
        CALL1_ptr_r32(xlat_get_code, REG_ECX);
    }
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    exit_block();
}

/**
 * Exit the block to a relative PC
 */
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
    ADDL_rbpdisp_r32( R_PC, REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, R_PC );
    if( IS_IN_ICACHE(pc) ) {
        MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
        ANDP_imms_rptr( -4, REG_EAX );
    } else if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ECX);
    } else {
        CALL1_ptr_r32(xlat_get_code, REG_ECX);
    }
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    exit_block();
}

/**
 * Exit unconditionally with a general exception
 */
void exit_block_exc( int code, sh4addr_t pc )
{
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_imm32_r32( code, REG_ARG1 );
    CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    exit_block();
}
/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
    ADDL_r32_rbpdisp( REG_ECX, R_PC );

    MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
    MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );

    CALL_ptr( sh4_execute_instruction );
    MOVL_rbpdisp_r32( R_PC, REG_EAX );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_EAX);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_EAX);
    }
    exit_block();
}
/**
 * Write the block trailer (exception handling block)
 */
void sh4_translate_end_block( sh4addr_t pc ) {
    if( sh4_x86.branch_taken == FALSE ) {
        // Didn't exit unconditionally already, so write the termination here
        exit_block_rel( pc, pc );
    }
    if( sh4_x86.backpatch_posn != 0 ) {
        unsigned int i;
        // Exception raised - cleanup and exit
        uint8_t *end_ptr = xlat_output;
        MOVL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_rbpdisp( REG_ECX, R_SPC );
        MOVL_moffptr_eax( &sh4_cpu_period );
        MULL_r32( REG_EDX );
        ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
        MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
        if( sh4_x86.tlb_on ) {
            CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
        } else {
            CALL1_ptr_r32(xlat_get_code, REG_ARG1);
        }
        exit_block();

        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
                if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
                    *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output;
                } else {
                    *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                }
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            } else {
                *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
                CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            }
        }
    }
}
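
/* Trailer protocol: each stub loads the faulting instruction's block offset
 * into EDX and jumps back to the common cleanup at end_ptr, which adds
 * 2*EDX to sh4r.spc, charges EDX*sh4_cpu_period cycles to slice_cycle, and
 * looks up the translated code at the new sh4r.pc. */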
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both branch and delayed instruction as a single unit (as
 * the delay-slot instruction executes before the branch takes effect).
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or an instruction that otherwise modifies the PC)
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    if( !sh4_x86.in_delay_slot ) {
        sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
    }

    /* check for breakpoints at this pc */
    for( int i=0; i<sh4_breakpoint_count; i++ ) {
        if( sh4_breakpoints[i].address == pc ) {
            sh4_translate_emit_breakpoint(pc);
            break;
        }
    }
%%
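/* Everything after the %% marker is input for lxdream's instruction-decoder
 * generator (gendec): each "MNEMONIC operands {: ... :}" entry is the C
 * action spliced into sh4_translate_instruction() when the decoder matches
 * that opcode, with the operand fields (Rm, Rn, imm, disp) pre-extracted. */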
/* ALU operations */
ADD Rm, Rn {:
    COUNT_INST(I_ADD);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADD #imm, Rn {:
    COUNT_INST(I_ADDI);
    ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    COUNT_INST(I_ADDC);
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADCL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ADDV Rm, Rn {:
    COUNT_INST(I_ADDV);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    COUNT_INST(I_AND);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ANDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:
    COUNT_INST(I_ANDI);
    load_reg( REG_EAX, 0 );
    ANDL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ANDB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp(REG_EAX, 0);
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_rspdisp_r32(0, REG_EAX);
    ANDL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
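/* Byte-sized read-modify-write on @(R0,GBR): the effective address is saved
 * to the stack slot across the read call (which may clobber registers),
 * reloaded for the write-back, and the value itself is modified in EDX.
 * The same pattern appears in OR.B, TAS.B and XOR.B below. */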
CMP/EQ Rm, Rn {:
    COUNT_INST(I_CMPEQ);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:
    COUNT_INST(I_CMPEQI);
    load_reg( REG_EAX, 0 );
    CMPL_imms_r32(imm, REG_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:
    COUNT_INST(I_CMPGE);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {:
    COUNT_INST(I_CMPGT);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:
    COUNT_INST(I_CMPHI);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {:
    COUNT_INST(I_CMPHS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
:}
CMP/PL Rn {:
    COUNT_INST(I_CMPPL);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:
    COUNT_INST(I_CMPPZ);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:
    COUNT_INST(I_CMPSTR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_ECX, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target1);
    TESTB_r8_r8( REG_AH, REG_AH );
    JE_label(target2);
    SHRL_imm_r32( 16, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target3);
    TESTB_r8_r8( REG_AH, REG_AH );
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
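/* CMP/STR sets T if any of the four corresponding bytes of Rm and Rn are
 * equal: the XOR zeroes matching bytes, and each TESTB short-circuits to the
 * final SETE as soon as a zero byte is found. */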
DIV0S Rm, Rn {:
    COUNT_INST(I_DIV0S);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SHRL_imm_r32( 31, REG_EAX );
    SHRL_imm_r32( 31, REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:
    COUNT_INST(I_DIV0U);
    XORL_r32_r32( REG_EAX, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_Q );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
DIV1 Rm, Rn {:
    COUNT_INST(I_DIV1);
    MOVL_rbpdisp_r32( R_M, REG_ECX );
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    SETC_r8( REG_DL ); // Q'
    CMPL_rbpdisp_r32( R_Q, REG_ECX );
    JE_label(mqequal);
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_label(end);
    JMP_TARGET(mqequal);
    SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_TARGET(end);
    store_reg( REG_EAX, Rn ); // Done with Rn now
    SETC_r8(REG_AL); // tmp1
    XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
    XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    XORL_imms_r32( 1, REG_AL );   // T = !Q'
    MOVZXL_r8_r32( REG_AL, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
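/* DIV1 performs one step of the SH4's 1-bit non-restoring division: the next
 * dividend bit is rotated in through the carry, Rm is added or subtracted
 * depending on whether Q and M agree, and Q/T are recomputed for the next
 * step. */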
DMULS.L Rm, Rn {:
    COUNT_INST(I_DMULS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    IMULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:
    COUNT_INST(I_DMULU);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:
    COUNT_INST(I_DT);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( -1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:
    COUNT_INST(I_EXTSB);
    load_reg( REG_EAX, Rm );
    MOVSXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTS.W Rm, Rn {:
    COUNT_INST(I_EXTSW);
    load_reg( REG_EAX, Rm );
    MOVSXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTU.B Rm, Rn {:
    COUNT_INST(I_EXTUB);
    load_reg( REG_EAX, Rm );
    MOVZXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTU.W Rm, Rn {:
    COUNT_INST(I_EXTUW);
    load_reg( REG_EAX, Rm );
    MOVZXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MAC.L @Rm+, @Rn+ {:
    COUNT_INST(I_MACL);
    if( Rm == Rn ) {
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp(REG_EAX, 0);
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
    } else {
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rn );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    }

    IMULL_rspdisp( 0 );
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );

    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32(REG_ECX, REG_ECX);
    JE_label( nosat );
    CALL_ptr( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
MAC.W @Rm+, @Rn+ {:
    COUNT_INST(I_MACW);
    if( Rm == Rn ) {
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rn );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
    }
    IMULL_rspdisp( 0 );
    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32( REG_ECX, REG_ECX );
    JE_label( nosat );

    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    JNO_label( end );            // 2
    MOVL_imm32_r32( 1, REG_EDX );         // 5
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );   // 6
    JS_label( positive );        // 2
    MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
    JMP_label(end2);           // 2

    JMP_TARGET(positive);
    MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
    JMP_label(end3);            // 2

    JMP_TARGET(nosat);
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVT Rn {:
    COUNT_INST(I_MOVT);
    MOVL_rbpdisp_r32( R_T, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MUL.L Rm, Rn {:
    COUNT_INST(I_MULL);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    COUNT_INST(I_MULSW);
    MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:
    COUNT_INST(I_MULUW);
    MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    COUNT_INST(I_NEG);
    load_reg( REG_EAX, Rm );
    NEGL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:
    COUNT_INST(I_NEGC);
    load_reg( REG_EAX, Rm );
    XORL_r32_r32( REG_ECX, REG_ECX );
    LDC_t();
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:
    COUNT_INST(I_NOT);
    load_reg( REG_EAX, Rm );
    NOTL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:
    COUNT_INST(I_OR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    COUNT_INST(I_ORI);
    load_reg( REG_EAX, 0 );
    ORL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ORB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_rspdisp_r32( 0, REG_EAX );
    ORL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    COUNT_INST(I_ROTCL);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:
    COUNT_INST(I_ROTCR);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCRL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:
    COUNT_INST(I_ROTL);
    load_reg( REG_EAX, Rn );
    ROLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:
    COUNT_INST(I_ROTR);
    load_reg( REG_EAX, Rn );
    RORL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    COUNT_INST(I_SHAD);
    /* Annoyingly enough, not directly convertible */
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    NEGL_r32( REG_ECX );      // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptysar);     // 2
    SARL_cl_r32( REG_EAX );       // 2
    JMP_label(end);          // 2

    JMP_TARGET(emptysar);
    SARL_imm_r32(31, REG_EAX );  // 3
    JMP_label(end2);

    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLD Rm, Rn {:
    COUNT_INST(I_SHLD);
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    NEGL_r32( REG_ECX );      // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptyshr );
    SHRL_cl_r32( REG_EAX );       // 2
    JMP_label(end);          // 2

    JMP_TARGET(emptyshr);
    XORL_r32_r32( REG_EAX, REG_EAX );
    JMP_label(end2);

    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
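/* SHAD/SHLD shift left for Rm >= 0 and right for Rm < 0. x86 variable shifts
 * mask the count to 5 bits, so the architectural shift-by-32 case (negative
 * count with low bits zero) is handled explicitly: sign-fill via SAR 31 for
 * SHAD, zero for SHLD. */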
SHAL Rn {:
    COUNT_INST(I_SHAL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHAR Rn {:
    COUNT_INST(I_SHAR);
    load_reg( REG_EAX, Rn );
    SARL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL2 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL8 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL16 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLR2 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR8 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR16 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUB Rm, Rn {:
    COUNT_INST(I_SUB);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SUBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUBC Rm, Rn {:
    COUNT_INST(I_SUBC);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SUBV Rm, Rn {:
    COUNT_INST(I_SUBV);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SUBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
SWAP.B Rm, Rn {:
    COUNT_INST(I_SWAPB);
    load_reg( REG_EAX, Rm );
    XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
    store_reg( REG_EAX, Rn );
:}
SWAP.W Rm, Rn {:
    COUNT_INST(I_SWAPB);
    load_reg( REG_EAX, Rm );
    MOVL_r32_r32( REG_EAX, REG_ECX );
    SHLL_imm_r32( 16, REG_ECX );
    SHRL_imm_r32( 16, REG_EAX );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
TAS.B @Rn {:
    COUNT_INST(I_TASB);
    load_reg( REG_EAX, Rn );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    TESTB_r8_r8( REG_DL, REG_DL );
    SETE_t();
    ORB_imms_r8( 0x80, REG_DL );
    MOVL_rspdisp_r32( 0, REG_EAX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
TST Rm, Rn {:
    COUNT_INST(I_TST);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    TESTL_r32_r32( REG_EAX, REG_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST #imm, R0 {:
    COUNT_INST(I_TSTI);
    load_reg( REG_EAX, 0 );
    TESTL_imms_r32( imm, REG_EAX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST.B #imm, @(R0, GBR) {:
    COUNT_INST(I_TSTB);
    load_reg( REG_EAX, 0);
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    TESTB_imms_r8( imm, REG_AL );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
XOR Rm, Rn {:
    COUNT_INST(I_XOR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR #imm, R0 {:
    COUNT_INST(I_XORI);
    load_reg( REG_EAX, 0 );
    XORL_imms_r32( imm, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR.B #imm, @(R0, GBR) {:
    COUNT_INST(I_XORB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
    MOVL_rspdisp_r32( 0, REG_EAX );
    XORL_imms_r32( imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
XTRCT Rm, Rn {:
    COUNT_INST(I_XTRCT);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SHLL_imm_r32( 16, REG_EAX );
    SHRL_imm_r32( 16, REG_ECX );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}

/* Data move instructions */
MOV Rm, Rn {:
    COUNT_INST(I_MOV);
    load_reg( REG_EAX, Rm );
    store_reg( REG_EAX, Rn );
:}
MOV #imm, Rn {:
    COUNT_INST(I_MOVI);
    MOVL_imm32_r32( imm, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MOV.B Rm, @Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @-Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @(R0, Rn) {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, GBR) {:
    COUNT_INST(I_MOVB);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, Rn) {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( disp, REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm, Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rm );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm+, Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rm );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    if( Rm != Rn ) {
        ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
    }
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(R0, Rm), Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, GBR), R0 {:
    COUNT_INST(I_MOVB);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, Rm), R0 {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rm );
    ADDL_imms_r32( disp, REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @Rn {:
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, Rn );
    check_walign32(REG_EAX);
    MOVL_r32_r32( REG_EAX, REG_ECX );
    ANDL_imms_r32( 0xFC000000, REG_ECX );
    CMPL_imms_r32( 0xE0000000, REG_ECX );
    JNE_label( notsq );
    ANDL_imms_r32( 0x3C, REG_EAX );
    load_reg( REG_EDX, Rm );
    MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
    JMP_label(end);
    JMP_TARGET(notsq);
    load_reg( REG_EDX, Rm );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
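/* Store-queue fast path: addresses in 0xE0000000-0xE3FFFFFF map to the SH4
 * store queues, so the masked compare detects that region and writes directly
 * into sh4r.store_queue ((addr & 0x3C) selects the slot) instead of calling
 * into the memory subsystem. */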
MOV.L Rm, @-Rn {:
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( -4, REG_EAX );
    check_walign32( REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(R0, Rn) {:
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
    check_walign32( REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L R0, @(disp, GBR) {:
    COUNT_INST(I_MOVL);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    check_walign32( REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(disp, Rn) {:
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( disp, REG_EAX );
    check_walign32( REG_EAX );
    MOVL_r32_r32( REG_EAX, REG_ECX );
    ANDL_imms_r32( 0xFC000000, REG_ECX );
    CMPL_imms_r32( 0xE0000000, REG_ECX );
    JNE_label( notsq );
    ANDL_imms_r32( 0x3C, REG_EAX );
    load_reg( REG_EDX, Rm );
    MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
    JMP_label(end);
    JMP_TARGET(notsq);
    load_reg( REG_EDX, Rm );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm, Rn {:
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
  1536 MOV.L @Rm+, Rn {:  
  1537     COUNT_INST(I_MOVL);
  1538     load_reg( REG_EAX, Rm );
  1539     check_ralign32( REG_EAX );
  1540     MEM_READ_LONG( REG_EAX, REG_EAX );
  1541     if( Rm != Rn ) {
  1542     	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  1543     }
  1544     store_reg( REG_EAX, Rn );
  1545     sh4_x86.tstate = TSTATE_NONE;
  1546 :}
  1547 MOV.L @(R0, Rm), Rn {:  
  1548     COUNT_INST(I_MOVL);
  1549     load_reg( REG_EAX, 0 );
  1550     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1551     check_ralign32( REG_EAX );
  1552     MEM_READ_LONG( REG_EAX, REG_EAX );
  1553     store_reg( REG_EAX, Rn );
  1554     sh4_x86.tstate = TSTATE_NONE;
  1555 :}
  1556 MOV.L @(disp, GBR), R0 {:
  1557     COUNT_INST(I_MOVL);
  1558     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1559     ADDL_imms_r32( disp, REG_EAX );
  1560     check_ralign32( REG_EAX );
  1561     MEM_READ_LONG( REG_EAX, REG_EAX );
  1562     store_reg( REG_EAX, 0 );
  1563     sh4_x86.tstate = TSTATE_NONE;
  1564 :}
  1565 MOV.L @(disp, PC), Rn {:  
  1566     COUNT_INST(I_MOVLPC);
  1567     if( sh4_x86.in_delay_slot ) {
  1568 	SLOTILLEGAL();
  1569     } else {
  1570 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1571 	if( IS_IN_ICACHE(target) ) {
  1572 	    // If the target address is in the same page as the code, it's
  1573 	    // pretty safe to just ref it directly and circumvent the whole
  1574 	    // memory subsystem. (this is a big performance win)
  1576 	    // FIXME: There's a corner-case that's not handled here when
  1577 	    // the current code-page is in the ITLB but not in the UTLB.
  1578 	    // (this should generate a TLB miss, but SH4 behaviour needs to be
  1579 	    // tested to confirm.) It's unlikely anything depends on this
  1580 	    // behaviour, though.
  1581 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1582 	    MOVL_moffptr_eax( ptr );
  1583 	} else {
  1584 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1585 	    // different virtual address than the translation was done with,
  1586 	    // but we can safely assume that the low bits are the same.
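       	    // The immediate below is (pc - block_start_pc) + disp + 4 - (pc&0x03);
       	    // since sh4r.pc holds the block's start address at runtime, adding it
       	    // reproduces (pc & ~3) + disp + 4, the aligned literal address.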
  1587 	    MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
  1588 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1589 	    MEM_READ_LONG( REG_EAX, REG_EAX );
  1590 	    sh4_x86.tstate = TSTATE_NONE;
  1591 	}
  1592 	store_reg( REG_EAX, Rn );
  1593     }
  1594 :}
  1595 MOV.L @(disp, Rm), Rn {:  
  1596     COUNT_INST(I_MOVL);
  1597     load_reg( REG_EAX, Rm );
  1598     ADDL_imms_r32( disp, REG_EAX );
  1599     check_ralign32( REG_EAX );
  1600     MEM_READ_LONG( REG_EAX, REG_EAX );
  1601     store_reg( REG_EAX, Rn );
  1602     sh4_x86.tstate = TSTATE_NONE;
  1603 :}
  1604 MOV.W Rm, @Rn {:  
  1605     COUNT_INST(I_MOVW);
  1606     load_reg( REG_EAX, Rn );
  1607     check_walign16( REG_EAX );
  1608     load_reg( REG_EDX, Rm );
  1609     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1610     sh4_x86.tstate = TSTATE_NONE;
  1611 :}
  1612 MOV.W Rm, @-Rn {:  
  1613     COUNT_INST(I_MOVW);
  1614     load_reg( REG_EAX, Rn );
  1615     check_walign16( REG_EAX );
  1616     LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
  1617     load_reg( REG_EDX, Rm );
  1618     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1619     ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
  1620     sh4_x86.tstate = TSTATE_NONE;
  1621 :}
  1622 MOV.W Rm, @(R0, Rn) {:  
  1623     COUNT_INST(I_MOVW);
  1624     load_reg( REG_EAX, 0 );
  1625     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1626     check_walign16( REG_EAX );
  1627     load_reg( REG_EDX, Rm );
  1628     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1629     sh4_x86.tstate = TSTATE_NONE;
  1630 :}
  1631 MOV.W R0, @(disp, GBR) {:  
  1632     COUNT_INST(I_MOVW);
  1633     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1634     ADDL_imms_r32( disp, REG_EAX );
  1635     check_walign16( REG_EAX );
  1636     load_reg( REG_EDX, 0 );
  1637     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1638     sh4_x86.tstate = TSTATE_NONE;
  1639 :}
  1640 MOV.W R0, @(disp, Rn) {:  
  1641     COUNT_INST(I_MOVW);
  1642     load_reg( REG_EAX, Rn );
  1643     ADDL_imms_r32( disp, REG_EAX );
  1644     check_walign16( REG_EAX );
  1645     load_reg( REG_EDX, 0 );
  1646     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1647     sh4_x86.tstate = TSTATE_NONE;
  1648 :}
  1649 MOV.W @Rm, Rn {:  
  1650     COUNT_INST(I_MOVW);
  1651     load_reg( REG_EAX, Rm );
  1652     check_ralign16( REG_EAX );
  1653     MEM_READ_WORD( REG_EAX, REG_EAX );
  1654     store_reg( REG_EAX, Rn );
  1655     sh4_x86.tstate = TSTATE_NONE;
  1656 :}
  1657 MOV.W @Rm+, Rn {:  
  1658     COUNT_INST(I_MOVW);
  1659     load_reg( REG_EAX, Rm );
  1660     check_ralign16( REG_EAX );
  1661     MEM_READ_WORD( REG_EAX, REG_EAX );
  1662     if( Rm != Rn ) {
  1663         ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
  1664     }
  1665     store_reg( REG_EAX, Rn );
  1666     sh4_x86.tstate = TSTATE_NONE;
  1667 :}
  1668 MOV.W @(R0, Rm), Rn {:  
  1669     COUNT_INST(I_MOVW);
  1670     load_reg( REG_EAX, 0 );
  1671     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1672     check_ralign16( REG_EAX );
  1673     MEM_READ_WORD( REG_EAX, REG_EAX );
  1674     store_reg( REG_EAX, Rn );
  1675     sh4_x86.tstate = TSTATE_NONE;
  1676 :}
  1677 MOV.W @(disp, GBR), R0 {:  
  1678     COUNT_INST(I_MOVW);
  1679     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1680     ADDL_imms_r32( disp, REG_EAX );
  1681     check_ralign16( REG_EAX );
  1682     MEM_READ_WORD( REG_EAX, REG_EAX );
  1683     store_reg( REG_EAX, 0 );
  1684     sh4_x86.tstate = TSTATE_NONE;
  1685 :}
  1686 MOV.W @(disp, PC), Rn {:  
  1687     COUNT_INST(I_MOVW);
  1688     if( sh4_x86.in_delay_slot ) {
  1689 	SLOTILLEGAL();
  1690     } else {
  1691 	// See comments for MOV.L @(disp, PC), Rn
  1692 	uint32_t target = pc + disp + 4;
  1693 	if( IS_IN_ICACHE(target) ) {
  1694 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1695 	    MOVL_moffptr_eax( ptr );
  1696 	    MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1697 	} else {
  1698 	    MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
  1699 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1700 	    MEM_READ_WORD( REG_EAX, REG_EAX );
  1701 	    sh4_x86.tstate = TSTATE_NONE;
  1702 	}
  1703 	store_reg( REG_EAX, Rn );
  1704     }
  1705 :}
  1706 MOV.W @(disp, Rm), R0 {:  
  1707     COUNT_INST(I_MOVW);
  1708     load_reg( REG_EAX, Rm );
  1709     ADDL_imms_r32( disp, REG_EAX );
  1710     check_ralign16( REG_EAX );
  1711     MEM_READ_WORD( REG_EAX, REG_EAX );
  1712     store_reg( REG_EAX, 0 );
  1713     sh4_x86.tstate = TSTATE_NONE;
  1714 :}
  1715 MOVA @(disp, PC), R0 {:  
  1716     COUNT_INST(I_MOVA);
  1717     if( sh4_x86.in_delay_slot ) {
  1718 	SLOTILLEGAL();
  1719     } else {
  1720 	MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
  1721 	ADDL_rbpdisp_r32( R_PC, REG_ECX );
  1722 	store_reg( REG_ECX, 0 );
  1723 	sh4_x86.tstate = TSTATE_NONE;
  1724     }
  1725 :}
  1726 MOVCA.L R0, @Rn {:  
  1727     COUNT_INST(I_MOVCA);
  1728     load_reg( REG_EAX, Rn );
  1729     check_walign32( REG_EAX );
  1730     load_reg( REG_EDX, 0 );
  1731     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1732     sh4_x86.tstate = TSTATE_NONE;
  1733 :}
  1735 /* Control transfer instructions */
  1736 BF disp {:
  1737     COUNT_INST(I_BF);
  1738     if( sh4_x86.in_delay_slot ) {
  1739 	SLOTILLEGAL();
  1740     } else {
  1741 	sh4vma_t target = disp + pc + 4;
  1742 	JT_label( nottaken );
  1743 	exit_block_rel(target, pc+2 );
  1744 	JMP_TARGET(nottaken);
  1745 	return 2;
  1746     }
  1747 :}
  1748 BF/S disp {:
  1749     COUNT_INST(I_BFS);
  1750     if( sh4_x86.in_delay_slot ) {
  1751 	SLOTILLEGAL();
  1752     } else {
  1753 	sh4_x86.in_delay_slot = DELAY_PC;
  1754 	if( UNTRANSLATABLE(pc+2) ) {
  1755 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1756 	    JT_label(nottaken);
  1757 	    ADDL_imms_r32( disp, REG_EAX );
  1758 	    JMP_TARGET(nottaken);
  1759 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1760 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1761 	    exit_block_emu(pc+2);
  1762 	    sh4_x86.branch_taken = TRUE;
  1763 	    return 2;
  1764 	} else {
  1765 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1766 		CMPL_imms_rbpdisp( 1, R_T );
  1767 		sh4_x86.tstate = TSTATE_E;
  1768 	    }
  1769 	    sh4vma_t target = disp + pc + 4;
  1770 	    JCC_cc_rel32(sh4_x86.tstate,0);
  1771 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
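       	    // JCC_cc_rel32(...,0) above emitted a conditional jump with a zero
       	    // rel32 placeholder; 'patch' points at that field and is filled in
       	    // below once the not-taken path's address is known.  The delay-slot
       	    // instruction is translated twice - once per path - since it must
       	    // execute whether or not the branch is taken.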
  1772 	    int save_tstate = sh4_x86.tstate;
  1773 	    sh4_translate_instruction(pc+2);
  1774             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  1775 	    exit_block_rel( target, pc+4 );
  1777 	    // not taken
  1778 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1779 	    sh4_x86.tstate = save_tstate;
  1780 	    sh4_translate_instruction(pc+2);
  1781 	    return 4;
  1782 	}
  1783     }
  1784 :}
  1785 BRA disp {:  
  1786     COUNT_INST(I_BRA);
  1787     if( sh4_x86.in_delay_slot ) {
  1788 	SLOTILLEGAL();
  1789     } else {
  1790 	sh4_x86.in_delay_slot = DELAY_PC;
  1791 	sh4_x86.branch_taken = TRUE;
  1792 	if( UNTRANSLATABLE(pc+2) ) {
  1793 	    MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1794 	    ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
  1795 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1796 	    exit_block_emu(pc+2);
  1797 	    return 2;
  1798 	} else {
  1799 	    sh4_translate_instruction( pc + 2 );
  1800 	    exit_block_rel( disp + pc + 4, pc+4 );
  1801 	    return 4;
  1802 	}
  1803     }
  1804 :}
  1805 BRAF Rn {:  
  1806     COUNT_INST(I_BRAF);
  1807     if( sh4_x86.in_delay_slot ) {
  1808 	SLOTILLEGAL();
  1809     } else {
  1810 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1811 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1812 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1813 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1814 	sh4_x86.in_delay_slot = DELAY_PC;
  1815 	sh4_x86.tstate = TSTATE_NONE;
  1816 	sh4_x86.branch_taken = TRUE;
  1817 	if( UNTRANSLATABLE(pc+2) ) {
  1818 	    exit_block_emu(pc+2);
  1819 	    return 2;
  1820 	} else {
  1821 	    sh4_translate_instruction( pc + 2 );
  1822 	    exit_block_newpcset(pc+4);
  1823 	    return 4;
  1824 	}
  1825     }
  1826 :}
  1827 BSR disp {:  
  1828     COUNT_INST(I_BSR);
  1829     if( sh4_x86.in_delay_slot ) {
  1830 	SLOTILLEGAL();
  1831     } else {
  1832 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1833 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1834 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1835 	sh4_x86.in_delay_slot = DELAY_PC;
  1836 	sh4_x86.branch_taken = TRUE;
  1837 	sh4_x86.tstate = TSTATE_NONE;
  1838 	if( UNTRANSLATABLE(pc+2) ) {
  1839 	    ADDL_imms_r32( disp, REG_EAX );
  1840 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1841 	    exit_block_emu(pc+2);
  1842 	    return 2;
  1843 	} else {
  1844 	    sh4_translate_instruction( pc + 2 );
  1845 	    exit_block_rel( disp + pc + 4, pc+4 );
  1846 	    return 4;
  1847 	}
  1848     }
  1849 :}
  1850 BSRF Rn {:  
  1851     COUNT_INST(I_BSRF);
  1852     if( sh4_x86.in_delay_slot ) {
  1853 	SLOTILLEGAL();
  1854     } else {
  1855 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1856 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1857 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1858 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1859 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1861 	sh4_x86.in_delay_slot = DELAY_PC;
  1862 	sh4_x86.tstate = TSTATE_NONE;
  1863 	sh4_x86.branch_taken = TRUE;
  1864 	if( UNTRANSLATABLE(pc+2) ) {
  1865 	    exit_block_emu(pc+2);
  1866 	    return 2;
  1867 	} else {
  1868 	    sh4_translate_instruction( pc + 2 );
  1869 	    exit_block_newpcset(pc+4);
  1870 	    return 4;
  1871 	}
  1872     }
  1873 :}
  1874 BT disp {:
  1875     COUNT_INST(I_BT);
  1876     if( sh4_x86.in_delay_slot ) {
  1877 	SLOTILLEGAL();
  1878     } else {
  1879 	sh4vma_t target = disp + pc + 4;
  1880 	JF_label( nottaken );
  1881 	exit_block_rel(target, pc+2 );
  1882 	JMP_TARGET(nottaken);
  1883 	return 2;
  1884     }
  1885 :}
  1886 BT/S disp {:
  1887     COUNT_INST(I_BTS);
  1888     if( sh4_x86.in_delay_slot ) {
  1889 	SLOTILLEGAL();
  1890     } else {
  1891 	sh4_x86.in_delay_slot = DELAY_PC;
  1892 	if( UNTRANSLATABLE(pc+2) ) {
  1893 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1894 	    JF_label(nottaken);
  1895 	    ADDL_imms_r32( disp, REG_EAX );
  1896 	    JMP_TARGET(nottaken);
  1897 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1898 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1899 	    exit_block_emu(pc+2);
  1900 	    sh4_x86.branch_taken = TRUE;
  1901 	    return 2;
  1902 	} else {
  1903 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1904 		CMPL_imms_rbpdisp( 1, R_T );
  1905 		sh4_x86.tstate = TSTATE_E;
  1906 	    }
  1907 	    JCC_cc_rel32(sh4_x86.tstate^1,0);
  1908 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  1910 	    int save_tstate = sh4_x86.tstate;
  1911 	    sh4_translate_instruction(pc+2);
  1912             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  1913 	    exit_block_rel( disp + pc + 4, pc+4 );
  1914 	    // not taken
  1915 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1916 	    sh4_x86.tstate = save_tstate;
  1917 	    sh4_translate_instruction(pc+2);
  1918 	    return 4;
  1919 	}
  1920     }
  1921 :}
  1922 JMP @Rn {:  
  1923     COUNT_INST(I_JMP);
  1924     if( sh4_x86.in_delay_slot ) {
  1925 	SLOTILLEGAL();
  1926     } else {
  1927 	load_reg( REG_ECX, Rn );
  1928 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  1929 	sh4_x86.in_delay_slot = DELAY_PC;
  1930 	sh4_x86.branch_taken = TRUE;
  1931 	if( UNTRANSLATABLE(pc+2) ) {
  1932 	    exit_block_emu(pc+2);
  1933 	    return 2;
  1934 	} else {
  1935 	    sh4_translate_instruction(pc+2);
  1936 	    exit_block_newpcset(pc+4);
  1937 	    return 4;
  1938 	}
  1939     }
  1940 :}
  1941 JSR @Rn {:  
  1942     COUNT_INST(I_JSR);
  1943     if( sh4_x86.in_delay_slot ) {
  1944 	SLOTILLEGAL();
  1945     } else {
  1946 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1947 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1948 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1949 	load_reg( REG_ECX, Rn );
  1950 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  1951 	sh4_x86.in_delay_slot = DELAY_PC;
  1952 	sh4_x86.branch_taken = TRUE;
  1953 	sh4_x86.tstate = TSTATE_NONE;
  1954 	if( UNTRANSLATABLE(pc+2) ) {
  1955 	    exit_block_emu(pc+2);
  1956 	    return 2;
  1957 	} else {
  1958 	    sh4_translate_instruction(pc+2);
  1959 	    exit_block_newpcset(pc+4);
  1960 	    return 4;
  1961 	}
  1962     }
  1963 :}
  1964 RTE {:  
  1965     COUNT_INST(I_RTE);
  1966     if( sh4_x86.in_delay_slot ) {
  1967 	SLOTILLEGAL();
  1968     } else {
  1969 	check_priv();
  1970 	MOVL_rbpdisp_r32( R_SPC, REG_ECX );
  1971 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  1972 	MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  1973 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  1974 	sh4_x86.in_delay_slot = DELAY_PC;
  1975 	sh4_x86.fpuen_checked = FALSE;
  1976 	sh4_x86.tstate = TSTATE_NONE;
  1977 	sh4_x86.branch_taken = TRUE;
  1978 	if( UNTRANSLATABLE(pc+2) ) {
  1979 	    exit_block_emu(pc+2);
  1980 	    return 2;
  1981 	} else {
  1982 	    sh4_translate_instruction(pc+2);
  1983 	    exit_block_newpcset(pc+4);
  1984 	    return 4;
  1985 	}
  1986     }
  1987 :}
  1988 RTS {:  
  1989     COUNT_INST(I_RTS);
  1990     if( sh4_x86.in_delay_slot ) {
  1991 	SLOTILLEGAL();
  1992     } else {
  1993 	MOVL_rbpdisp_r32( R_PR, REG_ECX );
  1994 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  1995 	sh4_x86.in_delay_slot = DELAY_PC;
  1996 	sh4_x86.branch_taken = TRUE;
  1997 	if( UNTRANSLATABLE(pc+2) ) {
  1998 	    exit_block_emu(pc+2);
  1999 	    return 2;
  2000 	} else {
  2001 	    sh4_translate_instruction(pc+2);
  2002 	    exit_block_newpcset(pc+4);
  2003 	    return 4;
  2004 	}
  2005     }
  2006 :}
  2007 TRAPA #imm {:  
  2008     COUNT_INST(I_TRAPA);
  2009     if( sh4_x86.in_delay_slot ) {
  2010 	SLOTILLEGAL();
  2011     } else {
  2012 	MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX );   // 5
  2013 	ADDL_r32_rbpdisp( REG_ECX, R_PC );
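       	// Commit sh4r.pc = pc + 2 before the call: SH4 saves the address of
       	// the instruction following the TRAPA into SPC.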
  2014 	MOVL_imm32_r32( imm, REG_EAX );
  2015 	CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
  2016 	sh4_x86.tstate = TSTATE_NONE;
  2017 	exit_block_pcset(pc+2);
  2018 	sh4_x86.branch_taken = TRUE;
  2019 	return 2;
  2020     }
  2021 :}
  2022 UNDEF {:  
  2023     COUNT_INST(I_UNDEF);
  2024     if( sh4_x86.in_delay_slot ) {
  2025 	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2);    
  2026     } else {
  2027 	exit_block_exc(EXC_ILLEGAL, pc);    
  2028 	return 2;
  2029     }
  2030 :}
  2032 CLRMAC {:  
  2033     COUNT_INST(I_CLRMAC);
  2034     XORL_r32_r32(REG_EAX, REG_EAX);
  2035     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2036     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2037     sh4_x86.tstate = TSTATE_NONE;
  2038 :}
  2039 CLRS {:
  2040     COUNT_INST(I_CLRS);
  2041     CLC();
  2042     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2043     sh4_x86.tstate = TSTATE_NONE;
  2044 :}
  2045 CLRT {:  
  2046     COUNT_INST(I_CLRT);
  2047     CLC();
  2048     SETC_t();
  2049     sh4_x86.tstate = TSTATE_C;
  2050 :}
  2051 SETS {:  
  2052     COUNT_INST(I_SETS);
  2053     STC();
  2054     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2055     sh4_x86.tstate = TSTATE_NONE;
  2056 :}
  2057 SETT {:  
  2058     COUNT_INST(I_SETT);
  2059     STC();
  2060     SETC_t();
  2061     sh4_x86.tstate = TSTATE_C;
  2062 :}
  2064 /* Floating point moves */
  2065 FMOV FRm, FRn {:  
  2066     COUNT_INST(I_FMOV1);
  2067     check_fpuen();
  2068     if( sh4_x86.double_size ) {
  2069         load_dr0( REG_EAX, FRm );
  2070         load_dr1( REG_ECX, FRm );
  2071         store_dr0( REG_EAX, FRn );
  2072         store_dr1( REG_ECX, FRn );
  2073     } else {
  2074         load_fr( REG_EAX, FRm ); // SZ=0 branch
  2075         store_fr( REG_EAX, FRn );
  2076     }
  2077 :}
  2078 FMOV FRm, @Rn {: 
  2079     COUNT_INST(I_FMOV2);
  2080     check_fpuen();
  2081     load_reg( REG_EAX, Rn );
  2082     if( sh4_x86.double_size ) {
  2083         check_walign64( REG_EAX );
  2084         load_dr0( REG_EDX, FRm );
  2085         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2086         load_reg( REG_EAX, Rn );
  2087         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2088         load_dr1( REG_EDX, FRm );
  2089         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2090     } else {
  2091         check_walign32( REG_EAX );
  2092         load_fr( REG_EDX, FRm );
  2093         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2094     }
  2095     sh4_x86.tstate = TSTATE_NONE;
  2096 :}
  2097 FMOV @Rm, FRn {:  
  2098     COUNT_INST(I_FMOV5);
  2099     check_fpuen();
  2100     load_reg( REG_EAX, Rm );
  2101     if( sh4_x86.double_size ) {
  2102         check_ralign64( REG_EAX );
  2103         MEM_READ_LONG( REG_EAX, REG_EAX );
  2104         store_dr0( REG_EAX, FRn );
  2105         load_reg( REG_EAX, Rm );
  2106         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2107         MEM_READ_LONG( REG_EAX, REG_EAX );
  2108         store_dr1( REG_EAX, FRn );
  2109     } else {
  2110         check_ralign32( REG_EAX );
  2111         MEM_READ_LONG( REG_EAX, REG_EAX );
  2112         store_fr( REG_EAX, FRn );
  2113     }
  2114     sh4_x86.tstate = TSTATE_NONE;
  2115 :}
  2116 FMOV FRm, @-Rn {:  
  2117     COUNT_INST(I_FMOV3);
  2118     check_fpuen();
  2119     load_reg( REG_EAX, Rn );
  2120     if( sh4_x86.double_size ) {
  2121         check_walign64( REG_EAX );
  2122         LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
  2123         load_dr0( REG_EDX, FRm );
  2124         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2125         load_reg( REG_EAX, Rn );
  2126         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2127         load_dr1( REG_EDX, FRm );
  2128         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2129         ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
  2130     } else {
  2131         check_walign32( REG_EAX );
  2132         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2133         load_fr( REG_EDX, FRm );
  2134         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2135         ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
  2136     }
  2137     sh4_x86.tstate = TSTATE_NONE;
  2138 :}
  2139 FMOV @Rm+, FRn {:
  2140     COUNT_INST(I_FMOV6);
  2141     check_fpuen();
  2142     load_reg( REG_EAX, Rm );
  2143     if( sh4_x86.double_size ) {
  2144         check_ralign64( REG_EAX );
  2145         MEM_READ_LONG( REG_EAX, REG_EAX );
  2146         store_dr0( REG_EAX, FRn );
  2147         load_reg( REG_EAX, Rm );
  2148         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2149         MEM_READ_LONG( REG_EAX, REG_EAX );
  2150         store_dr1( REG_EAX, FRn );
  2151         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
  2152     } else {
  2153         check_ralign32( REG_EAX );
  2154         MEM_READ_LONG( REG_EAX, REG_EAX );
  2155         store_fr( REG_EAX, FRn );
  2156         ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2157     }
  2158     sh4_x86.tstate = TSTATE_NONE;
  2159 :}
  2160 FMOV FRm, @(R0, Rn) {:  
  2161     COUNT_INST(I_FMOV4);
  2162     check_fpuen();
  2163     load_reg( REG_EAX, Rn );
  2164     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2165     if( sh4_x86.double_size ) {
  2166         check_walign64( REG_EAX );
  2167         load_dr0( REG_EDX, FRm );
  2168         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2169         load_reg( REG_EAX, Rn );
  2170         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2171         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2172         load_dr1( REG_EDX, FRm );
  2173         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2174     } else {
  2175         check_walign32( REG_EAX );
  2176         load_fr( REG_EDX, FRm );
  2177         MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
  2178     }
  2179     sh4_x86.tstate = TSTATE_NONE;
  2180 :}
  2181 FMOV @(R0, Rm), FRn {:  
  2182     COUNT_INST(I_FMOV7);
  2183     check_fpuen();
  2184     load_reg( REG_EAX, Rm );
  2185     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2186     if( sh4_x86.double_size ) {
  2187         check_ralign64( REG_EAX );
  2188         MEM_READ_LONG( REG_EAX, REG_EAX );
  2189         store_dr0( REG_EAX, FRn );
  2190         load_reg( REG_EAX, Rm );
  2191         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2192         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2193         MEM_READ_LONG( REG_EAX, REG_EAX );
  2194         store_dr1( REG_EAX, FRn );
  2195     } else {
  2196         check_ralign32( REG_EAX );
  2197         MEM_READ_LONG( REG_EAX, REG_EAX );
  2198         store_fr( REG_EAX, FRn );
  2199     }
  2200     sh4_x86.tstate = TSTATE_NONE;
  2201 :}
  2202 FLDI0 FRn {:  /* IFF PR=0 */
  2203     COUNT_INST(I_FLDI0);
  2204     check_fpuen();
  2205     if( sh4_x86.double_prec == 0 ) {
  2206         XORL_r32_r32( REG_EAX, REG_EAX );
  2207         store_fr( REG_EAX, FRn );
  2208     }
  2209     sh4_x86.tstate = TSTATE_NONE;
  2210 :}
  2211 FLDI1 FRn {:  /* IFF PR=0 */
  2212     COUNT_INST(I_FLDI1);
  2213     check_fpuen();
  2214     if( sh4_x86.double_prec == 0 ) {
  2215         MOVL_imm32_r32( 0x3F800000, REG_EAX );
  2216         store_fr( REG_EAX, FRn );
  2217     }
  2218 :}
  2220 FLOAT FPUL, FRn {:  
  2221     COUNT_INST(I_FLOAT);
  2222     check_fpuen();
  2223     FILD_rbpdisp(R_FPUL);
  2224     if( sh4_x86.double_prec ) {
  2225         pop_dr( FRn );
  2226     } else {
  2227         pop_fr( FRn );
  2228     }
  2229 :}
  2230 FTRC FRm, FPUL {:  
  2231     COUNT_INST(I_FTRC);
  2232     check_fpuen();
  2233     if( sh4_x86.double_prec ) {
  2234         push_dr( FRm );
  2235     } else {
  2236         push_fr( FRm );
  2237     }
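           /* Clamp by hand: SH4 FTRC saturates out-of-range values, while x87
            * FISTP would store the integer-indefinite value 0x80000000 for
            * them.  The FNSTCW/FLDCW pair below temporarily forces
            * round-toward-zero so that FISTP truncates the way FTRC does. */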
  2238     MOVP_immptr_rptr( &max_int, REG_ECX );
  2239     FILD_r32disp( REG_ECX, 0 );
  2240     FCOMIP_st(1);
  2241     JNA_label( sat );
  2242     MOVP_immptr_rptr( &min_int, REG_ECX );
  2243     FILD_r32disp( REG_ECX, 0 );
  2244     FCOMIP_st(1);              
  2245     JAE_label( sat2 );            
  2246     MOVP_immptr_rptr( &save_fcw, REG_EAX );
  2247     FNSTCW_r32disp( REG_EAX, 0 );
  2248     MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
  2249     FLDCW_r32disp( REG_EDX, 0 );
  2250     FISTP_rbpdisp(R_FPUL);             
  2251     FLDCW_r32disp( REG_EAX, 0 );
  2252     JMP_label(end);             
  2254     JMP_TARGET(sat);
  2255     JMP_TARGET(sat2);
  2256     MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
  2257     MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
  2258     FPOP_st();
  2259     JMP_TARGET(end);
  2260     sh4_x86.tstate = TSTATE_NONE;
  2261 :}
  2262 FLDS FRm, FPUL {:  
  2263     COUNT_INST(I_FLDS);
  2264     check_fpuen();
  2265     load_fr( REG_EAX, FRm );
  2266     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2267 :}
  2268 FSTS FPUL, FRn {:  
  2269     COUNT_INST(I_FSTS);
  2270     check_fpuen();
  2271     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2272     store_fr( REG_EAX, FRn );
  2273 :}
  2274 FCNVDS FRm, FPUL {:  
  2275     COUNT_INST(I_FCNVDS);
  2276     check_fpuen();
  2277     if( sh4_x86.double_prec ) {
  2278         push_dr( FRm );
  2279         pop_fpul();
  2280     }
  2281 :}
  2282 FCNVSD FPUL, FRn {:  
  2283     COUNT_INST(I_FCNVSD);
  2284     check_fpuen();
  2285     if( sh4_x86.double_prec ) {
  2286         push_fpul();
  2287         pop_dr( FRn );
  2288     }
  2289 :}
  2291 /* Floating point instructions */
  2292 FABS FRn {:  
  2293     COUNT_INST(I_FABS);
  2294     check_fpuen();
  2295     if( sh4_x86.double_prec ) {
  2296         push_dr(FRn);
  2297         FABS_st0();
  2298         pop_dr(FRn);
  2299     } else {
  2300         push_fr(FRn);
  2301         FABS_st0();
  2302         pop_fr(FRn);
  2303     }
  2304 :}
  2305 FADD FRm, FRn {:  
  2306     COUNT_INST(I_FADD);
  2307     check_fpuen();
  2308     if( sh4_x86.double_prec ) {
  2309         push_dr(FRm);
  2310         push_dr(FRn);
  2311         FADDP_st(1);
  2312         pop_dr(FRn);
  2313     } else {
  2314         push_fr(FRm);
  2315         push_fr(FRn);
  2316         FADDP_st(1);
  2317         pop_fr(FRn);
  2318     }
  2319 :}
  2320 FDIV FRm, FRn {:  
  2321     COUNT_INST(I_FDIV);
  2322     check_fpuen();
  2323     if( sh4_x86.double_prec ) {
  2324         push_dr(FRn);
  2325         push_dr(FRm);
  2326         FDIVP_st(1);
  2327         pop_dr(FRn);
  2328     } else {
  2329         push_fr(FRn);
  2330         push_fr(FRm);
  2331         FDIVP_st(1);
  2332         pop_fr(FRn);
  2333     }
  2334 :}
  2335 FMAC FR0, FRm, FRn {:  
  2336     COUNT_INST(I_FMAC);
  2337     check_fpuen();
  2338     if( sh4_x86.double_prec ) {
  2339         push_dr( 0 );
  2340         push_dr( FRm );
  2341         FMULP_st(1);
  2342         push_dr( FRn );
  2343         FADDP_st(1);
  2344         pop_dr( FRn );
  2345     } else {
  2346         push_fr( 0 );
  2347         push_fr( FRm );
  2348         FMULP_st(1);
  2349         push_fr( FRn );
  2350         FADDP_st(1);
  2351         pop_fr( FRn );
  2352     }
  2353 :}
  2355 FMUL FRm, FRn {:  
  2356     COUNT_INST(I_FMUL);
  2357     check_fpuen();
  2358     if( sh4_x86.double_prec ) {
  2359         push_dr(FRm);
  2360         push_dr(FRn);
  2361         FMULP_st(1);
  2362         pop_dr(FRn);
  2363     } else {
  2364         push_fr(FRm);
  2365         push_fr(FRn);
  2366         FMULP_st(1);
  2367         pop_fr(FRn);
  2368     }
  2369 :}
  2370 FNEG FRn {:  
  2371     COUNT_INST(I_FNEG);
  2372     check_fpuen();
  2373     if( sh4_x86.double_prec ) {
  2374         push_dr(FRn);
  2375         FCHS_st0();
  2376         pop_dr(FRn);
  2377     } else {
  2378         push_fr(FRn);
  2379         FCHS_st0();
  2380         pop_fr(FRn);
  2381     }
  2382 :}
  2383 FSRRA FRn {:  
  2384     COUNT_INST(I_FSRRA);
  2385     check_fpuen();
  2386     if( sh4_x86.double_prec == 0 ) {
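               // Computes 1.0/sqrt(FRn) exactly with x87; the hardware FSRRA
               // is only an approximation, so low-order bits may differ.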
  2387         FLD1_st0();
  2388         push_fr(FRn);
  2389         FSQRT_st0();
  2390         FDIVP_st(1);
  2391         pop_fr(FRn);
  2392     }
  2393 :}
  2394 FSQRT FRn {:  
  2395     COUNT_INST(I_FSQRT);
  2396     check_fpuen();
  2397     if( sh4_x86.double_prec ) {
  2398         push_dr(FRn);
  2399         FSQRT_st0();
  2400         pop_dr(FRn);
  2401     } else {
  2402         push_fr(FRn);
  2403         FSQRT_st0();
  2404         pop_fr(FRn);
  2405     }
  2406 :}
  2407 FSUB FRm, FRn {:  
  2408     COUNT_INST(I_FSUB);
  2409     check_fpuen();
  2410     if( sh4_x86.double_prec ) {
  2411         push_dr(FRn);
  2412         push_dr(FRm);
  2413         FSUBP_st(1);
  2414         pop_dr(FRn);
  2415     } else {
  2416         push_fr(FRn);
  2417         push_fr(FRm);
  2418         FSUBP_st(1);
  2419         pop_fr(FRn);
  2420     }
  2421 :}
  2423 FCMP/EQ FRm, FRn {:  
  2424     COUNT_INST(I_FCMPEQ);
  2425     check_fpuen();
  2426     if( sh4_x86.double_prec ) {
  2427         push_dr(FRm);
  2428         push_dr(FRn);
  2429     } else {
  2430         push_fr(FRm);
  2431         push_fr(FRn);
  2432     }
  2433     FCOMIP_st(1);
  2434     SETE_t();
  2435     FPOP_st();
  2436     sh4_x86.tstate = TSTATE_E;
  2437 :}
  2438 FCMP/GT FRm, FRn {:  
  2439     COUNT_INST(I_FCMPGT);
  2440     check_fpuen();
  2441     if( sh4_x86.double_prec ) {
  2442         push_dr(FRm);
  2443         push_dr(FRn);
  2444     } else {
  2445         push_fr(FRm);
  2446         push_fr(FRn);
  2447     }
  2448     FCOMIP_st(1);
  2449     SETA_t();
  2450     FPOP_st();
  2451     sh4_x86.tstate = TSTATE_A;
  2452 :}
  2454 FSCA FPUL, FRn {:  
  2455     COUNT_INST(I_FSCA);
  2456     check_fpuen();
  2457     if( sh4_x86.double_prec == 0 ) {
  2458         LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
  2459         MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2460         CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
  2461     }
  2462     sh4_x86.tstate = TSTATE_NONE;
  2463 :}
  2464 FIPR FVm, FVn {:  
  2465     COUNT_INST(I_FIPR);
  2466     check_fpuen();
  2467     if( sh4_x86.double_prec == 0 ) {
  2468         if( sh4_x86.sse3_enabled ) {
  2469             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2470             MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2471             HADDPS_xmm_xmm( 4, 4 ); 
  2472             HADDPS_xmm_xmm( 4, 4 );
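                   // Two HADDPS passes fold the four products into every lane.
                   // The store below targets array index (FVn<<2)+2 because the
                   // pairwise fr[] layout swaps adjacent singles (R_FR's f^1),
                   // so SH4 element FVn*4+3 lives at index FVn*4+2.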
  2473             MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2474         } else {
  2475             push_fr( FVm<<2 );
  2476             push_fr( FVn<<2 );
  2477             FMULP_st(1);
  2478             push_fr( (FVm<<2)+1);
  2479             push_fr( (FVn<<2)+1);
  2480             FMULP_st(1);
  2481             FADDP_st(1);
  2482             push_fr( (FVm<<2)+2);
  2483             push_fr( (FVn<<2)+2);
  2484             FMULP_st(1);
  2485             FADDP_st(1);
  2486             push_fr( (FVm<<2)+3);
  2487             push_fr( (FVn<<2)+3);
  2488             FMULP_st(1);
  2489             FADDP_st(1);
  2490             pop_fr( (FVn<<2)+3);
  2491         }
  2492     }
  2493 :}
  2494 FTRV XMTRX, FVn {:  
  2495     COUNT_INST(I_FTRV);
  2496     check_fpuen();
  2497     if( sh4_x86.double_prec == 0 ) {
  2498         if( sh4_x86.sse3_enabled ) {
  2499             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2500             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2501             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2502             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
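                   // Matrix-vector product: broadcast each element of FVn into
                   // one of xmm4-7, multiply by the matching XMTRX column, and
                   // accumulate the four partial products into xmm4.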
  2504             MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2505             MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2506             MOV_xmm_xmm( 4, 6 );
  2507             MOV_xmm_xmm( 5, 7 );
  2508             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2509             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2510             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2511             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2512             MULPS_xmm_xmm( 0, 4 );
  2513             MULPS_xmm_xmm( 1, 5 );
  2514             MULPS_xmm_xmm( 2, 6 );
  2515             MULPS_xmm_xmm( 3, 7 );
  2516             ADDPS_xmm_xmm( 5, 4 );
  2517             ADDPS_xmm_xmm( 7, 6 );
  2518             ADDPS_xmm_xmm( 6, 4 );
  2519             MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2520         } else {
  2521             LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
  2522             CALL1_ptr_r32( sh4_ftrv, REG_EAX );
  2523         }
  2524     }
  2525     sh4_x86.tstate = TSTATE_NONE;
  2526 :}
  2528 FRCHG {:  
  2529     COUNT_INST(I_FRCHG);
  2530     check_fpuen();
  2531     XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
  2532     CALL_ptr( sh4_switch_fr_banks );
  2533     sh4_x86.tstate = TSTATE_NONE;
  2534 :}
  2535 FSCHG {:  
  2536     COUNT_INST(I_FSCHG);
  2537     check_fpuen();
  2538     XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
  2539     XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
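           // Mirror the SZ flip into xlat_sh4_mode (presumably what translated
           // block lookup keys on) and into double_size, so the rest of this
           // block translates FMOVs with the new transfer size.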
  2540     sh4_x86.tstate = TSTATE_NONE;
  2541     sh4_x86.double_size = !sh4_x86.double_size;
  2542 :}
  2544 /* Processor control instructions */
  2545 LDC Rm, SR {:
  2546     COUNT_INST(I_LDCSR);
  2547     if( sh4_x86.in_delay_slot ) {
  2548 	SLOTILLEGAL();
  2549     } else {
  2550 	check_priv();
  2551 	load_reg( REG_EAX, Rm );
  2552 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
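       	// Writing SR can flip SR.FD (the FPU-disable bit) among other state,
       	// so the cached FPU-enabled check is invalidated below.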
  2553 	sh4_x86.fpuen_checked = FALSE;
  2554 	sh4_x86.tstate = TSTATE_NONE;
  2555 	return 2;
  2556     }
  2557 :}
  2558 LDC Rm, GBR {: 
  2559     COUNT_INST(I_LDC);
  2560     load_reg( REG_EAX, Rm );
  2561     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2562 :}
  2563 LDC Rm, VBR {:  
  2564     COUNT_INST(I_LDC);
  2565     check_priv();
  2566     load_reg( REG_EAX, Rm );
  2567     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2568     sh4_x86.tstate = TSTATE_NONE;
  2569 :}
  2570 LDC Rm, SSR {:  
  2571     COUNT_INST(I_LDC);
  2572     check_priv();
  2573     load_reg( REG_EAX, Rm );
  2574     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2575     sh4_x86.tstate = TSTATE_NONE;
  2576 :}
  2577 LDC Rm, SGR {:  
  2578     COUNT_INST(I_LDC);
  2579     check_priv();
  2580     load_reg( REG_EAX, Rm );
  2581     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2582     sh4_x86.tstate = TSTATE_NONE;
  2583 :}
  2584 LDC Rm, SPC {:  
  2585     COUNT_INST(I_LDC);
  2586     check_priv();
  2587     load_reg( REG_EAX, Rm );
  2588     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2589     sh4_x86.tstate = TSTATE_NONE;
  2590 :}
  2591 LDC Rm, DBR {:  
  2592     COUNT_INST(I_LDC);
  2593     check_priv();
  2594     load_reg( REG_EAX, Rm );
  2595     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2596     sh4_x86.tstate = TSTATE_NONE;
  2597 :}
  2598 LDC Rm, Rn_BANK {:  
  2599     COUNT_INST(I_LDC);
  2600     check_priv();
  2601     load_reg( REG_EAX, Rm );
  2602     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2603     sh4_x86.tstate = TSTATE_NONE;
  2604 :}
  2605 LDC.L @Rm+, GBR {:  
  2606     COUNT_INST(I_LDCM);
  2607     load_reg( REG_EAX, Rm );
  2608     check_ralign32( REG_EAX );
  2609     MEM_READ_LONG( REG_EAX, REG_EAX );
  2610     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2611     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2612     sh4_x86.tstate = TSTATE_NONE;
  2613 :}
  2614 LDC.L @Rm+, SR {:
  2615     COUNT_INST(I_LDCSRM);
  2616     if( sh4_x86.in_delay_slot ) {
  2617 	SLOTILLEGAL();
  2618     } else {
  2619 	check_priv();
  2620 	load_reg( REG_EAX, Rm );
  2621 	check_ralign32( REG_EAX );
  2622 	MEM_READ_LONG( REG_EAX, REG_EAX );
  2623 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2624 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2625 	sh4_x86.fpuen_checked = FALSE;
  2626 	sh4_x86.tstate = TSTATE_NONE;
  2627 	return 2;
  2628     }
  2629 :}
  2630 LDC.L @Rm+, VBR {:  
  2631     COUNT_INST(I_LDCM);
  2632     check_priv();
  2633     load_reg( REG_EAX, Rm );
  2634     check_ralign32( REG_EAX );
  2635     MEM_READ_LONG( REG_EAX, REG_EAX );
  2636     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2637     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2638     sh4_x86.tstate = TSTATE_NONE;
  2639 :}
  2640 LDC.L @Rm+, SSR {:
  2641     COUNT_INST(I_LDCM);
  2642     check_priv();
  2643     load_reg( REG_EAX, Rm );
  2644     check_ralign32( REG_EAX );
  2645     MEM_READ_LONG( REG_EAX, REG_EAX );
  2646     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2647     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2648     sh4_x86.tstate = TSTATE_NONE;
  2649 :}
  2650 LDC.L @Rm+, SGR {:  
  2651     COUNT_INST(I_LDCM);
  2652     check_priv();
  2653     load_reg( REG_EAX, Rm );
  2654     check_ralign32( REG_EAX );
  2655     MEM_READ_LONG( REG_EAX, REG_EAX );
  2656     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2657     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2658     sh4_x86.tstate = TSTATE_NONE;
  2659 :}
  2660 LDC.L @Rm+, SPC {:  
  2661     COUNT_INST(I_LDCM);
  2662     check_priv();
  2663     load_reg( REG_EAX, Rm );
  2664     check_ralign32( REG_EAX );
  2665     MEM_READ_LONG( REG_EAX, REG_EAX );
  2666     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2667     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2668     sh4_x86.tstate = TSTATE_NONE;
  2669 :}
  2670 LDC.L @Rm+, DBR {:  
  2671     COUNT_INST(I_LDCM);
  2672     check_priv();
  2673     load_reg( REG_EAX, Rm );
  2674     check_ralign32( REG_EAX );
  2675     MEM_READ_LONG( REG_EAX, REG_EAX );
  2676     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2677     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2678     sh4_x86.tstate = TSTATE_NONE;
  2679 :}
  2680 LDC.L @Rm+, Rn_BANK {:  
  2681     COUNT_INST(I_LDCM);
  2682     check_priv();
  2683     load_reg( REG_EAX, Rm );
  2684     check_ralign32( REG_EAX );
  2685     MEM_READ_LONG( REG_EAX, REG_EAX );
  2686     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2687     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2688     sh4_x86.tstate = TSTATE_NONE;
  2689 :}
  2690 LDS Rm, FPSCR {:
  2691     COUNT_INST(I_LDSFPSCR);
  2692     check_fpuen();
  2693     load_reg( REG_EAX, Rm );
  2694     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2695     sh4_x86.tstate = TSTATE_NONE;
  2696     return 2;
  2697 :}
  2698 LDS.L @Rm+, FPSCR {:  
  2699     COUNT_INST(I_LDSFPSCRM);
  2700     check_fpuen();
  2701     load_reg( REG_EAX, Rm );
  2702     check_ralign32( REG_EAX );
  2703     MEM_READ_LONG( REG_EAX, REG_EAX );
  2704     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2705     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2706     sh4_x86.tstate = TSTATE_NONE;
  2707     return 2;
  2708 :}
  2709 LDS Rm, FPUL {:  
  2710     COUNT_INST(I_LDS);
  2711     check_fpuen();
  2712     load_reg( REG_EAX, Rm );
  2713     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2714 :}
  2715 LDS.L @Rm+, FPUL {:  
  2716     COUNT_INST(I_LDSM);
  2717     check_fpuen();
  2718     load_reg( REG_EAX, Rm );
  2719     check_ralign32( REG_EAX );
  2720     MEM_READ_LONG( REG_EAX, REG_EAX );
  2721     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2722     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2723     sh4_x86.tstate = TSTATE_NONE;
  2724 :}
  2725 LDS Rm, MACH {: 
  2726     COUNT_INST(I_LDS);
  2727     load_reg( REG_EAX, Rm );
  2728     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2729 :}
  2730 LDS.L @Rm+, MACH {:  
  2731     COUNT_INST(I_LDSM);
  2732     load_reg( REG_EAX, Rm );
  2733     check_ralign32( REG_EAX );
  2734     MEM_READ_LONG( REG_EAX, REG_EAX );
  2735     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2736     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2737     sh4_x86.tstate = TSTATE_NONE;
  2738 :}
  2739 LDS Rm, MACL {:  
  2740     COUNT_INST(I_LDS);
  2741     load_reg( REG_EAX, Rm );
  2742     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2743 :}
  2744 LDS.L @Rm+, MACL {:  
  2745     COUNT_INST(I_LDSM);
  2746     load_reg( REG_EAX, Rm );
  2747     check_ralign32( REG_EAX );
  2748     MEM_READ_LONG( REG_EAX, REG_EAX );
  2749     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2750     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2751     sh4_x86.tstate = TSTATE_NONE;
  2752 :}
  2753 LDS Rm, PR {:  
  2754     COUNT_INST(I_LDS);
  2755     load_reg( REG_EAX, Rm );
  2756     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2757 :}
  2758 LDS.L @Rm+, PR {:  
  2759     COUNT_INST(I_LDSM);
  2760     load_reg( REG_EAX, Rm );
  2761     check_ralign32( REG_EAX );
  2762     MEM_READ_LONG( REG_EAX, REG_EAX );
  2763     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2764     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2765     sh4_x86.tstate = TSTATE_NONE;
  2766 :}
  2767 LDTLB {:  
  2768     COUNT_INST(I_LDTLB);
  2769     CALL_ptr( MMU_ldtlb );
  2770     sh4_x86.tstate = TSTATE_NONE;
  2771 :}
  2772 OCBI @Rn {:
  2773     COUNT_INST(I_OCBI);
  2774 :}
  2775 OCBP @Rn {:
  2776     COUNT_INST(I_OCBP);
  2777 :}
  2778 OCBWB @Rn {:
  2779     COUNT_INST(I_OCBWB);
  2780 :}
  2781 PREF @Rn {:
  2782     COUNT_INST(I_PREF);
  2783     load_reg( REG_EAX, Rn );
  2784     MEM_PREFETCH( REG_EAX );
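           // Note: a PREF whose address falls in the store-queue region
           // triggers the SQ burst write on SH4 rather than an ordinary
           // prefetch; presumably MEM_PREFETCH dispatches on that case.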
  2785     sh4_x86.tstate = TSTATE_NONE;
  2786 :}
  2787 SLEEP {: 
  2788     COUNT_INST(I_SLEEP);
  2789     check_priv();
  2790     CALL_ptr( sh4_sleep );
  2791     sh4_x86.tstate = TSTATE_NONE;
  2792     sh4_x86.in_delay_slot = DELAY_NONE;
  2793     return 2;
  2794 :}
  2795 STC SR, Rn {:
  2796     COUNT_INST(I_STCSR);
  2797     check_priv();
  2798     CALL_ptr(sh4_read_sr);
  2799     store_reg( REG_EAX, Rn );
  2800     sh4_x86.tstate = TSTATE_NONE;
  2801 :}
  2802 STC GBR, Rn {:  
  2803     COUNT_INST(I_STC);
  2804     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  2805     store_reg( REG_EAX, Rn );
  2806 :}
  2807 STC VBR, Rn {:  
  2808     COUNT_INST(I_STC);
  2809     check_priv();
  2810     MOVL_rbpdisp_r32( R_VBR, REG_EAX );
  2811     store_reg( REG_EAX, Rn );
  2812     sh4_x86.tstate = TSTATE_NONE;
  2813 :}
  2814 STC SSR, Rn {:  
  2815     COUNT_INST(I_STC);
  2816     check_priv();
  2817     MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2818     store_reg( REG_EAX, Rn );
  2819     sh4_x86.tstate = TSTATE_NONE;
  2820 :}
  2821 STC SPC, Rn {:  
  2822     COUNT_INST(I_STC);
  2823     check_priv();
  2824     MOVL_rbpdisp_r32( R_SPC, REG_EAX );
  2825     store_reg( REG_EAX, Rn );
  2826     sh4_x86.tstate = TSTATE_NONE;
  2827 :}
  2828 STC SGR, Rn {:  
  2829     COUNT_INST(I_STC);
  2830     check_priv();
  2831     MOVL_rbpdisp_r32( R_SGR, REG_EAX );
  2832     store_reg( REG_EAX, Rn );
  2833     sh4_x86.tstate = TSTATE_NONE;
  2834 :}
  2835 STC DBR, Rn {:  
  2836     COUNT_INST(I_STC);
  2837     check_priv();
  2838     MOVL_rbpdisp_r32( R_DBR, REG_EAX );
  2839     store_reg( REG_EAX, Rn );
  2840     sh4_x86.tstate = TSTATE_NONE;
  2841 :}
  2842 STC Rm_BANK, Rn {:
  2843     COUNT_INST(I_STC);
  2844     check_priv();
  2845     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
  2846     store_reg( REG_EAX, Rn );
  2847     sh4_x86.tstate = TSTATE_NONE;
  2848 :}
  2849 STC.L SR, @-Rn {:
  2850     COUNT_INST(I_STCSRM);
  2851     check_priv();
  2852     CALL_ptr( sh4_read_sr );
  2853     MOVL_r32_r32( REG_EAX, REG_EDX );
  2854     load_reg( REG_EAX, Rn );
  2855     check_walign32( REG_EAX );
  2856     LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2857     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2858     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2859     sh4_x86.tstate = TSTATE_NONE;
  2860 :}
  2861 STC.L VBR, @-Rn {:  
  2862     COUNT_INST(I_STCM);
  2863     check_priv();
  2864     load_reg( REG_EAX, Rn );
  2865     check_walign32( REG_EAX );
  2866     ADDL_imms_r32( -4, REG_EAX );
  2867     MOVL_rbpdisp_r32( R_VBR, REG_EDX );
  2868     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2869     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2870     sh4_x86.tstate = TSTATE_NONE;
  2871 :}
  2872 STC.L SSR, @-Rn {:  
  2873     COUNT_INST(I_STCM);
  2874     check_priv();
  2875     load_reg( REG_EAX, Rn );
  2876     check_walign32( REG_EAX );
  2877     ADDL_imms_r32( -4, REG_EAX );
  2878     MOVL_rbpdisp_r32( R_SSR, REG_EDX );
  2879     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2880     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2881     sh4_x86.tstate = TSTATE_NONE;
  2882 :}
  2883 STC.L SPC, @-Rn {:
  2884     COUNT_INST(I_STCM);
  2885     check_priv();
  2886     load_reg( REG_EAX, Rn );
  2887     check_walign32( REG_EAX );
  2888     ADDL_imms_r32( -4, REG_EAX );
  2889     MOVL_rbpdisp_r32( R_SPC, REG_EDX );
  2890     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2891     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2892     sh4_x86.tstate = TSTATE_NONE;
  2893 :}
  2894 STC.L SGR, @-Rn {:  
  2895     COUNT_INST(I_STCM);
  2896     check_priv();
  2897     load_reg( REG_EAX, Rn );
  2898     check_walign32( REG_EAX );
  2899     ADDL_imms_r32( -4, REG_EAX );
  2900     MOVL_rbpdisp_r32( R_SGR, REG_EDX );
  2901     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2902     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2903     sh4_x86.tstate = TSTATE_NONE;
  2904 :}
  2905 STC.L DBR, @-Rn {:  
  2906     COUNT_INST(I_STCM);
  2907     check_priv();
  2908     load_reg( REG_EAX, Rn );
  2909     check_walign32( REG_EAX );
  2910     ADDL_imms_r32( -4, REG_EAX );
  2911     MOVL_rbpdisp_r32( R_DBR, REG_EDX );
  2912     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2913     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2914     sh4_x86.tstate = TSTATE_NONE;
  2915 :}
  2916 STC.L Rm_BANK, @-Rn {:  
  2917     COUNT_INST(I_STCM);
  2918     check_priv();
  2919     load_reg( REG_EAX, Rn );
  2920     check_walign32( REG_EAX );
  2921     ADDL_imms_r32( -4, REG_EAX );
  2922     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
  2923     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2924     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2925     sh4_x86.tstate = TSTATE_NONE;
  2926 :}
  2927 STC.L GBR, @-Rn {:  
  2928     COUNT_INST(I_STCM);
  2929     load_reg( REG_EAX, Rn );
  2930     check_walign32( REG_EAX );
  2931     ADDL_imms_r32( -4, REG_EAX );
  2932     MOVL_rbpdisp_r32( R_GBR, REG_EDX );
  2933     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2934     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2935     sh4_x86.tstate = TSTATE_NONE;
  2936 :}
  2937 STS FPSCR, Rn {:  
  2938     COUNT_INST(I_STSFPSCR);
  2939     check_fpuen();
  2940     MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
  2941     store_reg( REG_EAX, Rn );
  2942 :}
  2943 STS.L FPSCR, @-Rn {:  
  2944     COUNT_INST(I_STSFPSCRM);
  2945     check_fpuen();
  2946     load_reg( REG_EAX, Rn );
  2947     check_walign32( REG_EAX );
  2948     ADDL_imms_r32( -4, REG_EAX );
  2949     MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
  2950     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2951     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2952     sh4_x86.tstate = TSTATE_NONE;
  2953 :}
  2954 STS FPUL, Rn {:  
  2955     COUNT_INST(I_STS);
  2956     check_fpuen();
  2957     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2958     store_reg( REG_EAX, Rn );
  2959 :}
  2960 STS.L FPUL, @-Rn {:  
  2961     COUNT_INST(I_STSM);
  2962     check_fpuen();
  2963     load_reg( REG_EAX, Rn );
  2964     check_walign32( REG_EAX );
  2965     ADDL_imms_r32( -4, REG_EAX );
  2966     MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
  2967     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2968     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2969     sh4_x86.tstate = TSTATE_NONE;
  2970 :}
  2971 STS MACH, Rn {:  
  2972     COUNT_INST(I_STS);
  2973     MOVL_rbpdisp_r32( R_MACH, REG_EAX );
  2974     store_reg( REG_EAX, Rn );
  2975 :}
  2976 STS.L MACH, @-Rn {:  
  2977     COUNT_INST(I_STSM);
  2978     load_reg( REG_EAX, Rn );
  2979     check_walign32( REG_EAX );
  2980     ADDL_imms_r32( -4, REG_EAX );
  2981     MOVL_rbpdisp_r32( R_MACH, REG_EDX );
  2982     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2983     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2984     sh4_x86.tstate = TSTATE_NONE;
  2985 :}
  2986 STS MACL, Rn {:  
  2987     COUNT_INST(I_STS);
  2988     MOVL_rbpdisp_r32( R_MACL, REG_EAX );
  2989     store_reg( REG_EAX, Rn );
  2990 :}
  2991 STS.L MACL, @-Rn {:  
  2992     COUNT_INST(I_STSM);
  2993     load_reg( REG_EAX, Rn );
  2994     check_walign32( REG_EAX );
  2995     ADDL_imms_r32( -4, REG_EAX );
  2996     MOVL_rbpdisp_r32( R_MACL, REG_EDX );
  2997     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2998     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2999     sh4_x86.tstate = TSTATE_NONE;
  3000 :}
  3001 STS PR, Rn {:  
  3002     COUNT_INST(I_STS);
  3003     MOVL_rbpdisp_r32( R_PR, REG_EAX );
  3004     store_reg( REG_EAX, Rn );
  3005 :}
  3006 STS.L PR, @-Rn {:  
  3007     COUNT_INST(I_STSM);
  3008     load_reg( REG_EAX, Rn );
  3009     check_walign32( REG_EAX );
  3010     ADDL_imms_r32( -4, REG_EAX );
  3011     MOVL_rbpdisp_r32( R_PR, REG_EDX );
  3012     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3013     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3014     sh4_x86.tstate = TSTATE_NONE;
  3015 :}
  3017 NOP {: 
  3018     COUNT_INST(I_NOP);
  3019     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  3020 :}
  3021 %%
  3022     sh4_x86.in_delay_slot = DELAY_NONE;
  3023     return 0;
  3024 }
  3027 /**
  3028  * The unwind methods only work if we compiled with DWARF2 frame information
  3029  * (i.e. -fexceptions); otherwise we have to use the direct frame scan.
  3030  */
  3031 #ifdef HAVE_EXCEPTIONS
  3032 #include <unwind.h>
  3034 struct UnwindInfo {
  3035     uintptr_t block_start;
  3036     uintptr_t block_end;
  3037     void *pc;
  3038 };
  3040 static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
  3041 {
  3042     struct UnwindInfo *info = arg;
  3043     void *pc = (void *)_Unwind_GetIP(context);
  3044     if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
  3045         info->pc = pc;
  3046         return _URC_NORMAL_STOP;
  3047     }
  3048     return _URC_NO_REASON;
  3049 }
  3051 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3052 {
  3053     struct _Unwind_Exception exc;
  3054     struct UnwindInfo info;
  3056     info.pc = NULL;
  3057     info.block_start = (uintptr_t)code;
  3058     info.block_end = info.block_start + code_size;
  3059     void *result = NULL;
  3060     _Unwind_Backtrace( xlat_check_frame, &info );
  3061     return info.pc;
  3062 }
  3063 #else
  3064 /* Assume this is an ia32 build - amd64 should always have DWARF information */
  3065 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3066 {
  3067     void *result = NULL;
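           /* Walk the saved-%ebp chain for at most 8 frames.  Translated code
            * keeps %ebp == &sh4r + 128, so a frame whose saved %ebp equals
            * that value was pushed by a call out of translated code, and the
            * word above it (4(%eax)) is the return address - the native PC
            * within the block. */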
  3068     asm(
  3069         "mov %%ebp, %%eax\n\t"
  3070         "mov $0x8, %%ecx\n\t"
  3071         "mov %1, %%edx\n"
  3072         "frame_loop: test %%eax, %%eax\n\t"
  3073         "je frame_not_found\n\t"
  3074         "cmp (%%eax), %%edx\n\t"
  3075         "je frame_found\n\t"
  3076         "sub $0x1, %%ecx\n\t"
  3077         "je frame_not_found\n\t"
  3078         "movl (%%eax), %%eax\n\t"
  3079         "jmp frame_loop\n"
  3080         "frame_found: movl 0x4(%%eax), %0\n"
  3081         "frame_not_found:"
  3082         : "=r" (result)
  3083         : "r" (((uint8_t *)&sh4r) + 128 )
  3084         : "eax", "ecx", "edx" );
  3085     return result;
  3086 }
  3087 #endif