lxdream.org :: lxdream/src/sh4/sh4x86.in
filename     src/sh4/sh4x86.in
changeset    1011:fdd58619b760
prev         1006:3a169c224c12
author       nkeynes
date         Sun Apr 19 05:14:19 2009 +0000
branch       xlat-refactor
permissions  -rw-r--r--
last change  Remove branch instructions and replace with direct modification of PC + EXIT
             Add MIN/MAX instructions (for bound checks)
             Implement x86_target_is_legal
             Correct a few sh4 instructions
/**
 * $Id$
 *
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif

#include "lxdream.h"
#include "sh4/sh4core.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4stat.h"
#include "sh4/sh4mmio.h"
#include "sh4/mmu.h"
#include "xlat/xltcache.h"
#include "xlat/x86/x86op.h"
#include "clock.h"

#define DEFAULT_BACKPATCH_SIZE 4096
/* Offset of a reg relative to the sh4r structure */
#define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)

#define R_T      REG_OFFSET(t)
#define R_Q      REG_OFFSET(q)
#define R_S      REG_OFFSET(s)
#define R_M      REG_OFFSET(m)
#define R_SR     REG_OFFSET(sr)
#define R_GBR    REG_OFFSET(gbr)
#define R_SSR    REG_OFFSET(ssr)
#define R_SPC    REG_OFFSET(spc)
#define R_VBR    REG_OFFSET(vbr)
#define R_MACH   REG_OFFSET(mac)+4
#define R_MACL   REG_OFFSET(mac)
#define R_PC     REG_OFFSET(pc)
#define R_NEW_PC REG_OFFSET(new_pc)
#define R_PR     REG_OFFSET(pr)
#define R_SGR    REG_OFFSET(sgr)
#define R_FPUL   REG_OFFSET(fpul)
#define R_FPSCR  REG_OFFSET(fpscr)
#define R_DBR    REG_OFFSET(dbr)
#define R_R(rn)  REG_OFFSET(r[rn])
#define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
#define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
#define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
#define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
#define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
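
/* The -128 bias pays off because sh4_translate_begin_block() (below)
 * parks REG_EBP at &sh4r + 128: any field in the first 256 bytes of
 * sh4r then has a REG_OFFSET() value in [-128, 127], which encodes as
 * an x86 disp8 instead of a disp32 and keeps the emitted code smaller. */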
#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2

struct backpatch_record {
    uint32_t fixup_offset;
    uint32_t fixup_icount;
    int32_t exc_code;
};

/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc;
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    int tstate;

    /* mode flags */
    gboolean tlb_on; /* True if tlb translation is active */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};
static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */

gboolean is_sse3_supported()
{
    uint32_t features;

    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE;
}
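
/* For reference: CPUID leaf 1 reports feature flags in ECX, and bit 0 of
 * ECX is the SSE3 (PNI) flag, so the (features & 1) test above is
 * exactly the SSE3 check. */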
void sh4_translate_init(void)
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
    sh4_x86.sse3_enabled = is_sse3_supported();
}
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    int reloc_size = 4;

    if( exc_code == -2 ) {
        reloc_size = sizeof(void *);
    }

    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                                          sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }

    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
        (((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}
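
/* How these records are consumed is only visible later, in
 * sh4_translate_end_block(); a hedged summary of the exc_code
 * convention as it appears from that code:
 *   exc_code >= 0 : fixup_offset locates the rel32 field of a Jcc
 *                   emitted with displacement 0; the trailer raises
 *                   exc_code and then unwinds via xlat_get_code*.
 *   exc_code == -2: fixup_offset locates a pointer-sized immediate that
 *                   receives the host address of the recovery stub
 *                   (used by the memory-call paths below).
 *   other negative: the callee raised the SH4 exception itself, so only
 *                   the branch to the cleanup code is patched.
 * fixup_icount counts SH4 instructions from block start and is used by
 * the trailer to rewind PC and slice_cycle. */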
#define TSTATE_NONE -1
#define TSTATE_O    X86_COND_O
#define TSTATE_C    X86_COND_C
#define TSTATE_E    X86_COND_E
#define TSTATE_NE   X86_COND_NE
#define TSTATE_G    X86_COND_G
#define TSTATE_GE   X86_COND_GE
#define TSTATE_A    X86_COND_A
#define TSTATE_AE   X86_COND_AE

/* Convenience instructions */
#define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
#define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
#define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
#define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
#define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
#define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
#define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
#define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
#define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
#define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
#define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    JCC_cc_rel8(sh4_x86.tstate,-1); _MARK_JMP8(label)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    JCC_cc_rel8(sh4_x86.tstate^1, -1); _MARK_JMP8(label)
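
/* LDC_t() reloads the SH4 T bit into the host carry flag without a
 * branch: CMPB computes t - 1, which borrows (CF = 1) exactly when
 * t == 0, and CMC then inverts CF, leaving CF == T. A minimal sketch of
 * the flag algebra, assuming t is always 0 or 1 (illustrative only): */
#if 0
static int carry_after_ldc_t( uint32_t t )
{
    int cf = (t < 1);   /* CF after CMPB $1, sh4r.t: borrow iff t == 0 */
    return !cf;         /* CF after CMC: equal to the T bit */
}
#endif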
#define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
#define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
#define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
#define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register
 */
#define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
#define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

#define push_fpul()  FLDF_rbpdisp(R_FPUL)
#define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
#define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif

/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( (sh4r.xlat_sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2) ); \
        } else { \
            exit_block_exc(EXC_ILLEGAL, pc); \
        } \
        sh4_x86.branch_taken = TRUE; \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        MOVL_rbpdisp_r32( R_SR, REG_EAX );\
        ANDL_imms_r32( SR_FD, REG_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
        sh4_x86.tstate = TSTATE_NONE; \
    }
#define check_ralign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);
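
/* Each alignment macro emits a TEST of the low address bits followed by
 * a backpatched conditional jump; the predicate is natural alignment,
 * with the TESTL mask equal to (access size - 1). Sketch only: */
#if 0
static inline int sh4_misaligned( uint32_t addr, uint32_t size )
{
    return (addr & (size - 1)) != 0;   /* size is 2, 4 or 8 */
}
#endif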
#define address_space() ((sh4r.xlat_sh4_mode&SR_MD) ? (uintptr_t)sh4_address_space : (uintptr_t)sh4_user_address_space)

#define UNDEF(ir)
/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
 */
#ifdef HAVE_FRAME_ADDRESS
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) {
        CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    } else {
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
        MOVP_immptr_rptr( 0, REG_ARG2 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
    }
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) {
        CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
    } else {
        if( value_reg != REG_ARG2 ) {
            MOVL_r32_r32( value_reg, REG_ARG2 );
        }
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
#if MAX_REG_ARG > 2
        MOVP_immptr_rptr( 0, REG_ARG3 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
#else
        MOVL_imm32_rspdisp( 0, 0 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
#endif
    }
}
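
/* A hedged reading of the placeholder-plus-backpatch(-2) pairing above,
 * inferred from the fixup loop in sh4_translate_end_block(): the 0
 * immediate loaded into the extra argument (register or stack slot) is
 * patched at end-of-block with the host address of this call site's
 * recovery stub, so a memory handler that raises an SH4 exception can
 * return through that pointer into the trailer code that rewinds
 * PC/slice_cycle and re-enters the translation cache. */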
#else
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
}
#endif

#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
#define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
#define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc)
#define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
#define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
#define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)

#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
void sh4_translate_begin_block( sh4addr_t pc )
{
    enter_block();
    MOVP_immptr_rptr( ((uint8_t *)&sh4r) + 128, REG_EBP );
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_TLB_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
}

uint32_t sh4_translate_end_block_size()
{
    if( sh4_x86.backpatch_posn <= 3 ) {
        return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
    } else {
        return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
    }
}
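
/* Worked instance of the estimate above: with backpatch_posn == 5 the
 * trailer reserves EPILOGUE_SIZE + 48 + 2*15 = EPILOGUE_SIZE + 78 bytes. */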
/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    MOVL_imm32_r32( pc, REG_EAX );
    CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
}

#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)

/**
 * Exit the block with sh4r.pc already written
 */
void exit_block_pcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }
    exit_block();
}
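
/* Note on the cycle accounting used by all the exit_block_* variants:
 * SH4 instructions are a fixed 2 bytes, so (pc - block_start_pc) >> 1 is
 * the instruction count since block entry, and multiplying by
 * sh4_cpu_period converts that count into slice_cycle time. */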
/**
 * Exit the block with sh4r.new_pc written with the target pc
 */
void exit_block_newpcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }
    exit_block();
}

/**
 * Exit the block to an absolute PC
 */
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( pc, REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, R_PC );
    if( IS_IN_ICACHE(pc) ) {
        MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
        ANDP_imms_rptr( -4, REG_EAX );
    } else if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ECX);
    } else {
        CALL1_ptr_r32(xlat_get_code, REG_ECX);
    }
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    exit_block();
}

/**
 * Exit the block to a relative PC
 */
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
    ADDL_rbpdisp_r32( R_PC, REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, R_PC );
    if( IS_IN_ICACHE(pc) ) {
        MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
        ANDP_imms_rptr( -4, REG_EAX );
    } else if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ECX);
    } else {
        CALL1_ptr_r32(xlat_get_code, REG_ECX);
    }
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    exit_block();
}

/**
 * Exit unconditionally with a general exception
 */
void exit_block_exc( int code, sh4addr_t pc )
{
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_imm32_r32( code, REG_ARG1 );
    CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    exit_block();
}
/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
    ADDL_r32_rbpdisp( REG_ECX, R_PC );

    MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
    MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );

    CALL_ptr( sh4_execute_instruction );
    MOVL_rbpdisp_r32( R_PC, REG_EAX );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_EAX);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_EAX);
    }
    exit_block();
}
/**
 * Write the block trailer (exception handling block)
 */
void sh4_translate_end_block( sh4addr_t pc ) {
    if( sh4_x86.branch_taken == FALSE ) {
        // Didn't exit unconditionally already, so write the termination here
        exit_block_rel( pc, pc );
    }
    if( sh4_x86.backpatch_posn != 0 ) {
        unsigned int i;
        // Exception raised - cleanup and exit
        uint8_t *end_ptr = xlat_output;
        MOVL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_rbpdisp( REG_ECX, R_SPC );
        MOVL_moffptr_eax( &sh4_cpu_period );
        MULL_r32( REG_EDX );
        ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
        MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
        if( sh4_x86.tlb_on ) {
            CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
        } else {
            CALL1_ptr_r32(xlat_get_code, REG_ARG1);
        }
        exit_block();

        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
                if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
                    *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output;
                } else {
                    *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                }
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            } else {
                *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
                CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            }
        }
    }
}
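
/* The "*fixup_addr +=" above is the standard rel32 patch rule: the Jcc
 * was emitted with displacement 0, so the final displacement is the
 * target minus the end of the 4-byte field. A self-contained toy of the
 * same rule (sketch only, not translator code): */
#if 0
static void patch_rel32( uint8_t *field, const uint8_t *target )
{
    int32_t disp;
    memcpy( &disp, field, 4 );               /* currently 0 */
    disp += (int32_t)(target - (field + 4)); /* end of the rel32 field */
    memcpy( field, &disp, 4 );
}
#endif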
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both the branch and the delayed instruction as a single
 * unit (as required by the SH4).
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or a page boundary).
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    if( !sh4_x86.in_delay_slot ) {
        sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
    }

    /* check for breakpoints at this pc */
    for( int i=0; i<sh4_breakpoint_count; i++ ) {
        if( sh4_breakpoints[i].address == pc ) {
            sh4_translate_emit_breakpoint(pc);
            break;
        }
    }
%%
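/* Everything after the %% marker is an instruction template of the form
 * PATTERN {: C body :}. The .in file is presumably preprocessed by
 * lxdream's decoder generator, which expands each template into the
 * decode switch and binds the operand fields (Rm, Rn, imm, disp) and the
 * current pc within each body. */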
/* ALU operations */
ADD Rm, Rn {:
    COUNT_INST(I_ADD);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADD #imm, Rn {:
    COUNT_INST(I_ADDI);
    ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    COUNT_INST(I_ADDC);
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADCL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ADDV Rm, Rn {:
    COUNT_INST(I_ADDV);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    COUNT_INST(I_AND);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ANDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:
    COUNT_INST(I_ANDI);
    load_reg( REG_EAX, 0 );
    ANDL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ANDB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp(REG_EAX, 0);
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_rspdisp_r32(0, REG_EAX);
    ANDL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
CMP/EQ Rm, Rn {:
    COUNT_INST(I_CMPEQ);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:
    COUNT_INST(I_CMPEQI);
    load_reg( REG_EAX, 0 );
    CMPL_imms_r32(imm, REG_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:
    COUNT_INST(I_CMPGE);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {:
    COUNT_INST(I_CMPGT);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:
    COUNT_INST(I_CMPHI);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {:
    COUNT_INST(I_CMPHS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
:}
CMP/PL Rn {:
    COUNT_INST(I_CMPPL);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:
    COUNT_INST(I_CMPPZ);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:
    COUNT_INST(I_CMPSTR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_ECX, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target1);
    TESTB_r8_r8( REG_AH, REG_AH );
    JE_label(target2);
    SHRL_imm_r32( 16, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target3);
    TESTB_r8_r8( REG_AH, REG_AH );
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
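/* CMP/STR sets T when any of the four byte lanes of Rm and Rn are equal,
 * which is what the XOR-then-test-each-byte chain above computes. A
 * reference model of the same predicate (sketch only, not lxdream code):
 *
 *     static int cmp_str( uint32_t rm, uint32_t rn )
 *     {
 *         uint32_t x = rm ^ rn;  // a zero byte means that lane matched
 *         return (x & 0x000000FF) == 0 || (x & 0x0000FF00) == 0 ||
 *                (x & 0x00FF0000) == 0 || (x & 0xFF000000) == 0;
 *     }
 */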
DIV0S Rm, Rn {:
    COUNT_INST(I_DIV0S);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SHRL_imm_r32( 31, REG_EAX );
    SHRL_imm_r32( 31, REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:
    COUNT_INST(I_DIV0U);
    XORL_r32_r32( REG_EAX, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_Q );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
DIV1 Rm, Rn {:
    COUNT_INST(I_DIV1);
    MOVL_rbpdisp_r32( R_M, REG_ECX );
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    SETC_r8( REG_DL ); // Q'
    CMPL_rbpdisp_r32( R_Q, REG_ECX );
    JE_label(mqequal);
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_label(end);
    JMP_TARGET(mqequal);
    SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_TARGET(end);
    store_reg( REG_EAX, Rn ); // Done with Rn now
    SETC_r8(REG_AL); // tmp1
    XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
    XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    XORL_imms_r32( 1, REG_AL );   // T = !Q'
    MOVZXL_r8_r32( REG_AL, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULS.L Rm, Rn {:
    COUNT_INST(I_DMULS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    IMULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:
    COUNT_INST(I_DMULU);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:
    COUNT_INST(I_DT);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( -1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:
    COUNT_INST(I_EXTSB);
    load_reg( REG_EAX, Rm );
    MOVSXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTS.W Rm, Rn {:
    COUNT_INST(I_EXTSW);
    load_reg( REG_EAX, Rm );
    MOVSXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTU.B Rm, Rn {:
    COUNT_INST(I_EXTUB);
    load_reg( REG_EAX, Rm );
    MOVZXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTU.W Rm, Rn {:
    COUNT_INST(I_EXTUW);
    load_reg( REG_EAX, Rm );
    MOVZXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MAC.L @Rm+, @Rn+ {:
    COUNT_INST(I_MACL);
    if( Rm == Rn ) {
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp(REG_EAX, 0);
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
    } else {
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rn );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    }

    IMULL_rspdisp( 0 );
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );

    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32(REG_ECX, REG_ECX);
    JE_label( nosat );
    CALL_ptr( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
MAC.W @Rm+, @Rn+ {:
    COUNT_INST(I_MACW);
    if( Rm == Rn ) {
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rn );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
    }
    IMULL_rspdisp( 0 );
    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32( REG_ECX, REG_ECX );
    JE_label( nosat );

    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    JNO_label( end );            // 2
    MOVL_imm32_r32( 1, REG_EDX );         // 5
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );   // 6
    JS_label( positive );        // 2
    MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
    JMP_label(end2);           // 2

    JMP_TARGET(positive);
    MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
    JMP_label(end3);            // 2

    JMP_TARGET(nosat);
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVT Rn {:
    COUNT_INST(I_MOVT);
    MOVL_rbpdisp_r32( R_T, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MUL.L Rm, Rn {:
    COUNT_INST(I_MULL);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    COUNT_INST(I_MULSW);
    MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:
    COUNT_INST(I_MULUW);
    MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    COUNT_INST(I_NEG);
    load_reg( REG_EAX, Rm );
    NEGL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:
    COUNT_INST(I_NEGC);
    load_reg( REG_EAX, Rm );
    XORL_r32_r32( REG_ECX, REG_ECX );
    LDC_t();
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:
    COUNT_INST(I_NOT);
    load_reg( REG_EAX, Rm );
    NOTL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:
    COUNT_INST(I_OR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    COUNT_INST(I_ORI);
    load_reg( REG_EAX, 0 );
    ORL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ORB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_rspdisp_r32( 0, REG_EAX );
    ORL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    COUNT_INST(I_ROTCL);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:
    COUNT_INST(I_ROTCR);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCRL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:
    COUNT_INST(I_ROTL);
    load_reg( REG_EAX, Rn );
    ROLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:
    COUNT_INST(I_ROTR);
    load_reg( REG_EAX, Rn );
    RORL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    COUNT_INST(I_SHAD);
    /* Annoyingly enough, not directly convertible */
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    NEGL_r32( REG_ECX );      // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptysar);     // 2
    SARL_cl_r32( REG_EAX );       // 2
    JMP_label(end);          // 2

    JMP_TARGET(emptysar);
    SARL_imm_r32(31, REG_EAX );  // 3
    JMP_label(end2);

    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLD Rm, Rn {:
    COUNT_INST(I_SHLD);
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    NEGL_r32( REG_ECX );      // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptyshr);
    SHRL_cl_r32( REG_EAX );       // 2
    JMP_label(end);          // 2

    JMP_TARGET(emptyshr);
    XORL_r32_r32( REG_EAX, REG_EAX );
    JMP_label(end2);

    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
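/* Reference semantics implemented by SHAD/SHLD above (as read from the
 * emitted code): a non-negative Rm shifts left by Rm & 31; a negative Rm
 * shifts right by (-Rm) & 31, where a residual count of 0 means a full
 * 32-bit shift - all sign bits for SHAD (the SARL(31) path), zero for
 * SHLD (the XOR path). Sketch, assuming arithmetic >> on signed int:
 *
 *     static int32_t sh4_shad( int32_t rn, int32_t rm )
 *     {
 *         if( rm >= 0 )
 *             return (int32_t)((uint32_t)rn << (rm & 31));
 *         int count = (-rm) & 31;
 *         return count ? rn >> count : rn >> 31;
 *     }
 *
 *     static uint32_t sh4_shld( uint32_t rn, int32_t rm )
 *     {
 *         if( rm >= 0 )
 *             return rn << (rm & 31);
 *         int count = (-rm) & 31;
 *         return count ? rn >> count : 0;
 *     }
 */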
SHAL Rn {:
    COUNT_INST(I_SHAL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHAR Rn {:
    COUNT_INST(I_SHAR);
    load_reg( REG_EAX, Rn );
    SARL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL2 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL8 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL16 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLR2 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR8 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR16 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUB Rm, Rn {:
    COUNT_INST(I_SUB);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SUBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUBC Rm, Rn {:
    COUNT_INST(I_SUBC);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SUBV Rm, Rn {:
    COUNT_INST(I_SUBV);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SUBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
SWAP.B Rm, Rn {:
    COUNT_INST(I_SWAPB);
    load_reg( REG_EAX, Rm );
    XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
    store_reg( REG_EAX, Rn );
:}
SWAP.W Rm, Rn {:
    COUNT_INST(I_SWAPB);
    load_reg( REG_EAX, Rm );
    MOVL_r32_r32( REG_EAX, REG_ECX );
    SHLL_imm_r32( 16, REG_ECX );
    SHRL_imm_r32( 16, REG_EAX );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
TAS.B @Rn {:
    COUNT_INST(I_TASB);
    load_reg( REG_EAX, Rn );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    TESTB_r8_r8( REG_DL, REG_DL );
    SETE_t();
    ORB_imms_r8( 0x80, REG_DL );
    MOVL_rspdisp_r32( 0, REG_EAX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
TST Rm, Rn {:
    COUNT_INST(I_TST);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    TESTL_r32_r32( REG_EAX, REG_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST #imm, R0 {:
    COUNT_INST(I_TSTI);
    load_reg( REG_EAX, 0 );
    TESTL_imms_r32( imm, REG_EAX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST.B #imm, @(R0, GBR) {:
    COUNT_INST(I_TSTB);
    load_reg( REG_EAX, 0);
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    TESTB_imms_r8( imm, REG_AL );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
XOR Rm, Rn {:
    COUNT_INST(I_XOR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR #imm, R0 {:
    COUNT_INST(I_XORI);
    load_reg( REG_EAX, 0 );
    XORL_imms_r32( imm, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR.B #imm, @(R0, GBR) {:
    COUNT_INST(I_XORB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
    MOVL_rspdisp_r32( 0, REG_EAX );
    XORL_imms_r32( imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
XTRCT Rm, Rn {:
    COUNT_INST(I_XTRCT);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SHLL_imm_r32( 16, REG_EAX );
    SHRL_imm_r32( 16, REG_ECX );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
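/* For reference, XTRCT extracts the middle 32 bits of the 64-bit pair
 * Rm:Rn, i.e. Rn = (Rm << 16) | ((uint32_t)Rn >> 16), which is exactly
 * the shift/or sequence above. */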
/* Data move instructions */
MOV Rm, Rn {:
    COUNT_INST(I_MOV);
    load_reg( REG_EAX, Rm );
    store_reg( REG_EAX, Rn );
:}
MOV #imm, Rn {:
    COUNT_INST(I_MOVI);
    MOVL_imm32_r32( imm, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MOV.B Rm, @Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @-Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @(R0, Rn) {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, GBR) {:
    COUNT_INST(I_MOVB);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, Rn) {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( disp, REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm, Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rm );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm+, Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rm );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    if( Rm != Rn ) {
        ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
    }
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(R0, Rm), Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, GBR), R0 {:
    COUNT_INST(I_MOVB);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, Rm), R0 {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rm );
    ADDL_imms_r32( disp, REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @Rn {:
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, Rn );
    check_walign32(REG_EAX);
    MOVL_r32_r32( REG_EAX, REG_ECX );
    ANDL_imms_r32( 0xFC000000, REG_ECX );
    CMPL_imms_r32( 0xE0000000, REG_ECX );
    JNE_label( notsq );
    ANDL_imms_r32( 0x3C, REG_EAX );
    load_reg( REG_EDX, Rm );
    MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
    JMP_label(end);
    JMP_TARGET(notsq);
    load_reg( REG_EDX, Rm );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
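/* The mask-and-compare guard above recognises the SH4 store-queue area
 * (0xE0000000-0xE3FFFFFF) before falling back to the general memory
 * path. As a predicate (sketch only):
 *
 *     static inline int is_store_queue( uint32_t addr )
 *     {
 *         return (addr & 0xFC000000) == 0xE0000000;
 *     }
 */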
MOV.L Rm, @-Rn {:
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( -4, REG_EAX );
    check_walign32( REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(R0, Rn) {:
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
    check_walign32( REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L R0, @(disp, GBR) {:
    COUNT_INST(I_MOVL);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    check_walign32( REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(disp, Rn) {:
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( disp, REG_EAX );
    check_walign32( REG_EAX );
    MOVL_r32_r32( REG_EAX, REG_ECX );
    ANDL_imms_r32( 0xFC000000, REG_ECX );
    CMPL_imms_r32( 0xE0000000, REG_ECX );
    JNE_label( notsq );
    ANDL_imms_r32( 0x3C, REG_EAX );
    load_reg( REG_EDX, Rm );
    MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
    JMP_label(end);
    JMP_TARGET(notsq);
    load_reg( REG_EDX, Rm );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm, Rn {:
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm+, Rn {:
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    if( Rm != Rn ) {
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    }
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(R0, Rm), Rn {:
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, GBR), R0 {:
    COUNT_INST(I_MOVL);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, PC), Rn {:
    COUNT_INST(I_MOVLPC);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
        if( IS_IN_ICACHE(target) ) {
            // If the target address is in the same page as the code, it's
            // pretty safe to just ref it directly and circumvent the whole
            // memory subsystem. (this is a big performance win)

            // FIXME: There's a corner-case that's not handled here when
            // the current code-page is in the ITLB but not in the UTLB.
            // (should generate a TLB miss although need to test SH4
            // behaviour to confirm) Unlikely to be anyone depending on this
            // behaviour though.
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOVL_moffptr_eax( ptr );
        } else {
            // Note: we use sh4r.pc for the calc as we could be running at a
            // different virtual address than the translation was done with,
            // but we can safely assume that the low bits are the same.
            MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
            ADDL_rbpdisp_r32( R_PC, REG_EAX );
            MEM_READ_LONG( REG_EAX, REG_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( REG_EAX, Rn );
    }
:}
MOV.L @(disp, Rm), Rn {:
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, Rm );
    ADDL_imms_r32( disp, REG_EAX );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @Rn {:
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rn );
    check_walign16( REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @-Rn {:
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rn );
    check_walign16( REG_EAX );
    LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @(R0, Rn) {:
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
    check_walign16( REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, GBR) {:
    COUNT_INST(I_MOVW);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    check_walign16( REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, Rn) {:
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( disp, REG_EAX );
    check_walign16( REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm, Rn {:
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rm );
    check_ralign16( REG_EAX );
    MEM_READ_WORD( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
  1572 MOV.W @Rm+, Rn {:  
  1573     COUNT_INST(I_MOVW);
  1574     load_reg( REG_EAX, Rm );
  1575     check_ralign16( REG_EAX );
  1576     MEM_READ_WORD( REG_EAX, REG_EAX );
  1577     if( Rm != Rn ) {
  1578         ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
  1579     }
  1580     store_reg( REG_EAX, Rn );
  1581     sh4_x86.tstate = TSTATE_NONE;
  1582 :}
  1583 MOV.W @(R0, Rm), Rn {:  
  1584     COUNT_INST(I_MOVW);
  1585     load_reg( REG_EAX, 0 );
  1586     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1587     check_ralign16( REG_EAX );
  1588     MEM_READ_WORD( REG_EAX, REG_EAX );
  1589     store_reg( REG_EAX, Rn );
  1590     sh4_x86.tstate = TSTATE_NONE;
  1591 :}
  1592 MOV.W @(disp, GBR), R0 {:  
  1593     COUNT_INST(I_MOVW);
  1594     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1595     ADDL_imms_r32( disp, REG_EAX );
  1596     check_ralign16( REG_EAX );
  1597     MEM_READ_WORD( REG_EAX, REG_EAX );
  1598     store_reg( REG_EAX, 0 );
  1599     sh4_x86.tstate = TSTATE_NONE;
  1600 :}
  1601 MOV.W @(disp, PC), Rn {:  
  1602     COUNT_INST(I_MOVW);
  1603     if( sh4_x86.in_delay_slot ) {
  1604 	SLOTILLEGAL();
  1605     } else {
  1606 	// See comments for MOV.L @(disp, PC), Rn
  1607 	uint32_t target = pc + disp + 4;
  1608 	if( IS_IN_ICACHE(target) ) {
  1609 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1610 	    MOVL_moffptr_eax( ptr );
  1611 	    MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1612 	} else {
  1613 	    MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
  1614 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1615 	    MEM_READ_WORD( REG_EAX, REG_EAX );
  1616 	    sh4_x86.tstate = TSTATE_NONE;
  1617 	}
  1618 	store_reg( REG_EAX, Rn );
  1619     }
  1620 :}
  1621 MOV.W @(disp, Rm), R0 {:  
  1622     COUNT_INST(I_MOVW);
  1623     load_reg( REG_EAX, Rm );
  1624     ADDL_imms_r32( disp, REG_EAX );
  1625     check_ralign16( REG_EAX );
  1626     MEM_READ_WORD( REG_EAX, REG_EAX );
  1627     store_reg( REG_EAX, 0 );
  1628     sh4_x86.tstate = TSTATE_NONE;
  1629 :}
  1630 MOVA @(disp, PC), R0 {:  
  1631     COUNT_INST(I_MOVA);
  1632     if( sh4_x86.in_delay_slot ) {
  1633 	SLOTILLEGAL();
  1634     } else {
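       	// Same longword-aligned PC-relative calculation as MOV.L @(disp, PC), Rn above.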
  1635 	MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
  1636 	ADDL_rbpdisp_r32( R_PC, REG_ECX );
  1637 	store_reg( REG_ECX, 0 );
  1638 	sh4_x86.tstate = TSTATE_NONE;
  1639     }
  1640 :}
  1641 MOVCA.L R0, @Rn {:  
  1642     COUNT_INST(I_MOVCA);
  1643     load_reg( REG_EAX, Rn );
  1644     check_walign32( REG_EAX );
  1645     load_reg( REG_EDX, 0 );
  1646     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1647     sh4_x86.tstate = TSTATE_NONE;
  1648 :}
  1650 /* Control transfer instructions */
  1651 BF disp {:
  1652     COUNT_INST(I_BF);
  1653     if( sh4_x86.in_delay_slot ) {
  1654 	SLOTILLEGAL();
  1655     } else {
  1656 	sh4vma_t target = disp + pc + 4;
  1657 	JT_label( nottaken );
  1658 	exit_block_rel(target, pc+2 );
  1659 	JMP_TARGET(nottaken);
  1660 	return 2;
  1661     }
  1662 :}
  1663 BF/S disp {:
  1664     COUNT_INST(I_BFS);
  1665     if( sh4_x86.in_delay_slot ) {
  1666 	SLOTILLEGAL();
  1667     } else {
  1668 	sh4_x86.in_delay_slot = DELAY_PC;
  1669 	if( UNTRANSLATABLE(pc+2) ) {
  1670 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1671 	    JT_label(nottaken);
  1672 	    ADDL_imms_r32( disp, REG_EAX );
  1673 	    JMP_TARGET(nottaken);
  1674 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1675 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1676 	    exit_block_emu(pc+2);
  1677 	    sh4_x86.branch_taken = TRUE;
  1678 	    return 2;
  1679 	} else {
  1680 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1681 		CMPL_imms_rbpdisp( 1, R_T );
  1682 		sh4_x86.tstate = TSTATE_E;
  1683 	    }
  1684 	    sh4vma_t target = disp + pc + 4;
  1685 	    JCC_cc_rel32(sh4_x86.tstate,0);
  1686 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
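       	    // The zero rel32 emitted above is backpatched once the location of
       	    // the not-taken path is known. Note the delay-slot instruction is
       	    // translated twice, once on each path.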
  1687 	    int save_tstate = sh4_x86.tstate;
  1688 	    sh4_translate_instruction(pc+2);
  1689 	    exit_block_rel( target, pc+4 );
  1691 	    // not taken
  1692 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1693 	    sh4_x86.tstate = save_tstate;
  1694 	    sh4_translate_instruction(pc+2);
  1695 	    return 4;
  1696 	}
  1697     }
  1698 :}
  1699 BRA disp {:  
  1700     COUNT_INST(I_BRA);
  1701     if( sh4_x86.in_delay_slot ) {
  1702 	SLOTILLEGAL();
  1703     } else {
  1704 	sh4_x86.in_delay_slot = DELAY_PC;
  1705 	sh4_x86.branch_taken = TRUE;
  1706 	if( UNTRANSLATABLE(pc+2) ) {
  1707 	    MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1708 	    ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
  1709 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1710 	    exit_block_emu(pc+2);
  1711 	    return 2;
  1712 	} else {
  1713 	    sh4_translate_instruction( pc + 2 );
  1714 	    exit_block_rel( disp + pc + 4, pc+4 );
  1715 	    return 4;
  1716 	}
  1717     }
  1718 :}
  1719 BRAF Rn {:  
  1720     COUNT_INST(I_BRAF);
  1721     if( sh4_x86.in_delay_slot ) {
  1722 	SLOTILLEGAL();
  1723     } else {
  1724 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1725 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1726 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1727 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1728 	sh4_x86.in_delay_slot = DELAY_PC;
  1729 	sh4_x86.tstate = TSTATE_NONE;
  1730 	sh4_x86.branch_taken = TRUE;
  1731 	if( UNTRANSLATABLE(pc+2) ) {
  1732 	    exit_block_emu(pc+2);
  1733 	    return 2;
  1734 	} else {
  1735 	    sh4_translate_instruction( pc + 2 );
  1736 	    exit_block_newpcset(pc+4);
  1737 	    return 4;
  1738 	}
  1739     }
  1740 :}
  1741 BSR disp {:  
  1742     COUNT_INST(I_BSR);
  1743     if( sh4_x86.in_delay_slot ) {
  1744 	SLOTILLEGAL();
  1745     } else {
  1746 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1747 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1748 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1749 	sh4_x86.in_delay_slot = DELAY_PC;
  1750 	sh4_x86.branch_taken = TRUE;
  1751 	sh4_x86.tstate = TSTATE_NONE;
  1752 	if( UNTRANSLATABLE(pc+2) ) {
  1753 	    ADDL_imms_r32( disp, REG_EAX );
  1754 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1755 	    exit_block_emu(pc+2);
  1756 	    return 2;
  1757 	} else {
  1758 	    sh4_translate_instruction( pc + 2 );
  1759 	    exit_block_rel( disp + pc + 4, pc+4 );
  1760 	    return 4;
  1761 	}
  1762     }
  1763 :}
  1764 BSRF Rn {:  
  1765     COUNT_INST(I_BSRF);
  1766     if( sh4_x86.in_delay_slot ) {
  1767 	SLOTILLEGAL();
  1768     } else {
  1769 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1770 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1771 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1772 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1773 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1775 	sh4_x86.in_delay_slot = DELAY_PC;
  1776 	sh4_x86.tstate = TSTATE_NONE;
  1777 	sh4_x86.branch_taken = TRUE;
  1778 	if( UNTRANSLATABLE(pc+2) ) {
  1779 	    exit_block_emu(pc+2);
  1780 	    return 2;
  1781 	} else {
  1782 	    sh4_translate_instruction( pc + 2 );
  1783 	    exit_block_newpcset(pc+4);
  1784 	    return 4;
  1785 	}
  1786     }
  1787 :}
  1788 BT disp {:
  1789     COUNT_INST(I_BT);
  1790     if( sh4_x86.in_delay_slot ) {
  1791 	SLOTILLEGAL();
  1792     } else {
  1793 	sh4vma_t target = disp + pc + 4;
  1794 	JF_label( nottaken );
  1795 	exit_block_rel(target, pc+2 );
  1796 	JMP_TARGET(nottaken);
  1797 	return 2;
  1798     }
  1799 :}
  1800 BT/S disp {:
  1801     COUNT_INST(I_BTS);
  1802     if( sh4_x86.in_delay_slot ) {
  1803 	SLOTILLEGAL();
  1804     } else {
  1805 	sh4_x86.in_delay_slot = DELAY_PC;
  1806 	if( UNTRANSLATABLE(pc+2) ) {
  1807 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1808 	    JF_label(nottaken);
  1809 	    ADDL_imms_r32( disp, REG_EAX );
  1810 	    JMP_TARGET(nottaken);
  1811 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1812 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1813 	    exit_block_emu(pc+2);
  1814 	    sh4_x86.branch_taken = TRUE;
  1815 	    return 2;
  1816 	} else {
  1817 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1818 		CMPL_imms_rbpdisp( 1, R_T );
  1819 		sh4_x86.tstate = TSTATE_E;
  1820 	    }
  1821 	    JCC_cc_rel32(sh4_x86.tstate^1,0);
  1822 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
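       	    // Same backpatch-and-retranslate scheme as BF/S above, with the
       	    // sense of the condition inverted (tstate^1).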
  1824 	    int save_tstate = sh4_x86.tstate;
  1825 	    sh4_translate_instruction(pc+2);
  1826 	    exit_block_rel( disp + pc + 4, pc+4 );
  1827 	    // not taken
  1828 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1829 	    sh4_x86.tstate = save_tstate;
  1830 	    sh4_translate_instruction(pc+2);
  1831 	    return 4;
  1832 	}
  1833     }
  1834 :}
  1835 JMP @Rn {:  
  1836     COUNT_INST(I_JMP);
  1837     if( sh4_x86.in_delay_slot ) {
  1838 	SLOTILLEGAL();
  1839     } else {
  1840 	load_reg( REG_ECX, Rn );
  1841 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  1842 	sh4_x86.in_delay_slot = DELAY_PC;
  1843 	sh4_x86.branch_taken = TRUE;
  1844 	if( UNTRANSLATABLE(pc+2) ) {
  1845 	    exit_block_emu(pc+2);
  1846 	    return 2;
  1847 	} else {
  1848 	    sh4_translate_instruction(pc+2);
  1849 	    exit_block_newpcset(pc+4);
  1850 	    return 4;
  1851 	}
  1852     }
  1853 :}
  1854 JSR @Rn {:  
  1855     COUNT_INST(I_JSR);
  1856     if( sh4_x86.in_delay_slot ) {
  1857 	SLOTILLEGAL();
  1858     } else {
  1859 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1860 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1861 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1862 	load_reg( REG_ECX, Rn );
  1863 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  1864 	sh4_x86.in_delay_slot = DELAY_PC;
  1865 	sh4_x86.branch_taken = TRUE;
  1866 	sh4_x86.tstate = TSTATE_NONE;
  1867 	if( UNTRANSLATABLE(pc+2) ) {
  1868 	    exit_block_emu(pc+2);
  1869 	    return 2;
  1870 	} else {
  1871 	    sh4_translate_instruction(pc+2);
  1872 	    exit_block_newpcset(pc+4);
  1873 	    return 4;
  1874 	}
  1875     }
  1876 :}
  1877 RTE {:  
  1878     COUNT_INST(I_RTE);
  1879     if( sh4_x86.in_delay_slot ) {
  1880 	SLOTILLEGAL();
  1881     } else {
  1882 	check_priv();
  1883 	MOVL_rbpdisp_r32( R_SPC, REG_ECX );
  1884 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  1885 	MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  1886 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  1887 	sh4_x86.in_delay_slot = DELAY_PC;
  1888 	sh4_x86.fpuen_checked = FALSE;
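       	// sh4_write_sr may have changed SR.FD, so the cached FPU-enabled
       	// state can no longer be trusted.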
  1889 	sh4_x86.tstate = TSTATE_NONE;
  1890 	sh4_x86.branch_taken = TRUE;
  1891 	if( UNTRANSLATABLE(pc+2) ) {
  1892 	    exit_block_emu(pc+2);
  1893 	    return 2;
  1894 	} else {
  1895 	    sh4_translate_instruction(pc+2);
  1896 	    exit_block_newpcset(pc+4);
  1897 	    return 4;
  1898 	}
  1899     }
  1900 :}
  1901 RTS {:  
  1902     COUNT_INST(I_RTS);
  1903     if( sh4_x86.in_delay_slot ) {
  1904 	SLOTILLEGAL();
  1905     } else {
  1906 	MOVL_rbpdisp_r32( R_PR, REG_ECX );
  1907 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  1908 	sh4_x86.in_delay_slot = DELAY_PC;
  1909 	sh4_x86.branch_taken = TRUE;
  1910 	if( UNTRANSLATABLE(pc+2) ) {
  1911 	    exit_block_emu(pc+2);
  1912 	    return 2;
  1913 	} else {
  1914 	    sh4_translate_instruction(pc+2);
  1915 	    exit_block_newpcset(pc+4);
  1916 	    return 4;
  1917 	}
  1918     }
  1919 :}
  1920 TRAPA #imm {:  
  1921     COUNT_INST(I_TRAPA);
  1922     if( sh4_x86.in_delay_slot ) {
  1923 	SLOTILLEGAL();
  1924     } else {
  1925 	MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX );   // 5
  1926 	ADDL_r32_rbpdisp( REG_ECX, R_PC );
  1927 	MOVL_imm32_r32( imm, REG_EAX );
  1928 	CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
  1929 	sh4_x86.tstate = TSTATE_NONE;
  1930 	exit_block_pcset(pc+2);
  1931 	sh4_x86.branch_taken = TRUE;
  1932 	return 2;
  1933     }
  1934 :}
  1935 UNDEF {:  
  1936     COUNT_INST(I_UNDEF);
  1937     if( sh4_x86.in_delay_slot ) {
  1938 	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2);    
  1939     } else {
  1940 	exit_block_exc(EXC_ILLEGAL, pc);    
  1941 	return 2;
  1942     }
  1943 :}
  1945 CLRMAC {:  
  1946     COUNT_INST(I_CLRMAC);
  1947     XORL_r32_r32(REG_EAX, REG_EAX);
  1948     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1949     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  1950     sh4_x86.tstate = TSTATE_NONE;
  1951 :}
  1952 CLRS {:
  1953     COUNT_INST(I_CLRS);
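           // S is written via the x86 carry flag: CLC/STC followed by a SETcc
           // into sh4r.s (CLRT/SETS/SETT below use the same trick).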
  1954     CLC();
  1955     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  1956     sh4_x86.tstate = TSTATE_NONE;
  1957 :}
  1958 CLRT {:  
  1959     COUNT_INST(I_CLRT);
  1960     CLC();
  1961     SETC_t();
  1962     sh4_x86.tstate = TSTATE_C;
  1963 :}
  1964 SETS {:  
  1965     COUNT_INST(I_SETS);
  1966     STC();
  1967     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  1968     sh4_x86.tstate = TSTATE_NONE;
  1969 :}
  1970 SETT {:  
  1971     COUNT_INST(I_SETT);
  1972     STC();
  1973     SETC_t();
  1974     sh4_x86.tstate = TSTATE_C;
  1975 :}
  1977 /* Floating point moves */
  1978 FMOV FRm, FRn {:  
  1979     COUNT_INST(I_FMOV1);
  1980     check_fpuen();
  1981     if( sh4_x86.double_size ) {
  1982         load_dr0( REG_EAX, FRm );
  1983         load_dr1( REG_ECX, FRm );
  1984         store_dr0( REG_EAX, FRn );
  1985         store_dr1( REG_ECX, FRn );
  1986     } else {
  1987         load_fr( REG_EAX, FRm ); // SZ=0 branch
  1988         store_fr( REG_EAX, FRn );
  1989     }
  1990 :}
  1991 FMOV FRm, @Rn {: 
  1992     COUNT_INST(I_FMOV2);
  1993     check_fpuen();
  1994     load_reg( REG_EAX, Rn );
  1995     if( sh4_x86.double_size ) {
  1996         check_walign64( REG_EAX );
  1997         load_dr0( REG_EDX, FRm );
  1998         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1999         load_reg( REG_EAX, Rn );
  2000         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2001         load_dr1( REG_EDX, FRm );
  2002         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2003     } else {
  2004         check_walign32( REG_EAX );
  2005         load_fr( REG_EDX, FRm );
  2006         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2007     }
  2008     sh4_x86.tstate = TSTATE_NONE;
  2009 :}
  2010 FMOV @Rm, FRn {:  
  2011     COUNT_INST(I_FMOV5);
  2012     check_fpuen();
  2013     load_reg( REG_EAX, Rm );
  2014     if( sh4_x86.double_size ) {
  2015         check_ralign64( REG_EAX );
  2016         MEM_READ_LONG( REG_EAX, REG_EAX );
  2017         store_dr0( REG_EAX, FRn );
  2018         load_reg( REG_EAX, Rm );
  2019         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2020         MEM_READ_LONG( REG_EAX, REG_EAX );
  2021         store_dr1( REG_EAX, FRn );
  2022     } else {
  2023         check_ralign32( REG_EAX );
  2024         MEM_READ_LONG( REG_EAX, REG_EAX );
  2025         store_fr( REG_EAX, FRn );
  2026     }
  2027     sh4_x86.tstate = TSTATE_NONE;
  2028 :}
  2029 FMOV FRm, @-Rn {:  
  2030     COUNT_INST(I_FMOV3);
  2031     check_fpuen();
  2032     load_reg( REG_EAX, Rn );
  2033     if( sh4_x86.double_size ) {
  2034         check_walign64( REG_EAX );
  2035         LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
  2036         load_dr0( REG_EDX, FRm );
  2037         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2038         load_reg( REG_EAX, Rn );
  2039         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2040         load_dr1( REG_EDX, FRm );
  2041         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2042         ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
  2043     } else {
  2044         check_walign32( REG_EAX );
  2045         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2046         load_fr( REG_EDX, FRm );
  2047         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2048         ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
  2049     }
  2050     sh4_x86.tstate = TSTATE_NONE;
  2051 :}
  2052 FMOV @Rm+, FRn {:
  2053     COUNT_INST(I_FMOV6);
  2054     check_fpuen();
  2055     load_reg( REG_EAX, Rm );
  2056     if( sh4_x86.double_size ) {
  2057         check_ralign64( REG_EAX );
  2058         MEM_READ_LONG( REG_EAX, REG_EAX );
  2059         store_dr0( REG_EAX, FRn );
  2060         load_reg( REG_EAX, Rm );
  2061         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2062         MEM_READ_LONG( REG_EAX, REG_EAX );
  2063         store_dr1( REG_EAX, FRn );
  2064         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
  2065     } else {
  2066         check_ralign32( REG_EAX );
  2067         MEM_READ_LONG( REG_EAX, REG_EAX );
  2068         store_fr( REG_EAX, FRn );
  2069         ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2070     }
  2071     sh4_x86.tstate = TSTATE_NONE;
  2072 :}
  2073 FMOV FRm, @(R0, Rn) {:  
  2074     COUNT_INST(I_FMOV4);
  2075     check_fpuen();
  2076     load_reg( REG_EAX, Rn );
  2077     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2078     if( sh4_x86.double_size ) {
  2079         check_walign64( REG_EAX );
  2080         load_dr0( REG_EDX, FRm );
  2081         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2082         load_reg( REG_EAX, Rn );
  2083         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2084         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2085         load_dr1( REG_EDX, FRm );
  2086         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2087     } else {
  2088         check_walign32( REG_EAX );
  2089         load_fr( REG_EDX, FRm );
  2090         MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
  2091     }
  2092     sh4_x86.tstate = TSTATE_NONE;
  2093 :}
  2094 FMOV @(R0, Rm), FRn {:  
  2095     COUNT_INST(I_FMOV7);
  2096     check_fpuen();
  2097     load_reg( REG_EAX, Rm );
  2098     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2099     if( sh4_x86.double_size ) {
  2100         check_ralign64( REG_EAX );
  2101         MEM_READ_LONG( REG_EAX, REG_EAX );
  2102         store_dr0( REG_EAX, FRn );
  2103         load_reg( REG_EAX, Rm );
  2104         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2105         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2106         MEM_READ_LONG( REG_EAX, REG_EAX );
  2107         store_dr1( REG_EAX, FRn );
  2108     } else {
  2109         check_ralign32( REG_EAX );
  2110         MEM_READ_LONG( REG_EAX, REG_EAX );
  2111         store_fr( REG_EAX, FRn );
  2112     }
  2113     sh4_x86.tstate = TSTATE_NONE;
  2114 :}
  2115 FLDI0 FRn {:  /* IFF PR=0 */
  2116     COUNT_INST(I_FLDI0);
  2117     check_fpuen();
  2118     if( sh4_x86.double_prec == 0 ) {
  2119         XORL_r32_r32( REG_EAX, REG_EAX );
  2120         store_fr( REG_EAX, FRn );
  2121     }
  2122     sh4_x86.tstate = TSTATE_NONE;
  2123 :}
  2124 FLDI1 FRn {:  /* IFF PR=0 */
  2125     COUNT_INST(I_FLDI1);
  2126     check_fpuen();
  2127     if( sh4_x86.double_prec == 0 ) {
  2128         MOVL_imm32_r32( 0x3F800000, REG_EAX );
  2129         store_fr( REG_EAX, FRn );
  2130     }
  2131 :}
  2133 FLOAT FPUL, FRn {:  
  2134     COUNT_INST(I_FLOAT);
  2135     check_fpuen();
  2136     FILD_rbpdisp(R_FPUL);
  2137     if( sh4_x86.double_prec ) {
  2138         pop_dr( FRn );
  2139     } else {
  2140         pop_fr( FRn );
  2141     }
  2142 :}
  2143 FTRC FRm, FPUL {:  
  2144     COUNT_INST(I_FTRC);
  2145     check_fpuen();
  2146     if( sh4_x86.double_prec ) {
  2147         push_dr( FRm );
  2148     } else {
  2149         push_fr( FRm );
  2150     }
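           // FTRC saturates on overflow: clamp the value against max_int /
           // min_int, then convert with the FPU control word temporarily set
           // to round-to-zero (trunc_fcw), restoring the saved word afterwards.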
  2151     MOVP_immptr_rptr( &max_int, REG_ECX );
  2152     FILD_r32disp( REG_ECX, 0 );
  2153     FCOMIP_st(1);
  2154     JNA_label( sat );
  2155     MOVP_immptr_rptr( &min_int, REG_ECX );
  2156     FILD_r32disp( REG_ECX, 0 );
  2157     FCOMIP_st(1);              
  2158     JAE_label( sat2 );            
  2159     MOVP_immptr_rptr( &save_fcw, REG_EAX );
  2160     FNSTCW_r32disp( REG_EAX, 0 );
  2161     MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
  2162     FLDCW_r32disp( REG_EDX, 0 );
  2163     FISTP_rbpdisp(R_FPUL);             
  2164     FLDCW_r32disp( REG_EAX, 0 );
  2165     JMP_label(end);             
  2167     JMP_TARGET(sat);
  2168     JMP_TARGET(sat2);
  2169     MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
  2170     MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
  2171     FPOP_st();
  2172     JMP_TARGET(end);
  2173     sh4_x86.tstate = TSTATE_NONE;
  2174 :}
  2175 FLDS FRm, FPUL {:  
  2176     COUNT_INST(I_FLDS);
  2177     check_fpuen();
  2178     load_fr( REG_EAX, FRm );
  2179     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2180 :}
  2181 FSTS FPUL, FRn {:  
  2182     COUNT_INST(I_FSTS);
  2183     check_fpuen();
  2184     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2185     store_fr( REG_EAX, FRn );
  2186 :}
  2187 FCNVDS FRm, FPUL {:  
  2188     COUNT_INST(I_FCNVDS);
  2189     check_fpuen();
  2190     if( sh4_x86.double_prec ) {
  2191         push_dr( FRm );
  2192         pop_fpul();
  2193     }
  2194 :}
  2195 FCNVSD FPUL, FRn {:  
  2196     COUNT_INST(I_FCNVSD);
  2197     check_fpuen();
  2198     if( sh4_x86.double_prec ) {
  2199         push_fpul();
  2200         pop_dr( FRn );
  2201     }
  2202 :}
  2204 /* Floating point instructions */
  2205 FABS FRn {:  
  2206     COUNT_INST(I_FABS);
  2207     check_fpuen();
  2208     if( sh4_x86.double_prec ) {
  2209         push_dr(FRn);
  2210         FABS_st0();
  2211         pop_dr(FRn);
  2212     } else {
  2213         push_fr(FRn);
  2214         FABS_st0();
  2215         pop_fr(FRn);
  2216     }
  2217 :}
  2218 FADD FRm, FRn {:  
  2219     COUNT_INST(I_FADD);
  2220     check_fpuen();
  2221     if( sh4_x86.double_prec ) {
  2222         push_dr(FRm);
  2223         push_dr(FRn);
  2224         FADDP_st(1);
  2225         pop_dr(FRn);
  2226     } else {
  2227         push_fr(FRm);
  2228         push_fr(FRn);
  2229         FADDP_st(1);
  2230         pop_fr(FRn);
  2231     }
  2232 :}
  2233 FDIV FRm, FRn {:  
  2234     COUNT_INST(I_FDIV);
  2235     check_fpuen();
  2236     if( sh4_x86.double_prec ) {
  2237         push_dr(FRn);
  2238         push_dr(FRm);
  2239         FDIVP_st(1);
  2240         pop_dr(FRn);
  2241     } else {
  2242         push_fr(FRn);
  2243         push_fr(FRm);
  2244         FDIVP_st(1);
  2245         pop_fr(FRn);
  2246     }
  2247 :}
  2248 FMAC FR0, FRm, FRn {:  
  2249     COUNT_INST(I_FMAC);
  2250     check_fpuen();
  2251     if( sh4_x86.double_prec ) {
  2252         push_dr( 0 );
  2253         push_dr( FRm );
  2254         FMULP_st(1);
  2255         push_dr( FRn );
  2256         FADDP_st(1);
  2257         pop_dr( FRn );
  2258     } else {
  2259         push_fr( 0 );
  2260         push_fr( FRm );
  2261         FMULP_st(1);
  2262         push_fr( FRn );
  2263         FADDP_st(1);
  2264         pop_fr( FRn );
  2265     }
  2266 :}
  2268 FMUL FRm, FRn {:  
  2269     COUNT_INST(I_FMUL);
  2270     check_fpuen();
  2271     if( sh4_x86.double_prec ) {
  2272         push_dr(FRm);
  2273         push_dr(FRn);
  2274         FMULP_st(1);
  2275         pop_dr(FRn);
  2276     } else {
  2277         push_fr(FRm);
  2278         push_fr(FRn);
  2279         FMULP_st(1);
  2280         pop_fr(FRn);
  2281     }
  2282 :}
  2283 FNEG FRn {:  
  2284     COUNT_INST(I_FNEG);
  2285     check_fpuen();
  2286     if( sh4_x86.double_prec ) {
  2287         push_dr(FRn);
  2288         FCHS_st0();
  2289         pop_dr(FRn);
  2290     } else {
  2291         push_fr(FRn);
  2292         FCHS_st0();
  2293         pop_fr(FRn);
  2294     }
  2295 :}
  2296 FSRRA FRn {:  
  2297     COUNT_INST(I_FSRRA);
  2298     check_fpuen();
  2299     if( sh4_x86.double_prec == 0 ) {
  2300         FLD1_st0();
  2301         push_fr(FRn);
  2302         FSQRT_st0();
  2303         FDIVP_st(1);
  2304         pop_fr(FRn);
  2305     }
  2306 :}
  2307 FSQRT FRn {:  
  2308     COUNT_INST(I_FSQRT);
  2309     check_fpuen();
  2310     if( sh4_x86.double_prec ) {
  2311         push_dr(FRn);
  2312         FSQRT_st0();
  2313         pop_dr(FRn);
  2314     } else {
  2315         push_fr(FRn);
  2316         FSQRT_st0();
  2317         pop_fr(FRn);
  2318     }
  2319 :}
  2320 FSUB FRm, FRn {:  
  2321     COUNT_INST(I_FSUB);
  2322     check_fpuen();
  2323     if( sh4_x86.double_prec ) {
  2324         push_dr(FRn);
  2325         push_dr(FRm);
  2326         FSUBP_st(1);
  2327         pop_dr(FRn);
  2328     } else {
  2329         push_fr(FRn);
  2330         push_fr(FRm);
  2331         FSUBP_st(1);
  2332         pop_fr(FRn);
  2333     }
  2334 :}
  2336 FCMP/EQ FRm, FRn {:  
  2337     COUNT_INST(I_FCMPEQ);
  2338     check_fpuen();
  2339     if( sh4_x86.double_prec ) {
  2340         push_dr(FRm);
  2341         push_dr(FRn);
  2342     } else {
  2343         push_fr(FRm);
  2344         push_fr(FRn);
  2345     }
  2346     FCOMIP_st(1);
  2347     SETE_t();
  2348     FPOP_st();
  2349     sh4_x86.tstate = TSTATE_E;
  2350 :}
  2351 FCMP/GT FRm, FRn {:  
  2352     COUNT_INST(I_FCMPGT);
  2353     check_fpuen();
  2354     if( sh4_x86.double_prec ) {
  2355         push_dr(FRm);
  2356         push_dr(FRn);
  2357     } else {
  2358         push_fr(FRm);
  2359         push_fr(FRn);
  2360     }
  2361     FCOMIP_st(1);
  2362     SETA_t();
  2363     FPOP_st();
  2364     sh4_x86.tstate = TSTATE_A;
  2365 :}
  2367 FSCA FPUL, FRn {:  
  2368     COUNT_INST(I_FSCA);
  2369     check_fpuen();
  2370     if( sh4_x86.double_prec == 0 ) {
  2371         LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
  2372         MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2373         CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
  2374     }
  2375     sh4_x86.tstate = TSTATE_NONE;
  2376 :}
  2377 FIPR FVm, FVn {:  
  2378     COUNT_INST(I_FIPR);
  2379     check_fpuen();
  2380     if( sh4_x86.double_prec == 0 ) {
  2381         if( sh4_x86.sse3_enabled ) {
  2382             MOVAPS_r32disp_xmm( REG_RBP, REG_OFFSET(fr[0][FVm<<2]), 4 );
  2383             MULPS_r32disp_xmm( REG_RBP, REG_OFFSET(fr[0][FVn<<2]), 4 );
  2384             HADDPS_xmm_xmm( 4, 4 ); 
  2385             HADDPS_xmm_xmm( 4, 4 );
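            // Both HADDPS passes leave the dot product in every lane; one lane
            // is stored to FR[FVn*4+3] (index (FVn<<2)+2 here, per the ^1
            // pair-swap used by R_FR).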
  2386             MOVSS_xmm_r32disp( 4, REG_RBP, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2387         } else {
  2388             push_fr( FVm<<2 );
  2389             push_fr( FVn<<2 );
  2390             FMULP_st(1);
  2391             push_fr( (FVm<<2)+1);
  2392             push_fr( (FVn<<2)+1);
  2393             FMULP_st(1);
  2394             FADDP_st(1);
  2395             push_fr( (FVm<<2)+2);
  2396             push_fr( (FVn<<2)+2);
  2397             FMULP_st(1);
  2398             FADDP_st(1);
  2399             push_fr( (FVm<<2)+3);
  2400             push_fr( (FVn<<2)+3);
  2401             FMULP_st(1);
  2402             FADDP_st(1);
  2403             pop_fr( (FVn<<2)+3);
  2404         }
  2405     }
  2406 :}
  2407 FTRV XMTRX, FVn {:  
  2408     COUNT_INST(I_FTRV);
  2409     check_fpuen();
  2410     if( sh4_x86.double_prec == 0 ) {
  2411         if( sh4_x86.sse3_enabled ) {
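            // FVn = XMTRX*FVn: each element of FVn is broadcast across an xmm
            // register, multiplied by a matrix column, and the four partial
            // products summed. The swizzled loads appear to compensate for the
            // pair-swapped fr[] storage order.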
  2412             MOVAPS_r32disp_xmm( REG_RBP, REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2413             MOVAPS_r32disp_xmm( REG_RBP, REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2414             MOVAPS_r32disp_xmm( REG_RBP, REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2415             MOVAPS_r32disp_xmm( REG_RBP, REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2417             MOVSLDUP_r32disp_xmm( REG_RBP, REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2418             MOVSHDUP_r32disp_xmm( REG_RBP, REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2419             MOV_xmm_xmm( 4, 6 );
  2420             MOV_xmm_xmm( 5, 7 );
  2421             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2422             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2423             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2424             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2425             MULPS_xmm_xmm( 0, 4 );
  2426             MULPS_xmm_xmm( 1, 5 );
  2427             MULPS_xmm_xmm( 2, 6 );
  2428             MULPS_xmm_xmm( 3, 7 );
  2429             ADDPS_xmm_xmm( 5, 4 );
  2430             ADDPS_xmm_xmm( 7, 6 );
  2431             ADDPS_xmm_xmm( 6, 4 );
  2432             MOVAPS_xmm_r32disp( 4, REG_RBP, REG_OFFSET(fr[0][FVn<<2]) );
  2433         } else {
  2434             LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
  2435             CALL1_ptr_r32( sh4_ftrv, REG_EAX );
  2436         }
  2437     }
  2438     sh4_x86.tstate = TSTATE_NONE;
  2439 :}
  2441 FRCHG {:  
  2442     COUNT_INST(I_FRCHG);
  2443     check_fpuen();
  2444     XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
  2445     CALL_ptr( sh4_switch_fr_banks );
  2446     sh4_x86.tstate = TSTATE_NONE;
  2447 :}
  2448 FSCHG {:  
  2449     COUNT_INST(I_FSCHG);
  2450     check_fpuen();
  2451     XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
  2452     XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
  2453     sh4_x86.tstate = TSTATE_NONE;
  2454     sh4_x86.double_size = !sh4_x86.double_size;
  2455 :}
  2457 /* Processor control instructions */
  2458 LDC Rm, SR {:
  2459     COUNT_INST(I_LDCSR);
  2460     if( sh4_x86.in_delay_slot ) {
  2461 	SLOTILLEGAL();
  2462     } else {
  2463 	check_priv();
  2464 	load_reg( REG_EAX, Rm );
  2465 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2466 	sh4_x86.fpuen_checked = FALSE;
  2467 	sh4_x86.tstate = TSTATE_NONE;
  2468 	return 2;
  2469     }
  2470 :}
  2471 LDC Rm, GBR {: 
  2472     COUNT_INST(I_LDC);
  2473     load_reg( REG_EAX, Rm );
  2474     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2475 :}
  2476 LDC Rm, VBR {:  
  2477     COUNT_INST(I_LDC);
  2478     check_priv();
  2479     load_reg( REG_EAX, Rm );
  2480     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2481     sh4_x86.tstate = TSTATE_NONE;
  2482 :}
  2483 LDC Rm, SSR {:  
  2484     COUNT_INST(I_LDC);
  2485     check_priv();
  2486     load_reg( REG_EAX, Rm );
  2487     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2488     sh4_x86.tstate = TSTATE_NONE;
  2489 :}
  2490 LDC Rm, SGR {:  
  2491     COUNT_INST(I_LDC);
  2492     check_priv();
  2493     load_reg( REG_EAX, Rm );
  2494     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2495     sh4_x86.tstate = TSTATE_NONE;
  2496 :}
  2497 LDC Rm, SPC {:  
  2498     COUNT_INST(I_LDC);
  2499     check_priv();
  2500     load_reg( REG_EAX, Rm );
  2501     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2502     sh4_x86.tstate = TSTATE_NONE;
  2503 :}
  2504 LDC Rm, DBR {:  
  2505     COUNT_INST(I_LDC);
  2506     check_priv();
  2507     load_reg( REG_EAX, Rm );
  2508     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2509     sh4_x86.tstate = TSTATE_NONE;
  2510 :}
  2511 LDC Rm, Rn_BANK {:  
  2512     COUNT_INST(I_LDC);
  2513     check_priv();
  2514     load_reg( REG_EAX, Rm );
  2515     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2516     sh4_x86.tstate = TSTATE_NONE;
  2517 :}
  2518 LDC.L @Rm+, GBR {:  
  2519     COUNT_INST(I_LDCM);
  2520     load_reg( REG_EAX, Rm );
  2521     check_ralign32( REG_EAX );
  2522     MEM_READ_LONG( REG_EAX, REG_EAX );
  2523     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2524     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2525     sh4_x86.tstate = TSTATE_NONE;
  2526 :}
  2527 LDC.L @Rm+, SR {:
  2528     COUNT_INST(I_LDCSRM);
  2529     if( sh4_x86.in_delay_slot ) {
  2530 	SLOTILLEGAL();
  2531     } else {
  2532 	check_priv();
  2533 	load_reg( REG_EAX, Rm );
  2534 	check_ralign32( REG_EAX );
  2535 	MEM_READ_LONG( REG_EAX, REG_EAX );
  2536 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2537 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2538 	sh4_x86.fpuen_checked = FALSE;
  2539 	sh4_x86.tstate = TSTATE_NONE;
  2540 	return 2;
  2541     }
  2542 :}
  2543 LDC.L @Rm+, VBR {:  
  2544     COUNT_INST(I_LDCM);
  2545     check_priv();
  2546     load_reg( REG_EAX, Rm );
  2547     check_ralign32( REG_EAX );
  2548     MEM_READ_LONG( REG_EAX, REG_EAX );
  2549     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2550     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2551     sh4_x86.tstate = TSTATE_NONE;
  2552 :}
  2553 LDC.L @Rm+, SSR {:
  2554     COUNT_INST(I_LDCM);
  2555     check_priv();
  2556     load_reg( REG_EAX, Rm );
  2557     check_ralign32( REG_EAX );
  2558     MEM_READ_LONG( REG_EAX, REG_EAX );
  2559     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2560     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2561     sh4_x86.tstate = TSTATE_NONE;
  2562 :}
  2563 LDC.L @Rm+, SGR {:  
  2564     COUNT_INST(I_LDCM);
  2565     check_priv();
  2566     load_reg( REG_EAX, Rm );
  2567     check_ralign32( REG_EAX );
  2568     MEM_READ_LONG( REG_EAX, REG_EAX );
  2569     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2570     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2571     sh4_x86.tstate = TSTATE_NONE;
  2572 :}
  2573 LDC.L @Rm+, SPC {:  
  2574     COUNT_INST(I_LDCM);
  2575     check_priv();
  2576     load_reg( REG_EAX, Rm );
  2577     check_ralign32( REG_EAX );
  2578     MEM_READ_LONG( REG_EAX, REG_EAX );
  2579     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2580     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2581     sh4_x86.tstate = TSTATE_NONE;
  2582 :}
  2583 LDC.L @Rm+, DBR {:  
  2584     COUNT_INST(I_LDCM);
  2585     check_priv();
  2586     load_reg( REG_EAX, Rm );
  2587     check_ralign32( REG_EAX );
  2588     MEM_READ_LONG( REG_EAX, REG_EAX );
  2589     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2590     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2591     sh4_x86.tstate = TSTATE_NONE;
  2592 :}
  2593 LDC.L @Rm+, Rn_BANK {:  
  2594     COUNT_INST(I_LDCM);
  2595     check_priv();
  2596     load_reg( REG_EAX, Rm );
  2597     check_ralign32( REG_EAX );
  2598     MEM_READ_LONG( REG_EAX, REG_EAX );
  2599     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2600     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2601     sh4_x86.tstate = TSTATE_NONE;
  2602 :}
  2603 LDS Rm, FPSCR {:
  2604     COUNT_INST(I_LDSFPSCR);
  2605     check_fpuen();
  2606     load_reg( REG_EAX, Rm );
  2607     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2608     sh4_x86.tstate = TSTATE_NONE;
  2609     return 2;
  2610 :}
  2611 LDS.L @Rm+, FPSCR {:  
  2612     COUNT_INST(I_LDSFPSCRM);
  2613     check_fpuen();
  2614     load_reg( REG_EAX, Rm );
  2615     check_ralign32( REG_EAX );
  2616     MEM_READ_LONG( REG_EAX, REG_EAX );
  2617     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2618     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2619     sh4_x86.tstate = TSTATE_NONE;
  2620     return 2;
  2621 :}
  2622 LDS Rm, FPUL {:  
  2623     COUNT_INST(I_LDS);
  2624     check_fpuen();
  2625     load_reg( REG_EAX, Rm );
  2626     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2627 :}
  2628 LDS.L @Rm+, FPUL {:  
  2629     COUNT_INST(I_LDSM);
  2630     check_fpuen();
  2631     load_reg( REG_EAX, Rm );
  2632     check_ralign32( REG_EAX );
  2633     MEM_READ_LONG( REG_EAX, REG_EAX );
  2634     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2635     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2636     sh4_x86.tstate = TSTATE_NONE;
  2637 :}
  2638 LDS Rm, MACH {: 
  2639     COUNT_INST(I_LDS);
  2640     load_reg( REG_EAX, Rm );
  2641     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2642 :}
  2643 LDS.L @Rm+, MACH {:  
  2644     COUNT_INST(I_LDSM);
  2645     load_reg( REG_EAX, Rm );
  2646     check_ralign32( REG_EAX );
  2647     MEM_READ_LONG( REG_EAX, REG_EAX );
  2648     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2649     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2650     sh4_x86.tstate = TSTATE_NONE;
  2651 :}
  2652 LDS Rm, MACL {:  
  2653     COUNT_INST(I_LDS);
  2654     load_reg( REG_EAX, Rm );
  2655     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2656 :}
  2657 LDS.L @Rm+, MACL {:  
  2658     COUNT_INST(I_LDSM);
  2659     load_reg( REG_EAX, Rm );
  2660     check_ralign32( REG_EAX );
  2661     MEM_READ_LONG( REG_EAX, REG_EAX );
  2662     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2663     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2664     sh4_x86.tstate = TSTATE_NONE;
  2665 :}
  2666 LDS Rm, PR {:  
  2667     COUNT_INST(I_LDS);
  2668     load_reg( REG_EAX, Rm );
  2669     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2670 :}
  2671 LDS.L @Rm+, PR {:  
  2672     COUNT_INST(I_LDSM);
  2673     load_reg( REG_EAX, Rm );
  2674     check_ralign32( REG_EAX );
  2675     MEM_READ_LONG( REG_EAX, REG_EAX );
  2676     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2677     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2678     sh4_x86.tstate = TSTATE_NONE;
  2679 :}
  2680 LDTLB {:  
  2681     COUNT_INST(I_LDTLB);
  2682     CALL_ptr( MMU_ldtlb );
  2683     sh4_x86.tstate = TSTATE_NONE;
  2684 :}
  2685 OCBI @Rn {:
  2686     COUNT_INST(I_OCBI);
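           // OCBI/OCBP/OCBWB are no-ops here: the operand cache itself is not
           // modelled, so there is nothing to invalidate or write back.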
  2687 :}
  2688 OCBP @Rn {:
  2689     COUNT_INST(I_OCBP);
  2690 :}
  2691 OCBWB @Rn {:
  2692     COUNT_INST(I_OCBWB);
  2693 :}
  2694 PREF @Rn {:
  2695     COUNT_INST(I_PREF);
  2696     load_reg( REG_EAX, Rn );
  2697     MEM_PREFETCH( REG_EAX );
  2698     sh4_x86.tstate = TSTATE_NONE;
  2699 :}
  2700 SLEEP {: 
  2701     COUNT_INST(I_SLEEP);
  2702     check_priv();
  2703     CALL_ptr( sh4_sleep );
  2704     sh4_x86.tstate = TSTATE_NONE;
  2705     sh4_x86.in_delay_slot = DELAY_NONE;
  2706     return 2;
  2707 :}
  2708 STC SR, Rn {:
  2709     COUNT_INST(I_STCSR);
  2710     check_priv();
  2711     CALL_ptr(sh4_read_sr);
  2712     store_reg( REG_EAX, Rn );
  2713     sh4_x86.tstate = TSTATE_NONE;
  2714 :}
  2715 STC GBR, Rn {:  
  2716     COUNT_INST(I_STC);
  2717     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  2718     store_reg( REG_EAX, Rn );
  2719 :}
  2720 STC VBR, Rn {:  
  2721     COUNT_INST(I_STC);
  2722     check_priv();
  2723     MOVL_rbpdisp_r32( R_VBR, REG_EAX );
  2724     store_reg( REG_EAX, Rn );
  2725     sh4_x86.tstate = TSTATE_NONE;
  2726 :}
  2727 STC SSR, Rn {:  
  2728     COUNT_INST(I_STC);
  2729     check_priv();
  2730     MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2731     store_reg( REG_EAX, Rn );
  2732     sh4_x86.tstate = TSTATE_NONE;
  2733 :}
  2734 STC SPC, Rn {:  
  2735     COUNT_INST(I_STC);
  2736     check_priv();
  2737     MOVL_rbpdisp_r32( R_SPC, REG_EAX );
  2738     store_reg( REG_EAX, Rn );
  2739     sh4_x86.tstate = TSTATE_NONE;
  2740 :}
  2741 STC SGR, Rn {:  
  2742     COUNT_INST(I_STC);
  2743     check_priv();
  2744     MOVL_rbpdisp_r32( R_SGR, REG_EAX );
  2745     store_reg( REG_EAX, Rn );
  2746     sh4_x86.tstate = TSTATE_NONE;
  2747 :}
  2748 STC DBR, Rn {:  
  2749     COUNT_INST(I_STC);
  2750     check_priv();
  2751     MOVL_rbpdisp_r32( R_DBR, REG_EAX );
  2752     store_reg( REG_EAX, Rn );
  2753     sh4_x86.tstate = TSTATE_NONE;
  2754 :}
  2755 STC Rm_BANK, Rn {:
  2756     COUNT_INST(I_STC);
  2757     check_priv();
  2758     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
  2759     store_reg( REG_EAX, Rn );
  2760     sh4_x86.tstate = TSTATE_NONE;
  2761 :}
  2762 STC.L SR, @-Rn {:
  2763     COUNT_INST(I_STCSRM);
  2764     check_priv();
  2765     CALL_ptr( sh4_read_sr );
  2766     MOVL_r32_r32( REG_EAX, REG_EDX );
  2767     load_reg( REG_EAX, Rn );
  2768     check_walign32( REG_EAX );
  2769     LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2770     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2771     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2772     sh4_x86.tstate = TSTATE_NONE;
  2773 :}
  2774 STC.L VBR, @-Rn {:  
  2775     COUNT_INST(I_STCM);
  2776     check_priv();
  2777     load_reg( REG_EAX, Rn );
  2778     check_walign32( REG_EAX );
  2779     ADDL_imms_r32( -4, REG_EAX );
  2780     MOVL_rbpdisp_r32( R_VBR, REG_EDX );
  2781     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2782     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2783     sh4_x86.tstate = TSTATE_NONE;
  2784 :}
  2785 STC.L SSR, @-Rn {:  
  2786     COUNT_INST(I_STCM);
  2787     check_priv();
  2788     load_reg( REG_EAX, Rn );
  2789     check_walign32( REG_EAX );
  2790     ADDL_imms_r32( -4, REG_EAX );
  2791     MOVL_rbpdisp_r32( R_SSR, REG_EDX );
  2792     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2793     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2794     sh4_x86.tstate = TSTATE_NONE;
  2795 :}
  2796 STC.L SPC, @-Rn {:
  2797     COUNT_INST(I_STCM);
  2798     check_priv();
  2799     load_reg( REG_EAX, Rn );
  2800     check_walign32( REG_EAX );
  2801     ADDL_imms_r32( -4, REG_EAX );
  2802     MOVL_rbpdisp_r32( R_SPC, REG_EDX );
  2803     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2804     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2805     sh4_x86.tstate = TSTATE_NONE;
  2806 :}
  2807 STC.L SGR, @-Rn {:  
  2808     COUNT_INST(I_STCM);
  2809     check_priv();
  2810     load_reg( REG_EAX, Rn );
  2811     check_walign32( REG_EAX );
  2812     ADDL_imms_r32( -4, REG_EAX );
  2813     MOVL_rbpdisp_r32( R_SGR, REG_EDX );
  2814     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2815     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2816     sh4_x86.tstate = TSTATE_NONE;
  2817 :}
  2818 STC.L DBR, @-Rn {:  
  2819     COUNT_INST(I_STCM);
  2820     check_priv();
  2821     load_reg( REG_EAX, Rn );
  2822     check_walign32( REG_EAX );
  2823     ADDL_imms_r32( -4, REG_EAX );
  2824     MOVL_rbpdisp_r32( R_DBR, REG_EDX );
  2825     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2826     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2827     sh4_x86.tstate = TSTATE_NONE;
  2828 :}
  2829 STC.L Rm_BANK, @-Rn {:  
  2830     COUNT_INST(I_STCM);
  2831     check_priv();
  2832     load_reg( REG_EAX, Rn );
  2833     check_walign32( REG_EAX );
  2834     ADDL_imms_r32( -4, REG_EAX );
  2835     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
  2836     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2837     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2838     sh4_x86.tstate = TSTATE_NONE;
  2839 :}
  2840 STC.L GBR, @-Rn {:  
  2841     COUNT_INST(I_STCM);
  2842     load_reg( REG_EAX, Rn );
  2843     check_walign32( REG_EAX );
  2844     ADDL_imms_r32( -4, REG_EAX );
  2845     MOVL_rbpdisp_r32( R_GBR, REG_EDX );
  2846     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2847     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2848     sh4_x86.tstate = TSTATE_NONE;
  2849 :}
  2850 STS FPSCR, Rn {:  
  2851     COUNT_INST(I_STSFPSCR);
  2852     check_fpuen();
  2853     MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
  2854     store_reg( REG_EAX, Rn );
  2855 :}
  2856 STS.L FPSCR, @-Rn {:  
  2857     COUNT_INST(I_STSFPSCRM);
  2858     check_fpuen();
  2859     load_reg( REG_EAX, Rn );
  2860     check_walign32( REG_EAX );
  2861     ADDL_imms_r32( -4, REG_EAX );
  2862     MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
  2863     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2864     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2865     sh4_x86.tstate = TSTATE_NONE;
  2866 :}
  2867 STS FPUL, Rn {:  
  2868     COUNT_INST(I_STS);
  2869     check_fpuen();
  2870     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2871     store_reg( REG_EAX, Rn );
  2872 :}
  2873 STS.L FPUL, @-Rn {:  
  2874     COUNT_INST(I_STSM);
  2875     check_fpuen();
  2876     load_reg( REG_EAX, Rn );
  2877     check_walign32( REG_EAX );
  2878     ADDL_imms_r32( -4, REG_EAX );
  2879     MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
  2880     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2881     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2882     sh4_x86.tstate = TSTATE_NONE;
  2883 :}
  2884 STS MACH, Rn {:  
  2885     COUNT_INST(I_STS);
  2886     MOVL_rbpdisp_r32( R_MACH, REG_EAX );
  2887     store_reg( REG_EAX, Rn );
  2888 :}
  2889 STS.L MACH, @-Rn {:  
  2890     COUNT_INST(I_STSM);
  2891     load_reg( REG_EAX, Rn );
  2892     check_walign32( REG_EAX );
  2893     ADDL_imms_r32( -4, REG_EAX );
  2894     MOVL_rbpdisp_r32( R_MACH, REG_EDX );
  2895     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2896     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2897     sh4_x86.tstate = TSTATE_NONE;
  2898 :}
  2899 STS MACL, Rn {:  
  2900     COUNT_INST(I_STS);
  2901     MOVL_rbpdisp_r32( R_MACL, REG_EAX );
  2902     store_reg( REG_EAX, Rn );
  2903 :}
  2904 STS.L MACL, @-Rn {:  
  2905     COUNT_INST(I_STSM);
  2906     load_reg( REG_EAX, Rn );
  2907     check_walign32( REG_EAX );
  2908     ADDL_imms_r32( -4, REG_EAX );
  2909     MOVL_rbpdisp_r32( R_MACL, REG_EDX );
  2910     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2911     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2912     sh4_x86.tstate = TSTATE_NONE;
  2913 :}
  2914 STS PR, Rn {:  
  2915     COUNT_INST(I_STS);
  2916     MOVL_rbpdisp_r32( R_PR, REG_EAX );
  2917     store_reg( REG_EAX, Rn );
  2918 :}
  2919 STS.L PR, @-Rn {:  
  2920     COUNT_INST(I_STSM);
  2921     load_reg( REG_EAX, Rn );
  2922     check_walign32( REG_EAX );
  2923     ADDL_imms_r32( -4, REG_EAX );
  2924     MOVL_rbpdisp_r32( R_PR, REG_EDX );
  2925     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2926     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2927     sh4_x86.tstate = TSTATE_NONE;
  2928 :}
  2930 NOP {: 
  2931     COUNT_INST(I_NOP);
  2932     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  2933 :}
  2934 %%
  2935     sh4_x86.in_delay_slot = DELAY_NONE;
  2936     return 0;
  2937 }
  2940 /**
  2941  * The unwind methods only work if we compiled with DWARF2 frame information
  2942  * (i.e. -fexceptions); otherwise we have to use the direct frame scan.
  2943  */
  2944 #ifdef HAVE_EXCEPTIONS
  2945 #include <unwind.h>
  2947 struct UnwindInfo {
  2948     uintptr_t block_start;
  2949     uintptr_t block_end;
  2950     void *pc;
  2951 };
  2953 static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
  2954 {
  2955     struct UnwindInfo *info = arg;
  2956     void *pc = (void *)_Unwind_GetIP(context);
  2957     if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
  2958         info->pc = pc;
  2959         return _URC_NORMAL_STOP;
  2960     }
  2961     return _URC_NO_REASON;
  2962 }
  2964 void *xlat_get_native_pc( void *code, uint32_t code_size )
  2965 {
  2966     struct _Unwind_Exception exc;
  2967     struct UnwindInfo info;
  2969     info.pc = NULL;
  2970     info.block_start = (uintptr_t)code;
  2971     info.block_end = info.block_start + code_size;
  2972     void *result = NULL;
  2973     _Unwind_Backtrace( xlat_check_frame, &info );
  2974     return info.pc;
  2975 }
  2976 #else
  2977 /* Assume this is an ia32 build - amd64 should always have dwarf information */
  2978 void *xlat_get_native_pc( void *code, uint32_t code_size )
  2979 {
  2980     void *result = NULL;
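       /* Walk the saved-%ebp chain for up to 8 frames, looking for a frame
        * whose saved %ebp is &sh4r + 128 (the frame pointer value that
        * translated code presumably runs with, matching the -128 bias in
        * REG_OFFSET); that frame's return address is the native PC. */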
  2981     asm(
  2982         "mov %%ebp, %%eax\n\t"
  2983         "mov $0x8, %%ecx\n\t"
  2984         "mov %1, %%edx\n"
  2985         "frame_loop: test %%eax, %%eax\n\t"
  2986         "je frame_not_found\n\t"
  2987         "cmp (%%eax), %%edx\n\t"
  2988         "je frame_found\n\t"
  2989         "sub $0x1, %%ecx\n\t"
  2990         "je frame_not_found\n\t"
  2991         "movl (%%eax), %%eax\n\t"
  2992         "jmp frame_loop\n"
  2993         "frame_found: movl 0x4(%%eax), %0\n"
  2994         "frame_not_found:"
  2995         : "=r" (result)
  2996         : "r" (((uint8_t *)&sh4r) + 128 )
  2997         : "eax", "ecx", "edx" );
  2998     return result;
  2999 }
  3000 #endif