Search
lxdream.org :: lxdream/src/sh4/sh4x86.in
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 1292:799fdd4f704a
prev1263:b3de98d19faf
next1298:d0eb2307b847
author nkeynes
date Fri Aug 24 08:53:50 2012 +1000 (8 years ago)
permissions -rw-r--r--
last change Move the generated prologue/epilogue code out into a common entry stub
(reduces space requirements) and pre-save all saved registers. Change
FASTCALL to use 3 regs instead of 2 since we can now keep everything in
regs.
file annotate diff log raw
nkeynes@359
     1
/**
nkeynes@586
     2
 * $Id$
nkeynes@359
     3
 * 
nkeynes@359
     4
 * SH4 => x86 translation. This version does no real optimization, it just
nkeynes@359
     5
 * outputs straight-line x86 code - it mainly exists to provide a baseline
nkeynes@359
     6
 * to test the optimizing versions against.
nkeynes@359
     7
 *
nkeynes@359
     8
 * Copyright (c) 2007 Nathan Keynes.
nkeynes@359
     9
 *
nkeynes@359
    10
 * This program is free software; you can redistribute it and/or modify
nkeynes@359
    11
 * it under the terms of the GNU General Public License as published by
nkeynes@359
    12
 * the Free Software Foundation; either version 2 of the License, or
nkeynes@359
    13
 * (at your option) any later version.
nkeynes@359
    14
 *
nkeynes@359
    15
 * This program is distributed in the hope that it will be useful,
nkeynes@359
    16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
nkeynes@359
    17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
nkeynes@359
    18
 * GNU General Public License for more details.
nkeynes@359
    19
 */
nkeynes@359
    20
nkeynes@368
    21
#include <assert.h>
nkeynes@388
    22
#include <math.h>
nkeynes@368
    23
nkeynes@380
    24
#ifndef NDEBUG
nkeynes@380
    25
#define DEBUG_JUMPS 1
nkeynes@380
    26
#endif
nkeynes@380
    27
nkeynes@905
    28
#include "lxdream.h"
nkeynes@368
    29
#include "sh4/sh4core.h"
nkeynes@1091
    30
#include "sh4/sh4dasm.h"
nkeynes@368
    31
#include "sh4/sh4trans.h"
nkeynes@671
    32
#include "sh4/sh4stat.h"
nkeynes@388
    33
#include "sh4/sh4mmio.h"
nkeynes@939
    34
#include "sh4/mmu.h"
nkeynes@991
    35
#include "xlat/xltcache.h"
nkeynes@991
    36
#include "xlat/x86/x86op.h"
nkeynes@1263
    37
#include "xlat/xlatdasm.h"
nkeynes@368
    38
#include "clock.h"
nkeynes@368
    39
nkeynes@368
    40
#define DEFAULT_BACKPATCH_SIZE 4096
nkeynes@368
    41
nkeynes@991
    42
/*
 * Offset of a field of the sh4r structure, biased by -128. The entry stub
 * loads the frame register with ((uint8_t *)&sh4r) + 128, so these values
 * are the displacements to use relative to that register.
 */
#define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)

#define R_T      REG_OFFSET(t)
#define R_Q      REG_OFFSET(q)
#define R_S      REG_OFFSET(s)
#define R_M      REG_OFFSET(m)
#define R_SR     REG_OFFSET(sr)
#define R_GBR    REG_OFFSET(gbr)
#define R_SSR    REG_OFFSET(ssr)
#define R_SPC    REG_OFFSET(spc)
#define R_VBR    REG_OFFSET(vbr)
/* High word of mac: parenthesized so the +4 cannot rebind inside a larger
 * expression (e.g. 2 * R_MACH). */
#define R_MACH   (REG_OFFSET(mac)+4)
#define R_MACL   REG_OFFSET(mac)
#define R_PC     REG_OFFSET(pc)
#define R_NEW_PC REG_OFFSET(new_pc)
#define R_PR     REG_OFFSET(pr)
#define R_SGR    REG_OFFSET(sgr)
#define R_FPUL   REG_OFFSET(fpul)
#define R_FPSCR  REG_OFFSET(fpscr)
#define R_DBR    REG_OFFSET(dbr)
#define R_R(rn)  REG_OFFSET(r[rn])
/* Single-precision registers are addressed with the low index bit flipped */
#define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
#define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
/* Double registers: bit 0 of f selects the bank, the remaining bits the pair */
#define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
#define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
#define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
nkeynes@995
    69
nkeynes@995
    70
/* Values held in sh4_x86_state.in_delay_slot */
#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2

/* Value of sh4_x86_state.sh4_mode when the mode is not known at translation
 * time. Parenthesized so the sign cannot bind to a neighbouring operator. */
#define SH4_MODE_UNKNOWN (-1)
nkeynes@1112
    75
nkeynes@586
    76
/*
 * One pending fixup in the block-wide back-patch list, as filled in by
 * sh4_x86_add_backpatch().
 */
struct backpatch_record {
    /* Offset of the relocation from the start of the current block's code */
    uint32_t fixup_offset;
    /* (fixup pc - block start pc) >> 1 */
    uint32_t fixup_icount;
    /* Exception code for the fixup; -2 marks a pointer-sized relocation */
    int32_t exc_code;
};
nkeynes@586
    81
nkeynes@368
    82
/** 
nkeynes@368
    83
 * Struct to manage internal translation state. This state is not saved -
nkeynes@368
    84
 * it is only valid between calls to sh4_translate_begin_block() and
nkeynes@368
    85
 * sh4_translate_end_block()
nkeynes@368
    86
 */
nkeynes@368
    87
struct sh4_x86_state {
nkeynes@590
    88
    int in_delay_slot;
nkeynes@1112
    89
    uint8_t *code;
nkeynes@368
    90
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
nkeynes@409
    91
    gboolean branch_taken; /* true if we branched unconditionally */
nkeynes@901
    92
    gboolean double_prec; /* true if FPU is in double-precision mode */
nkeynes@903
    93
    gboolean double_size; /* true if FPU is in double-size mode */
nkeynes@903
    94
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
nkeynes@408
    95
    uint32_t block_start_pc;
nkeynes@547
    96
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
nkeynes@1112
    97
    uint32_t sh4_mode;     /* Mirror of sh4r.xlat_sh4_mode */
nkeynes@417
    98
    int tstate;
nkeynes@368
    99
nkeynes@1125
   100
    /* mode settings */
nkeynes@586
   101
    gboolean tlb_on; /* True if tlb translation is active */
nkeynes@1125
   102
    struct mem_region_fn **priv_address_space;
nkeynes@1125
   103
    struct mem_region_fn **user_address_space;
nkeynes@586
   104
nkeynes@1125
   105
    /* Instrumentation */
nkeynes@1125
   106
    xlat_block_begin_callback_t begin_callback;
nkeynes@1125
   107
    xlat_block_end_callback_t end_callback;
nkeynes@1125
   108
    gboolean fastmem;
nkeynes@1125
   109
    
nkeynes@368
   110
    /* Allocated memory for the (block-wide) back-patch list */
nkeynes@586
   111
    struct backpatch_record *backpatch_list;
nkeynes@368
   112
    uint32_t backpatch_posn;
nkeynes@368
   113
    uint32_t backpatch_size;
nkeynes@368
   114
};
nkeynes@368
   115
nkeynes@368
   116
static struct sh4_x86_state sh4_x86;
nkeynes@368
   117
nkeynes@1292
   118
static uint8_t sh4_entry_stub[128];
nkeynes@1292
   119
void FASTCALL (*sh4_translate_enter)(void *code);
nkeynes@1292
   120
nkeynes@388
   121
static uint32_t max_int = 0x7FFFFFFF;
nkeynes@388
   122
static uint32_t min_int = 0x80000000;
nkeynes@394
   123
static uint32_t save_fcw; /* save value for fpu control word */
nkeynes@394
   124
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
nkeynes@386
   125
nkeynes@1214
   126
static void sh4_x86_translate_unlink_block( void *use_list );
nkeynes@1196
   127
nkeynes@1214
   128
static struct xlat_target_fns x86_target_fns = {
nkeynes@1214
   129
	sh4_x86_translate_unlink_block
nkeynes@1214
   130
};	
nkeynes@1214
   131
nkeynes@1091
   132
nkeynes@903
   133
gboolean is_sse3_supported()
nkeynes@903
   134
{
nkeynes@903
   135
    uint32_t features;
nkeynes@903
   136
    
nkeynes@903
   137
    __asm__ __volatile__(
nkeynes@903
   138
        "mov $0x01, %%eax\n\t"
nkeynes@908
   139
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
nkeynes@903
   140
    return (features & 1) ? TRUE : FALSE;
nkeynes@903
   141
}
nkeynes@903
   142
nkeynes@1125
   143
void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
nkeynes@1125
   144
{
nkeynes@1125
   145
    sh4_x86.priv_address_space = priv;
nkeynes@1125
   146
    sh4_x86.user_address_space = user;
nkeynes@1125
   147
}
nkeynes@1125
   148
nkeynes@1292
   149
void sh4_translate_write_entry_stub(void)
nkeynes@1292
   150
{
nkeynes@1292
   151
	mem_unprotect(sh4_entry_stub, sizeof(sh4_entry_stub));
nkeynes@1292
   152
	xlat_output = sh4_entry_stub;
nkeynes@1292
   153
	PUSH_r32(REG_EBP);
nkeynes@1292
   154
	MOVP_immptr_rptr( ((uint8_t *)&sh4r) + 128, REG_EBP );
nkeynes@1292
   155
	PUSH_r32(REG_EBX);
nkeynes@1292
   156
	PUSH_r32(REG_SAVE1);
nkeynes@1292
   157
	PUSH_r32(REG_SAVE2);
nkeynes@1292
   158
#if SIZEOF_VOID_P == 8
nkeynes@1292
   159
    PUSH_r32(REG_SAVE3);
nkeynes@1292
   160
    PUSH_r32(REG_SAVE4);
nkeynes@1292
   161
    CALL_r32( REG_ARG1 );
nkeynes@1292
   162
    POP_r32(REG_SAVE4);
nkeynes@1292
   163
    POP_r32(REG_SAVE3);
nkeynes@1292
   164
#else
nkeynes@1292
   165
    SUBL_imms_r32( 8, REG_ESP ); 
nkeynes@1292
   166
	CALL_r32( REG_ARG1 );
nkeynes@1292
   167
	ADDL_imms_r32( 8, REG_ESP );
nkeynes@1292
   168
#endif
nkeynes@1292
   169
	POP_r32(REG_SAVE2);	
nkeynes@1292
   170
	POP_r32(REG_SAVE1);
nkeynes@1292
   171
	POP_r32(REG_EBX);
nkeynes@1292
   172
	POP_r32(REG_EBP);
nkeynes@1292
   173
	RET();
nkeynes@1292
   174
	sh4_translate_enter = sh4_entry_stub;
nkeynes@1292
   175
}
nkeynes@1292
   176
nkeynes@669
   177
void sh4_translate_init(void)
nkeynes@368
   178
{
nkeynes@368
   179
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
nkeynes@586
   180
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
nkeynes@1125
   181
    sh4_x86.begin_callback = NULL;
nkeynes@1125
   182
    sh4_x86.end_callback = NULL;
nkeynes@1125
   183
    sh4_x86.fastmem = TRUE;
nkeynes@903
   184
    sh4_x86.sse3_enabled = is_sse3_supported();
nkeynes@1216
   185
    xlat_set_target_fns(&x86_target_fns);
nkeynes@1292
   186
    sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
nkeynes@1292
   187
    sh4_translate_write_entry_stub();
nkeynes@368
   188
}
nkeynes@368
   189
nkeynes@1125
   190
/**
 * Install the instrumentation callbacks consulted while generating code.
 * @param begin stored as begin_callback; when non-NULL a call to it is
 *              emitted by sh4_translate_begin_block()
 * @param end   stored as end_callback
 */
void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
{
    sh4_x86.end_callback = end;
    sh4_x86.begin_callback = begin;
}
nkeynes@1125
   195
nkeynes@1125
   196
/**
 * Set the translator's fastmem flag.
 * @param flag new value of sh4_x86.fastmem
 */
void sh4_translate_set_fastmem( gboolean flag )
{
    sh4_x86.fastmem = flag;
}
nkeynes@1125
   200
nkeynes@586
   201
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
nkeynes@368
   202
{
nkeynes@991
   203
    int reloc_size = 4;
nkeynes@991
   204
    
nkeynes@991
   205
    if( exc_code == -2 ) {
nkeynes@991
   206
        reloc_size = sizeof(void *);
nkeynes@991
   207
    }
nkeynes@991
   208
    
nkeynes@368
   209
    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
nkeynes@368
   210
	sh4_x86.backpatch_size <<= 1;
nkeynes@586
   211
	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
nkeynes@586
   212
					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
nkeynes@368
   213
	assert( sh4_x86.backpatch_list != NULL );
nkeynes@368
   214
    }
nkeynes@586
   215
    if( sh4_x86.in_delay_slot ) {
nkeynes@586
   216
	fixup_pc -= 2;
nkeynes@586
   217
    }
nkeynes@991
   218
nkeynes@604
   219
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
nkeynes@991
   220
	(((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
nkeynes@586
   221
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
nkeynes@586
   222
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
nkeynes@586
   223
    sh4_x86.backpatch_posn++;
nkeynes@368
   224
}
nkeynes@368
   225
nkeynes@991
   226
/*
 * Values of sh4_x86_state.tstate. TSTATE_NONE means there is no usable
 * condition and LOAD_t() must compare sh4r.t first; the others are x86
 * condition codes. TSTATE_NONE is parenthesized so the sign cannot bind to
 * a neighbouring operator.
 */
#define TSTATE_NONE (-1)
#define TSTATE_O    X86_COND_O
#define TSTATE_C    X86_COND_C
#define TSTATE_E    X86_COND_E
#define TSTATE_NE   X86_COND_NE
#define TSTATE_G    X86_COND_G
#define TSTATE_GE   X86_COND_GE
#define TSTATE_A    X86_COND_A
#define TSTATE_AE   X86_COND_AE
nkeynes@359
   235
nkeynes@991
   236
/*
 * 8-bit forward jump fixups. MARK_JMP8 remembers the address of the last
 * byte emitted (the rel8 field, which the *_label macros emit as -1);
 * JMP_TARGET adds the distance from that byte to the current output
 * position, leaving the displacement to the current position.
 */
#define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
#define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)

/* Convenience instructions. Note that several of these expand to more than
 * one statement. */
#define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
#define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
#define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
#define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
#define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
#define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
#define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
#define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
#define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
#define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
#define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
#define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
#define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
#define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
#define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
#define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
#define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
#define JP_label(label)  JCC_cc_rel8(X86_COND_P,-1); MARK_JMP8(label)
#define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
#define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
/* Conditional 32-bit jump whose target is resolved through the back-patch
 * list; relies on a variable named pc being in scope at the point of use. */
#define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)

/* If no condition is cached in tstate, emit a compare of sh4r.t against 1
 * and record TSTATE_E. */
#define LOAD_t() if( sh4_x86.tstate == TSTATE_NONE ) { \
	CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; }

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_label(label) LOAD_t() \
    JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_label(label) LOAD_t() \
    JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
nkeynes@359
   272
nkeynes@939
   273
nkeynes@991
   274
/* Move a general SH4 register r[sh4reg] to / from an x86 register */
#define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
#define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
#define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
#define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )

/**
 * Load one half of a DR register (DR or XD) into an integer x86 register.
 * The macro argument is parenthesized so that an expression argument is
 * evaluated as a whole, as in the neighbouring FR/XF macros.
 */
#define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[(frm)&1][(frm)|0x01]), reg )
#define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[(frm)&1][(frm)&0x0E]), reg )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )

/* Store one half of a DR register (DR or XD) from an integer x86 register */
#define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[(frm)&1][(frm)|0x01]) )
#define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[(frm)&1][(frm)&0x0E]) )

/* x87 stack push (FLD) / pop (FSTP) of FPUL and of the FR/XF/DR/XD registers */
#define push_fpul()  FLDF_rbpdisp(R_FPUL)
#define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
#define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
nkeynes@377
   311
nkeynes@991
   312
/*
 * Per-instruction statistics hook. Expands to nothing unless
 * ENABLE_SH4STATS is defined; otherwise emits a call to sh4_stats_add(id)
 * via EAX and invalidates the cached T state.
 */
#ifndef ENABLE_SH4STATS
#define COUNT_INST(id)
#else
#define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
#endif
nkeynes@377
   317
nkeynes@374
   318
nkeynes@368
   319
/* Exception checks - Note that all exception checks will clobber EAX */

/*
 * Translation-time privilege check: when the block is not being translated
 * in privileged mode (SR_MD clear), emit an illegal-instruction exit (the
 * slot variant when in a delay slot) and return 2 from the enclosing
 * function.
 */
#define check_priv( ) \
    if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2), 4 ); \
        } else { \
            exit_block_exc(EXC_ILLEGAL, pc, 2); \
        } \
        sh4_x86.branch_taken = TRUE; \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }

/*
 * Emit a test of SR.FD that raises the FPU-disabled exception, at most once
 * per block (guarded by fpuen_checked). Invalidates the cached T state.
 */
#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
	sh4_x86.fpuen_checked = TRUE;\
	MOVL_rbpdisp_r32( R_SR, REG_EAX );\
	ANDL_imms_r32( SR_FD, REG_EAX );\
	if( sh4_x86.in_delay_slot ) {\
	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
	} else {\
	    JNE_exc(EXC_FPU_DISABLED);\
	}\
	sh4_x86.tstate = TSTATE_NONE; \
    }

/*
 * Alignment checks: test the low address bits in x86reg and branch to the
 * data-address exception when any are set. The read variants expand without
 * a trailing semicolon, the write variants with one.
 */
#define check_ralign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);
nkeynes@732
   369
nkeynes@1125
   370
/* Address-space table for the mode being translated: the privileged table
 * when SR_MD is set in sh4_mode, the user table otherwise. */
#define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)

/* Expands to nothing */
#define UNDEF(ir)
nkeynes@939
   373
/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so 
nkeynes@939
   374
 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
nkeynes@586
   375
 */
nkeynes@941
   376
#ifdef HAVE_FRAME_ADDRESS
nkeynes@995
   377
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
nkeynes@995
   378
{
nkeynes@1292
   379
    decode_address(address_space(), addr_reg, REG_CALLPTR);
nkeynes@1112
   380
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
nkeynes@1292
   381
        CALL1_r32disp_r32(REG_CALLPTR, offset, addr_reg);
nkeynes@995
   382
    } else {
nkeynes@995
   383
        if( addr_reg != REG_ARG1 ) {
nkeynes@995
   384
            MOVL_r32_r32( addr_reg, REG_ARG1 );
nkeynes@995
   385
        }
nkeynes@995
   386
        MOVP_immptr_rptr( 0, REG_ARG2 );
nkeynes@995
   387
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
nkeynes@1292
   388
        CALL2_r32disp_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2);
nkeynes@995
   389
    }
nkeynes@995
   390
    if( value_reg != REG_RESULT1 ) { 
nkeynes@995
   391
        MOVL_r32_r32( REG_RESULT1, value_reg );
nkeynes@995
   392
    }
nkeynes@995
   393
}
nkeynes@995
   394
nkeynes@995
   395
static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
nkeynes@995
   396
{
nkeynes@1292
   397
    decode_address(address_space(), addr_reg, REG_CALLPTR);
nkeynes@1112
   398
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
nkeynes@1292
   399
        CALL2_r32disp_r32_r32(REG_CALLPTR, offset, addr_reg, value_reg);
nkeynes@995
   400
    } else {
nkeynes@995
   401
        if( value_reg != REG_ARG2 ) {
nkeynes@995
   402
            MOVL_r32_r32( value_reg, REG_ARG2 );
nkeynes@995
   403
	}        
nkeynes@995
   404
        if( addr_reg != REG_ARG1 ) {
nkeynes@995
   405
            MOVL_r32_r32( addr_reg, REG_ARG1 );
nkeynes@995
   406
        }
nkeynes@995
   407
#if MAX_REG_ARG > 2        
nkeynes@995
   408
        MOVP_immptr_rptr( 0, REG_ARG3 );
nkeynes@995
   409
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
nkeynes@1292
   410
        CALL3_r32disp_r32_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2, REG_ARG3);
nkeynes@995
   411
#else
nkeynes@995
   412
        MOVL_imm32_rspdisp( 0, 0 );
nkeynes@995
   413
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
nkeynes@1292
   414
        CALL3_r32disp_r32_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2, 0);
nkeynes@995
   415
#endif
nkeynes@995
   416
    }
nkeynes@995
   417
}
nkeynes@995
   418
#else
nkeynes@995
   419
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
nkeynes@995
   420
{
nkeynes@1292
   421
    decode_address(address_space(), addr_reg, REG_CALLPTR);
nkeynes@1292
   422
    CALL1_r32disp_r32(REG_CALLPTR, offset, addr_reg);
nkeynes@995
   423
    if( value_reg != REG_RESULT1 ) {
nkeynes@995
   424
        MOVL_r32_r32( REG_RESULT1, value_reg );
nkeynes@995
   425
    }
nkeynes@995
   426
}     
nkeynes@995
   427
nkeynes@996
   428
static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
nkeynes@995
   429
{
nkeynes@1292
   430
    decode_address(address_space(), addr_reg, REG_CALLPTR);
nkeynes@1292
   431
    CALL2_r32disp_r32_r32(REG_CALLPTR, offset, addr_reg, value_reg);
nkeynes@995
   432
}
nkeynes@941
   433
#endif
nkeynes@939
   434
                
nkeynes@995
   435
/* Byte offset of a function pointer within struct mem_region_fn */
#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )

/* Memory access emitters; all rely on a variable named pc being in scope */
#define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
#define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc) 
#define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
#define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
#define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)

/* Emit a slot-illegal exception exit, leave delay-slot state and return 2
 * from the enclosing function (the expansion ends in a semicolon). */
#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4); sh4_x86.in_delay_slot = DELAY_NONE; return 2;

/** Offsets of xlat_cache_block fields relative to the code pointer */
#define XLAT_SH4_MODE_CODE_OFFSET  (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
#define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
#define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
nkeynes@1182
   451
nkeynes@901
   452
/**
 * Reset the per-block translator state and emit the block preamble.
 *
 * Snapshots the current output position, TLB state, FPSCR PR/SZ bits and
 * sh4r.xlat_sh4_mode; then emits a call to the begin callback (if one is
 * installed) and, when block profiling is on, an increment of the block's
 * `active` counter.
 *
 * @param pc SH4 address of the first instruction of the block
 */
void sh4_translate_begin_block( sh4addr_t pc ) 
{
    sh4_x86.code = xlat_output;
    /* in_delay_slot holds DELAY_* values everywhere else in this file */
    sh4_x86.in_delay_slot = DELAY_NONE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_TLB_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
    sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
    if( sh4_x86.begin_callback ) {
        CALL_ptr( sh4_x86.begin_callback );
    }
    if( sh4_profile_blocks ) {
        /* Address of this block's `active` field, relative to its code */
        MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
        ADDL_imms_r32disp( 1, REG_EAX, 0 );
    }
}
nkeynes@901
   473
nkeynes@901
   474
nkeynes@593
   475
uint32_t sh4_translate_end_block_size()
nkeynes@593
   476
{
nkeynes@1196
   477
	uint32_t epilogue_size = EPILOGUE_SIZE;
nkeynes@1196
   478
	if( sh4_x86.end_callback ) {
nkeynes@1196
   479
	    epilogue_size += (CALL1_PTR_MIN_SIZE - 1);
nkeynes@1196
   480
	}
nkeynes@596
   481
    if( sh4_x86.backpatch_posn <= 3 ) {
nkeynes@1196
   482
        epilogue_size += (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
nkeynes@596
   483
    } else {
nkeynes@1196
   484
        epilogue_size += (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
nkeynes@596
   485
    }
nkeynes@1196
   486
    return epilogue_size;
nkeynes@593
   487
}
nkeynes@593
   488
nkeynes@593
   489
nkeynes@590
   490
/**
nkeynes@590
   491
 * Embed a breakpoint into the generated code
nkeynes@590
   492
 */
nkeynes@586
   493
void sh4_translate_emit_breakpoint( sh4vma_t pc )
nkeynes@586
   494
{
nkeynes@995
   495
    MOVL_imm32_r32( pc, REG_EAX );
nkeynes@995
   496
    CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
nkeynes@875
   497
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@586
   498
}
nkeynes@590
   499
nkeynes@601
   500
nkeynes@601
   501
/* True when pc lies outside the icache; parenthesized so the negation
 * cannot combine with operators at the point of use. */
#define UNTRANSLATABLE(pc) (!IS_IN_ICACHE(pc))
nkeynes@601
   502
nkeynes@1112
   503
/**
nkeynes@1112
   504
 * Test if the loaded target code pointer in %eax is valid, and if so jump
nkeynes@1112
   505
 * directly into it, bypassing the normal exit.
nkeynes@1112
   506
 */
nkeynes@1112
   507
static void jump_next_block()
nkeynes@1112
   508
{
nkeynes@1149
   509
	uint8_t *ptr = xlat_output;
nkeynes@1112
   510
	TESTP_rptr_rptr(REG_EAX, REG_EAX);
nkeynes@1112
   511
	JE_label(nocode);
nkeynes@1112
   512
	if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
nkeynes@1112
   513
	    /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
nkeynes@1112
   514
	    MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
nkeynes@1112
   515
	    CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
nkeynes@1112
   516
	} else {
nkeynes@1112
   517
	    CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
nkeynes@1112
   518
	}
nkeynes@1112
   519
	JNE_label(wrongmode);
nkeynes@1125
   520
	if( sh4_x86.end_callback ) {
nkeynes@1125
   521
	    /* Note this does leave the stack out of alignment, but doesn't matter
nkeynes@1125
   522
	     * for what we're currently using it for.
nkeynes@1125
   523
	     */
nkeynes@1125
   524
	    PUSH_r32(REG_EAX);
nkeynes@1125
   525
	    MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
nkeynes@1125
   526
	    JMP_rptr(REG_ECX);
nkeynes@1125
   527
	} else {
nkeynes@1125
   528
	    JMP_rptr(REG_EAX);
nkeynes@1125
   529
	}
nkeynes@1149
   530
	JMP_TARGET(wrongmode);
nkeynes@1176
   531
	MOVP_rptrdisp_rptr( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
nkeynes@1149
   532
	int rel = ptr - xlat_output;
nkeynes@1149
   533
    JMP_prerel(rel);
nkeynes@1149
   534
	JMP_TARGET(nocode); 
nkeynes@1112
   535
}
nkeynes@1112
   536
nkeynes@1186
   537
/**
nkeynes@1186
   538
 * 
nkeynes@1186
   539
 */
nkeynes@1263
   540
void FASTCALL sh4_translate_link_block( uint32_t pc )
nkeynes@1186
   541
{
nkeynes@1186
   542
    uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
nkeynes@1186
   543
    while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
nkeynes@1186
   544
        target = XLAT_BLOCK_CHAIN(target);
nkeynes@1186
   545
	}
nkeynes@1186
   546
    if( target == NULL ) {
nkeynes@1186
   547
        target = sh4_translate_basic_block( pc );
nkeynes@1186
   548
    }
nkeynes@1186
   549
    uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
nkeynes@1186
   550
    *backpatch = 0xE9;
nkeynes@1292
   551
    *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)-5;
nkeynes@1186
   552
    *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
nkeynes@1186
   553
    XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch; 
nkeynes@1186
   554
nkeynes@1198
   555
    uint8_t * volatile *retptr = ((uint8_t * volatile *)__builtin_frame_address(0))+1;
nkeynes@1186
   556
    assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
nkeynes@1186
   557
	*retptr = backpatch;
nkeynes@1186
   558
}
nkeynes@1186
   559
nkeynes@1186
   560
static void emit_translate_and_backpatch()
nkeynes@1186
   561
{
nkeynes@1186
   562
    /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
nkeynes@1263
   563
    CALL1_ptr_r32(sh4_translate_link_block, REG_ARG1);
nkeynes@1186
   564
nkeynes@1186
   565
    /* When patched, the jmp instruction will be 5 bytes (either platform) -
nkeynes@1186
   566
     * we need to reserve sizeof(void*) bytes for the use-list
nkeynes@1186
   567
	 * pointer
nkeynes@1186
   568
	 */ 
nkeynes@1186
   569
    if( sizeof(void*) == 8 ) {
nkeynes@1186
   570
        NOP();
nkeynes@1186
   571
    } else {
nkeynes@1186
   572
        NOP2();
nkeynes@1186
   573
    }
nkeynes@1186
   574
}
nkeynes@1186
   575
nkeynes@1186
   576
/**
nkeynes@1186
   577
 * If we're jumping to a fixed address (or at least fixed relative to the
nkeynes@1186
   578
 * current PC, then we can do a direct branch. REG_ARG1 should contain
nkeynes@1186
   579
 * the PC at this point.
nkeynes@1186
   580
 */
nkeynes@1186
   581
static void jump_next_block_fixed_pc( sh4addr_t pc )
nkeynes@1186
   582
{
nkeynes@1186
   583
	if( IS_IN_ICACHE(pc) ) {
nkeynes@1194
   584
	    if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN && sh4_x86.end_callback == NULL ) {
nkeynes@1186
   585
	        /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
nkeynes@1186
   586
	         * fetch-and-backpatch routine, which will replace the call with a branch */
nkeynes@1186
   587
           emit_translate_and_backpatch();	         
nkeynes@1186
   588
           return;
nkeynes@1186
   589
		} else {
nkeynes@1186
   590
            MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
nkeynes@1186
   591
            ANDP_imms_rptr( -4, REG_EAX );
nkeynes@1186
   592
        }
nkeynes@1186
   593
	} else if( sh4_x86.tlb_on ) {
nkeynes@1186
   594
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
nkeynes@1186
   595
    } else {
nkeynes@1186
   596
        CALL1_ptr_r32(xlat_get_code, REG_ARG1);
nkeynes@1186
   597
    }
nkeynes@1186
   598
    jump_next_block();
nkeynes@1186
   599
nkeynes@1186
   600
nkeynes@1186
   601
}
nkeynes@1186
   602
nkeynes@1214
   603
static void sh4_x86_translate_unlink_block( void *use_list )
nkeynes@1186
   604
{
nkeynes@1186
   605
	uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */
nkeynes@1186
   606
	void *next = use_list;
nkeynes@1186
   607
	while( next != NULL ) {
nkeynes@1186
   608
    	xlat_output = (uint8_t *)next;
nkeynes@1186
   609
 	    next = *(void **)(xlat_output+5);
nkeynes@1186
   610
 		emit_translate_and_backpatch();
nkeynes@1186
   611
 	}
nkeynes@1186
   612
 	xlat_output = tmp;
nkeynes@1186
   613
}
nkeynes@1186
   614
nkeynes@1186
   615
nkeynes@1186
   616
nkeynes@1125
   617
static void exit_block()
nkeynes@1125
   618
{
nkeynes@1125
   619
	if( sh4_x86.end_callback ) {
nkeynes@1125
   620
	    MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
nkeynes@1125
   621
	    JMP_rptr(REG_ECX);
nkeynes@1125
   622
	} else {
nkeynes@1125
   623
	    RET();
nkeynes@1125
   624
	}
nkeynes@1125
   625
}
nkeynes@1125
   626
nkeynes@590
   627
/**
nkeynes@995
   628
 * Exit the block with sh4r.pc already written
nkeynes@995
   629
 */
nkeynes@995
   630
void exit_block_pcset( sh4addr_t pc )
nkeynes@995
   631
{
nkeynes@995
   632
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
nkeynes@1112
   633
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
nkeynes@1112
   634
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
nkeynes@1112
   635
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
nkeynes@1112
   636
    JBE_label(exitloop);
nkeynes@995
   637
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
nkeynes@995
   638
    if( sh4_x86.tlb_on ) {
nkeynes@995
   639
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
nkeynes@995
   640
    } else {
nkeynes@995
   641
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
nkeynes@995
   642
    }
nkeynes@1112
   643
    
nkeynes@1112
   644
    jump_next_block();
nkeynes@1112
   645
    JMP_TARGET(exitloop);
nkeynes@995
   646
    exit_block();
nkeynes@995
   647
}
nkeynes@995
   648
nkeynes@995
   649
/**
nkeynes@995
   650
 * Exit the block with sh4r.new_pc written with the target pc
nkeynes@995
   651
 */
nkeynes@995
   652
void exit_block_newpcset( sh4addr_t pc )
nkeynes@995
   653
{
nkeynes@995
   654
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
nkeynes@1112
   655
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
nkeynes@1112
   656
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
nkeynes@995
   657
    MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
nkeynes@995
   658
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
nkeynes@1112
   659
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
nkeynes@1112
   660
    JBE_label(exitloop);
nkeynes@995
   661
    if( sh4_x86.tlb_on ) {
nkeynes@995
   662
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
nkeynes@995
   663
    } else {
nkeynes@995
   664
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
nkeynes@995
   665
    }
nkeynes@1112
   666
	
nkeynes@1112
   667
	jump_next_block();
nkeynes@1112
   668
    JMP_TARGET(exitloop);
nkeynes@995
   669
    exit_block();
nkeynes@995
   670
}
nkeynes@995
   671
nkeynes@995
   672
nkeynes@995
   673
/**
nkeynes@995
   674
 * Exit the block to an absolute PC
nkeynes@995
   675
 */
nkeynes@995
   676
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
nkeynes@995
   677
{
nkeynes@1112
   678
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
nkeynes@1112
   679
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
nkeynes@1112
   680
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
nkeynes@1112
   681
nkeynes@1112
   682
    MOVL_imm32_r32( pc, REG_ARG1 );
nkeynes@1112
   683
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
nkeynes@1112
   684
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
nkeynes@1112
   685
    JBE_label(exitloop);
nkeynes@1186
   686
    jump_next_block_fixed_pc(pc);    
nkeynes@1112
   687
    JMP_TARGET(exitloop);
nkeynes@995
   688
    exit_block();
nkeynes@995
   689
}
nkeynes@995
   690
nkeynes@995
   691
/**
nkeynes@995
   692
 * Exit the block to a relative PC
nkeynes@995
   693
 */
nkeynes@995
   694
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
nkeynes@995
   695
{
nkeynes@1112
   696
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
nkeynes@1112
   697
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
nkeynes@1112
   698
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
nkeynes@1112
   699
nkeynes@1112
   700
	if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
nkeynes@1112
   701
	    /* Special case for tight loops - the PC doesn't change, and
nkeynes@1112
   702
	     * we already know the target address. Just check events pending before
nkeynes@1112
   703
	     * looping.
nkeynes@1112
   704
	     */
nkeynes@1112
   705
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
nkeynes@1292
   706
        uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output));
nkeynes@1112
   707
        JCC_cc_prerel(X86_COND_A, backdisp);
nkeynes@1112
   708
	} else {
nkeynes@1112
   709
        MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
nkeynes@1112
   710
        ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
nkeynes@1112
   711
        MOVL_r32_rbpdisp( REG_ARG1, R_PC );
nkeynes@1112
   712
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
nkeynes@1112
   713
        JBE_label(exitloop2);
nkeynes@1186
   714
        
nkeynes@1186
   715
        jump_next_block_fixed_pc(pc);
nkeynes@1112
   716
        JMP_TARGET(exitloop2);
nkeynes@995
   717
    }
nkeynes@995
   718
    exit_block();
nkeynes@995
   719
}
nkeynes@995
   720
nkeynes@995
   721
/**
nkeynes@995
   722
 * Exit unconditionally with a general exception
nkeynes@995
   723
 */
nkeynes@1191
   724
void exit_block_exc( int code, sh4addr_t pc, int inst_adjust )
nkeynes@995
   725
{
nkeynes@995
   726
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
nkeynes@995
   727
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
nkeynes@1191
   728
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc + inst_adjust)>>1)*sh4_cpu_period, REG_ECX );
nkeynes@995
   729
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
nkeynes@995
   730
    MOVL_imm32_r32( code, REG_ARG1 );
nkeynes@995
   731
    CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
nkeynes@995
   732
    exit_block();
nkeynes@995
   733
}    
nkeynes@995
   734
nkeynes@995
   735
/**
nkeynes@590
   736
 * Embed a call to sh4_execute_instruction for situations that we
nkeynes@601
   737
 * can't translate (just page-crossing delay slots at the moment).
nkeynes@601
   738
 * Caller is responsible for setting new_pc before calling this function.
nkeynes@601
   739
 *
nkeynes@601
   740
 * Performs:
nkeynes@601
   741
 *   Set PC = endpc
nkeynes@601
   742
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
nkeynes@601
   743
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
nkeynes@601
   744
 *   Call sh4_execute_instruction
nkeynes@601
   745
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
nkeynes@590
   746
 */
nkeynes@601
   747
void exit_block_emu( sh4vma_t endpc )
nkeynes@590
   748
{
nkeynes@995
   749
    MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
nkeynes@991
   750
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
nkeynes@586
   751
    
nkeynes@995
   752
    MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
nkeynes@991
   753
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
nkeynes@995
   754
    MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
nkeynes@995
   755
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );
nkeynes@590
   756
nkeynes@1112
   757
    CALL_ptr( sh4_execute_instruction );
nkeynes@926
   758
    exit_block();
nkeynes@590
   759
} 
nkeynes@539
   760
nkeynes@359
   761
/**
nkeynes@995
   762
 * Write the block trailer (exception handling block)
nkeynes@995
   763
 */
nkeynes@995
   764
void sh4_translate_end_block( sh4addr_t pc ) {
nkeynes@995
   765
    if( sh4_x86.branch_taken == FALSE ) {
nkeynes@995
   766
        // Didn't exit unconditionally already, so write the termination here
nkeynes@995
   767
        exit_block_rel( pc, pc );
nkeynes@995
   768
    }
nkeynes@995
   769
    if( sh4_x86.backpatch_posn != 0 ) {
nkeynes@995
   770
        unsigned int i;
nkeynes@995
   771
        // Exception raised - cleanup and exit
nkeynes@995
   772
        uint8_t *end_ptr = xlat_output;
nkeynes@995
   773
        MOVL_r32_r32( REG_EDX, REG_ECX );
nkeynes@995
   774
        ADDL_r32_r32( REG_EDX, REG_ECX );
nkeynes@995
   775
        ADDL_r32_rbpdisp( REG_ECX, R_SPC );
nkeynes@995
   776
        MOVL_moffptr_eax( &sh4_cpu_period );
nkeynes@1191
   777
        INC_r32( REG_EDX );  /* Add 1 for the aborting instruction itself */ 
nkeynes@995
   778
        MULL_r32( REG_EDX );
nkeynes@995
   779
        ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
nkeynes@995
   780
        exit_block();
nkeynes@995
   781
nkeynes@995
   782
        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
nkeynes@995
   783
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
nkeynes@995
   784
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
nkeynes@995
   785
                if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
nkeynes@995
   786
                    *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output; 
nkeynes@995
   787
                } else {
nkeynes@995
   788
                    *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
nkeynes@995
   789
                }
nkeynes@995
   790
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
nkeynes@995
   791
                int rel = end_ptr - xlat_output;
nkeynes@995
   792
                JMP_prerel(rel);
nkeynes@995
   793
            } else {
nkeynes@995
   794
                *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
nkeynes@995
   795
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
nkeynes@995
   796
                CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
nkeynes@995
   797
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
nkeynes@995
   798
                int rel = end_ptr - xlat_output;
nkeynes@995
   799
                JMP_prerel(rel);
nkeynes@995
   800
            }
nkeynes@995
   801
        }
nkeynes@995
   802
    }
nkeynes@995
   803
}
nkeynes@539
   804
nkeynes@359
   805
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both the branch and the delayed instruction as a single unit.
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch).
 */
nkeynes@590
   814
uint32_t sh4_translate_instruction( sh4vma_t pc )
nkeynes@359
   815
{
nkeynes@388
   816
    uint32_t ir;
nkeynes@586
   817
    /* Read instruction from icache */
nkeynes@586
   818
    assert( IS_IN_ICACHE(pc) );
nkeynes@586
   819
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);
nkeynes@586
   820
    
nkeynes@586
   821
    if( !sh4_x86.in_delay_slot ) {
nkeynes@596
   822
	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
nkeynes@388
   823
    }
nkeynes@1003
   824
    
nkeynes@1003
   825
    /* check for breakpoints at this pc */
nkeynes@1003
   826
    for( int i=0; i<sh4_breakpoint_count; i++ ) {
nkeynes@1003
   827
        if( sh4_breakpoints[i].address == pc ) {
nkeynes@1003
   828
            sh4_translate_emit_breakpoint(pc);
nkeynes@1003
   829
            break;
nkeynes@1003
   830
        }
nkeynes@571
   831
    }
nkeynes@359
   832
%%
nkeynes@359
   833
/* ALU operations */
nkeynes@359
   834
ADD Rm, Rn {:
    /* Rn += Rm. The x86 ADD clobbers the flags, so the cached T state is dropped. */
    COUNT_INST(I_ADD);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
   842
ADD #imm, Rn {:
    /* Rn += imm, performed directly on the in-memory register slot. */
    COUNT_INST(I_ADDI);
    ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
   847
ADDC Rm, Rn {:
    /* Rn = Rn + Rm + T, with T set from the resulting carry. */
    COUNT_INST(I_ADDC);
    if( sh4_x86.tstate != TSTATE_C ) {
        /* T is not known to be in the carry flag already - load it */
        LDC_t();
    }
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADCL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
nkeynes@359
   859
ADDV Rm, Rn {:
    /* Rn += Rm, with T set from the signed-overflow flag. */
    COUNT_INST(I_ADDV);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
nkeynes@359
   868
AND Rm, Rn {:
    /* Rn &= Rm. */
    COUNT_INST(I_AND);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ANDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
   876
AND #imm, R0 {:
    /* R0 &= imm. */
    COUNT_INST(I_ANDI);
    load_reg( REG_EAX, 0 );
    ANDL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
   883
AND.B #imm, @(R0, GBR) {:
    /* Read-modify-write of the byte at R0+GBR: byte &= imm.
     * The address is parked in REG_SAVE1 across the read and restored to EAX
     * for the write. */
    COUNT_INST(I_ANDB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_r32(REG_EAX, REG_SAVE1);
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_r32_r32(REG_SAVE1, REG_EAX);
    ANDL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
   894
CMP/EQ Rm, Rn {:
    /* T = (Rn == Rm). */
    COUNT_INST(I_CMPEQ);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
nkeynes@359
   902
CMP/EQ #imm, R0 {:
    /* T = (R0 == imm). */
    COUNT_INST(I_CMPEQI);
    load_reg( REG_EAX, 0 );
    CMPL_imms_r32(imm, REG_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
nkeynes@359
   909
CMP/GE Rm, Rn {:
    /* T = (Rn >= Rm), signed comparison. */
    COUNT_INST(I_CMPGE);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
nkeynes@359
   917
CMP/GT Rm, Rn {:
    /* T = (Rn > Rm), signed comparison. */
    COUNT_INST(I_CMPGT);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
nkeynes@359
   925
CMP/HI Rm, Rn {:
    /* T = (Rn > Rm), unsigned comparison. */
    COUNT_INST(I_CMPHI);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
nkeynes@359
   933
CMP/HS Rm, Rn {:
    /* T = (Rn >= Rm), unsigned comparison. */
    COUNT_INST(I_CMPHS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
:}
nkeynes@359
   941
CMP/PL Rn {:
    /* T = (Rn > 0), signed comparison. */
    COUNT_INST(I_CMPPL);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
nkeynes@359
   948
CMP/PZ Rn {:
    /* T = (Rn >= 0), signed comparison. */
    COUNT_INST(I_CMPPZ);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
nkeynes@361
   955
CMP/STR Rm, Rn {:
    /* T = 1 if any of the four bytes of Rm equals the corresponding byte of Rn.
     * Rm ^ Rn has a zero byte exactly where the inputs match; each byte is
     * tested in turn, and the first zero byte jumps out with ZF set. */
    COUNT_INST(I_CMPSTR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_ECX, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target1);
    TESTB_r8_r8( REG_AH, REG_AH );
    JE_label(target2);
    SHRL_imm_r32( 16, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target3);
    TESTB_r8_r8( REG_AH, REG_AH );
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
nkeynes@361
   974
DIV0S Rm, Rn {:
    /* Signed division setup: M = sign bit of Rm, Q = sign bit of Rn, T = (M != Q). */
    COUNT_INST(I_DIV0S);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SHRL_imm_r32( 31, REG_EAX );
    SHRL_imm_r32( 31, REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
nkeynes@361
   986
DIV0U {:
    /* Unsigned division setup: Q = M = T = 0. The XOR also leaves the x86
     * carry flag clear, which matches T. */
    COUNT_INST(I_DIV0U);
    XORL_r32_r32( REG_EAX, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_Q );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
nkeynes@386
   994
DIV1 Rm, Rn {:
    /* Single division step: rotate T into Rn, then add or subtract Rm
     * depending on whether Q and M differ, and derive the new Q and T from
     * the carries of both operations. */
    COUNT_INST(I_DIV1);
    MOVL_rbpdisp_r32( R_M, REG_ECX );
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    SETC_r8( REG_DL ); // Q'
    CMPL_rbpdisp_r32( R_Q, REG_ECX );
    JE_label(mqequal);
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_label(end);
    JMP_TARGET(mqequal);
    SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_TARGET(end);
    store_reg( REG_EAX, Rn ); // Done with Rn now
    SETC_r8(REG_AL); // tmp1
    XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
    XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    XORL_imms_r32( 1, REG_AL );   // T = !Q'
    MOVZXL_r8_r32( REG_AL, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1020
DMULS.L Rm, Rn {:
    /* Signed 32x32 -> 64 multiply: MACH:MACL = Rm * Rn (EDX:EAX after IMUL). */
    COUNT_INST(I_DMULS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    IMULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1029
DMULU.L Rm, Rn {:
    /* Unsigned 32x32 -> 64 multiply: MACH:MACL = Rm * Rn (EDX:EAX after MUL). */
    COUNT_INST(I_DMULU);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1038
DT Rn {:
    /* Rn -= 1; T = (Rn == 0). The store in between is a plain move, so the
     * zero flag from the ADD is still valid for SETE_t. */
    COUNT_INST(I_DT);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( -1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
nkeynes@359
  1046
EXTS.B Rm, Rn {:
    /* Rn = low byte of Rm, sign-extended to 32 bits. */
    COUNT_INST(I_EXTSB);
    load_reg( REG_EAX, Rm );
    MOVSXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
nkeynes@361
  1052
EXTS.W Rm, Rn {:
    /* Rn = low 16 bits of Rm, sign-extended to 32 bits. */
    COUNT_INST(I_EXTSW);
    load_reg( REG_EAX, Rm );
    MOVSXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
nkeynes@361
  1058
EXTU.B Rm, Rn {:
    /* Rn = low byte of Rm, zero-extended to 32 bits. */
    COUNT_INST(I_EXTUB);
    load_reg( REG_EAX, Rm );
    MOVZXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
nkeynes@361
  1064
EXTU.W Rm, Rn {:
    /* Rn = low 16 bits of Rm, zero-extended to 32 bits. */
    COUNT_INST(I_EXTUW);
    load_reg( REG_EAX, Rm );
    MOVZXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
nkeynes@586
  1070
MAC.L @Rm+, @Rn+ {:
    /* Multiply-accumulate: read a long from each of @Rm and @Rn, post-increment
     * both registers, and add the signed 64-bit product to MACH:MACL. When the
     * S flag is set, signsat48 is called afterwards to saturate the result.
     * The first operand is parked in REG_SAVE1 while the second is read. */
    COUNT_INST(I_MACL);
    if( Rm == Rn ) {
        /* Same register: operands are at Rm and Rm+4, register advances by 8 */
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_r32(REG_EAX, REG_SAVE1);
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
    } else {
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_r32(REG_EAX, REG_SAVE1);
        load_reg( REG_EAX, Rn );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    }

    IMULL_r32( REG_SAVE1 );
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );

    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32(REG_ECX, REG_ECX);
    JE_label( nosat );
    CALL_ptr( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@386
  1104
MAC.W @Rm+, @Rn+ {:
    /* Multiply-accumulate: read a word from each of @Rm and @Rn, post-increment
     * both registers and accumulate the signed product.
     *  - S clear: the 64-bit product is added to MACH:MACL.
     *  - S set:   the product is added to MACL only; on signed overflow MACH is
     *             set to 1 and MACL is saturated to 0x7FFFFFFF or 0x80000000.
     * The first operand is parked in REG_SAVE1 while the second is read. */
    COUNT_INST(I_MACW);
    if( Rm == Rn ) {
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_r32( REG_EAX, REG_SAVE1 );
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( REG_EAX, Rn );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_r32( REG_EAX, REG_SAVE1 );
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
    }
    IMULL_r32( REG_SAVE1 );
    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32( REG_ECX, REG_ECX );
    JE_label( nosat );

    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    JNO_label( end );            // 2
    MOVL_imm32_r32( 1, REG_EDX );         // 5
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );   // 6
    /* The MOVs above leave the flags alone, so SF still reflects the ADD */
    JS_label( positive );        // 2
    MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
    JMP_label(end2);           // 2

    JMP_TARGET(positive);
    MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
    JMP_label(end3);            // 2

    JMP_TARGET(nosat);
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1155
MOVT Rn {:
    /* Rn = T. */
    COUNT_INST(I_MOVT);
    MOVL_rbpdisp_r32( R_T, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
nkeynes@361
  1160
MUL.L Rm, Rn {:
    /* MACL = low 32 bits of Rm * Rn. */
    COUNT_INST(I_MULL);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@374
  1168
MULS.W Rm, Rn {:
    /* MACL = product of the sign-extended low 16 bits of Rm and Rn. */
    COUNT_INST(I_MULSW);
    MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@374
  1176
MULU.W Rm, Rn {:
    /* MACL = product of the zero-extended low 16 bits of Rm and Rn. */
    COUNT_INST(I_MULUW);
    MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1184
NEG Rm, Rn {:
    /* Rn = -Rm. */
    COUNT_INST(I_NEG);
    load_reg( REG_EAX, Rm );
    NEGL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1191
NEGC Rm, Rn {:
    /* Rn = 0 - Rm - T, with T set from the resulting borrow.
     * T is always reloaded here because the XOR that zeroes ECX clobbers
     * the x86 flags. */
    COUNT_INST(I_NEGC);
    load_reg( REG_EAX, Rm );
    XORL_r32_r32( REG_ECX, REG_ECX );
    LDC_t();
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
nkeynes@359
  1201
NOT Rm, Rn {:
    /* Rn = ~Rm. */
    COUNT_INST(I_NOT);
    load_reg( REG_EAX, Rm );
    NOTL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1208
OR Rm, Rn {:
    /* Rn |= Rm. */
    COUNT_INST(I_OR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1216
OR #imm, R0 {:
    /* R0 |= imm. */
    COUNT_INST(I_ORI);
    load_reg( REG_EAX, 0 );
    ORL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@374
  1223
OR.B #imm, @(R0, GBR) {:
    /* Read-modify-write of the byte at R0+GBR: byte |= imm.
     * The address is parked in REG_SAVE1 across the read and restored to EAX
     * for the write. */
    COUNT_INST(I_ORB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_r32( REG_EAX, REG_SAVE1 );
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_r32_r32( REG_SAVE1, REG_EAX );
    ORL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1234
ROTCL Rn {:
    /* Rotate Rn left by one bit through T (T enters bit 0, bit 31 becomes T). */
    COUNT_INST(I_ROTCL);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
nkeynes@359
  1245
/* ROTCR Rn: rotate Rn right by one through carry (RCR). LDC_t() is emitted
 * only when the cached T state is not already TSTATE_C; the resulting
 * carry is saved with SETC_t() and the state becomes TSTATE_C. */
ROTCR Rn {:  
nkeynes@671
  1246
    COUNT_INST(I_ROTCR);
nkeynes@991
  1247
    load_reg( REG_EAX, Rn );
nkeynes@417
  1248
    if( sh4_x86.tstate != TSTATE_C ) {
nkeynes@417
  1249
	LDC_t();
nkeynes@417
  1250
    }
nkeynes@991
  1251
    RCRL_imm_r32( 1, REG_EAX );
nkeynes@991
  1252
    store_reg( REG_EAX, Rn );
nkeynes@359
  1253
    SETC_t();
nkeynes@417
  1254
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1255
:}
nkeynes@359
  1256
/* ROTL Rn: rotate Rn left by one bit (ROL); the carry produced by the
 * rotate is saved with SETC_t() and the cached T state becomes TSTATE_C. */
ROTL Rn {:  
nkeynes@671
  1257
    COUNT_INST(I_ROTL);
nkeynes@991
  1258
    load_reg( REG_EAX, Rn );
nkeynes@991
  1259
    ROLL_imm_r32( 1, REG_EAX );
nkeynes@991
  1260
    store_reg( REG_EAX, Rn );
nkeynes@359
  1261
    SETC_t();
nkeynes@417
  1262
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1263
:}
nkeynes@359
  1264
/* ROTR Rn: rotate Rn right by one bit (ROR); the carry produced by the
 * rotate is saved with SETC_t() and the cached T state becomes TSTATE_C. */
ROTR Rn {:  
nkeynes@671
  1265
    COUNT_INST(I_ROTR);
nkeynes@991
  1266
    load_reg( REG_EAX, Rn );
nkeynes@991
  1267
    RORL_imm_r32( 1, REG_EAX );
nkeynes@991
  1268
    store_reg( REG_EAX, Rn );
nkeynes@359
  1269
    SETC_t();
nkeynes@417
  1270
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1271
:}
nkeynes@359
  1272
/* SHAD Rm, Rn: dynamic arithmetic shift of Rn by the signed amount in Rm.
 *   Rm >= 0          : Rn <<= (Rm & 0x1F)                 (label doshl)
 *   Rm <  0          : count = (-Rm) & 0x1F
 *       count != 0   : Rn >>= count, arithmetic
 *       count == 0   : Rn >>= 31, arithmetic              (label emptysar)
 * All three paths join at end/end2 before the result is stored.
 * The trailing "// n" notes look like emitted instruction byte sizes
 * (NOTE(review): not confirmed from this file). */
SHAD Rm, Rn {:
nkeynes@671
  1273
    COUNT_INST(I_SHAD);
nkeynes@359
  1274
    /* Annoyingly enough, not directly convertible */
nkeynes@991
  1275
    load_reg( REG_EAX, Rn );
nkeynes@991
  1276
    load_reg( REG_ECX, Rm );
nkeynes@991
  1277
    CMPL_imms_r32( 0, REG_ECX );
nkeynes@991
  1278
    JGE_label(doshl);
nkeynes@361
  1279
                    
nkeynes@991
  1280
    NEGL_r32( REG_ECX );      // 2
nkeynes@991
  1281
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
nkeynes@991
  1282
    JE_label(emptysar);     // 2
nkeynes@991
  1283
    SARL_cl_r32( REG_EAX );       // 2
nkeynes@991
  1284
    JMP_label(end);          // 2
nkeynes@386
  1285
nkeynes@386
  1286
    JMP_TARGET(emptysar);
nkeynes@991
  1287
    SARL_imm_r32(31, REG_EAX );  // 3
nkeynes@991
  1288
    JMP_label(end2);
nkeynes@382
  1289
nkeynes@380
  1290
    JMP_TARGET(doshl);
nkeynes@991
  1291
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
nkeynes@991
  1292
    SHLL_cl_r32( REG_EAX );       // 2
nkeynes@380
  1293
    JMP_TARGET(end);
nkeynes@386
  1294
    JMP_TARGET(end2);
nkeynes@991
  1295
    store_reg( REG_EAX, Rn );
nkeynes@417
  1296
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1297
:}
nkeynes@359
  1298
/* SHLD Rm, Rn: dynamic logical shift of Rn by the signed amount in Rm.
 *   Rm >= 0          : Rn <<= (Rm & 0x1F)                 (label doshl)
 *   Rm <  0          : count = (-Rm) & 0x1F
 *       count != 0   : Rn >>= count, logical
 *       count == 0   : Rn = 0 (XOR EAX,EAX)               (label emptyshr)
 * All three paths join at end/end2 before the result is stored. */
SHLD Rm, Rn {:  
nkeynes@671
  1299
    COUNT_INST(I_SHLD);
nkeynes@991
  1300
    load_reg( REG_EAX, Rn );
nkeynes@991
  1301
    load_reg( REG_ECX, Rm );
nkeynes@991
  1302
    CMPL_imms_r32( 0, REG_ECX );
nkeynes@991
  1303
    JGE_label(doshl);
nkeynes@368
  1304
nkeynes@991
  1305
    NEGL_r32( REG_ECX );      // 2
nkeynes@991
  1306
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
nkeynes@991
  1307
    JE_label(emptyshr );
nkeynes@991
  1308
    SHRL_cl_r32( REG_EAX );       // 2
nkeynes@991
  1309
    JMP_label(end);          // 2
nkeynes@386
  1310
nkeynes@386
  1311
    JMP_TARGET(emptyshr);
nkeynes@991
  1312
    XORL_r32_r32( REG_EAX, REG_EAX );
nkeynes@991
  1313
    JMP_label(end2);
nkeynes@382
  1314
nkeynes@382
  1315
    JMP_TARGET(doshl);
nkeynes@991
  1316
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
nkeynes@991
  1317
    SHLL_cl_r32( REG_EAX );       // 2
nkeynes@382
  1318
    JMP_TARGET(end);
nkeynes@386
  1319
    JMP_TARGET(end2);
nkeynes@991
  1320
    store_reg( REG_EAX, Rn );
nkeynes@417
  1321
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1322
:}
nkeynes@359
  1323
/* SHAL Rn: shift Rn left by one bit. SETC_t() is emitted directly after
 * the shift, before the store, and the cached T state becomes TSTATE_C. */
SHAL Rn {: 
nkeynes@671
  1324
    COUNT_INST(I_SHAL);
nkeynes@991
  1325
    load_reg( REG_EAX, Rn );
nkeynes@991
  1326
    SHLL_imm_r32( 1, REG_EAX );
nkeynes@397
  1327
    SETC_t();
nkeynes@991
  1328
    store_reg( REG_EAX, Rn );
nkeynes@417
  1329
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1330
:}
nkeynes@359
  1331
/* SHAR Rn: arithmetic shift of Rn right by one bit (SAR). SETC_t() is
 * emitted directly after the shift and the cached T state becomes TSTATE_C. */
SHAR Rn {:  
nkeynes@671
  1332
    COUNT_INST(I_SHAR);
nkeynes@991
  1333
    load_reg( REG_EAX, Rn );
nkeynes@991
  1334
    SARL_imm_r32( 1, REG_EAX );
nkeynes@397
  1335
    SETC_t();
nkeynes@991
  1336
    store_reg( REG_EAX, Rn );
nkeynes@417
  1337
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1338
:}
nkeynes@359
  1339
/* SHLL Rn: logical shift of Rn left by one bit. SETC_t() is emitted
 * directly after the shift and the cached T state becomes TSTATE_C. */
SHLL Rn {:  
nkeynes@671
  1340
    COUNT_INST(I_SHLL);
nkeynes@991
  1341
    load_reg( REG_EAX, Rn );
nkeynes@991
  1342
    SHLL_imm_r32( 1, REG_EAX );
nkeynes@397
  1343
    SETC_t();
nkeynes@991
  1344
    store_reg( REG_EAX, Rn );
nkeynes@417
  1345
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1346
:}
nkeynes@359
  1347
/* SHLL2 Rn: Rn <<= 2. T is not written; the cached T state is reset.
 * Counted under the shared I_SHLL statistic. */
SHLL2 Rn {:
nkeynes@671
  1348
    COUNT_INST(I_SHLL);
nkeynes@991
  1349
    load_reg( REG_EAX, Rn );
nkeynes@991
  1350
    SHLL_imm_r32( 2, REG_EAX );
nkeynes@991
  1351
    store_reg( REG_EAX, Rn );
nkeynes@417
  1352
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1353
:}
nkeynes@359
  1354
/* SHLL8 Rn: Rn <<= 8. T is not written; the cached T state is reset.
 * Counted under the shared I_SHLL statistic. */
SHLL8 Rn {:  
nkeynes@671
  1355
    COUNT_INST(I_SHLL);
nkeynes@991
  1356
    load_reg( REG_EAX, Rn );
nkeynes@991
  1357
    SHLL_imm_r32( 8, REG_EAX );
nkeynes@991
  1358
    store_reg( REG_EAX, Rn );
nkeynes@417
  1359
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1360
:}
nkeynes@359
  1361
/* SHLL16 Rn: Rn <<= 16. T is not written; the cached T state is reset.
 * Counted under the shared I_SHLL statistic. */
SHLL16 Rn {:  
nkeynes@671
  1362
    COUNT_INST(I_SHLL);
nkeynes@991
  1363
    load_reg( REG_EAX, Rn );
nkeynes@991
  1364
    SHLL_imm_r32( 16, REG_EAX );
nkeynes@991
  1365
    store_reg( REG_EAX, Rn );
nkeynes@417
  1366
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1367
:}
nkeynes@359
  1368
/* SHLR Rn: logical shift of Rn right by one bit (SHR). SETC_t() is emitted
 * directly after the shift and the cached T state becomes TSTATE_C. */
SHLR Rn {:  
nkeynes@671
  1369
    COUNT_INST(I_SHLR);
nkeynes@991
  1370
    load_reg( REG_EAX, Rn );
nkeynes@991
  1371
    SHRL_imm_r32( 1, REG_EAX );
nkeynes@397
  1372
    SETC_t();
nkeynes@991
  1373
    store_reg( REG_EAX, Rn );
nkeynes@417
  1374
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1375
:}
nkeynes@359
  1376
/* SHLR2 Rn: Rn >>= 2 (logical). T is not written; the cached T state is
 * reset. Counted under the shared I_SHLR statistic. */
SHLR2 Rn {:  
nkeynes@671
  1377
    COUNT_INST(I_SHLR);
nkeynes@991
  1378
    load_reg( REG_EAX, Rn );
nkeynes@991
  1379
    SHRL_imm_r32( 2, REG_EAX );
nkeynes@991
  1380
    store_reg( REG_EAX, Rn );
nkeynes@417
  1381
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1382
:}
nkeynes@359
  1383
/* SHLR8 Rn: Rn >>= 8 (logical). T is not written; the cached T state is
 * reset. Counted under the shared I_SHLR statistic. */
SHLR8 Rn {:  
nkeynes@671
  1384
    COUNT_INST(I_SHLR);
nkeynes@991
  1385
    load_reg( REG_EAX, Rn );
nkeynes@991
  1386
    SHRL_imm_r32( 8, REG_EAX );
nkeynes@991
  1387
    store_reg( REG_EAX, Rn );
nkeynes@417
  1388
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1389
:}
nkeynes@359
  1390
/* SHLR16 Rn: Rn >>= 16 (logical). T is not written; the cached T state is
 * reset. Counted under the shared I_SHLR statistic. */
SHLR16 Rn {:  
nkeynes@671
  1391
    COUNT_INST(I_SHLR);
nkeynes@991
  1392
    load_reg( REG_EAX, Rn );
nkeynes@991
  1393
    SHRL_imm_r32( 16, REG_EAX );
nkeynes@991
  1394
    store_reg( REG_EAX, Rn );
nkeynes@417
  1395
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1396
:}
nkeynes@359
  1397
/* SUB Rm, Rn: emit Rn -= Rm. Rm goes to EAX, Rn to ECX; the difference is
 * left in ECX and written back to Rn. Resets the cached T state. */
SUB Rm, Rn {:  
nkeynes@671
  1398
    COUNT_INST(I_SUB);
nkeynes@991
  1399
    load_reg( REG_EAX, Rm );
nkeynes@991
  1400
    load_reg( REG_ECX, Rn );
nkeynes@991
  1401
    SUBL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1402
    store_reg( REG_ECX, Rn );
nkeynes@417
  1403
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1404
:}
nkeynes@359
  1405
/* SUBC Rm, Rn: subtract with borrow (SBB) of Rm from Rn. LDC_t() is
 * emitted only when the cached T state is not already TSTATE_C; the
 * resulting carry is saved with SETC_t() and the state becomes TSTATE_C. */
SUBC Rm, Rn {:  
nkeynes@671
  1406
    COUNT_INST(I_SUBC);
nkeynes@991
  1407
    load_reg( REG_EAX, Rm );
nkeynes@991
  1408
    load_reg( REG_ECX, Rn );
nkeynes@417
  1409
    if( sh4_x86.tstate != TSTATE_C ) {
nkeynes@417
  1410
	LDC_t();
nkeynes@417
  1411
    }
nkeynes@991
  1412
    SBBL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1413
    store_reg( REG_ECX, Rn );
nkeynes@394
  1414
    SETC_t();
nkeynes@417
  1415
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1416
:}
nkeynes@359
  1417
/* SUBV Rm, Rn: emit Rn -= Rm, then SETO_t() to capture the overflow
 * condition of the subtraction; the cached T state becomes TSTATE_O. */
SUBV Rm, Rn {:  
nkeynes@671
  1418
    COUNT_INST(I_SUBV);
nkeynes@991
  1419
    load_reg( REG_EAX, Rm );
nkeynes@991
  1420
    load_reg( REG_ECX, Rn );
nkeynes@991
  1421
    SUBL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1422
    store_reg( REG_ECX, Rn );
nkeynes@359
  1423
    SETO_t();
nkeynes@417
  1424
    sh4_x86.tstate = TSTATE_O;
nkeynes@359
  1425
:}
nkeynes@359
  1426
/* SWAP.B Rm, Rn: copy Rm to Rn with its two lowest bytes exchanged
 * (XCHG AL,AH). sh4_x86.tstate is deliberately left untouched here, in
 * line with the note below that the exchange does not affect EFLAGS. */
SWAP.B Rm, Rn {:  
nkeynes@671
  1427
    COUNT_INST(I_SWAPB);
nkeynes@991
  1428
    load_reg( REG_EAX, Rm );
nkeynes@991
  1429
    XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
nkeynes@991
  1430
    store_reg( REG_EAX, Rn );
nkeynes@359
  1431
:}
nkeynes@359
  1432
/* SWAP.W Rm, Rn: copy Rm to Rn with its upper and lower 16-bit halves
 * exchanged. Resets the cached T state. */
SWAP.W Rm, Rn {:  
    /* Count under the SWAP.W statistic; this previously used I_SWAPB, which
     * attributed every SWAP.W to the SWAP.B counter. */
    COUNT_INST(I_SWAPW);
    load_reg( REG_EAX, Rm );
    MOVL_r32_r32( REG_EAX, REG_ECX );
    SHLL_imm_r32( 16, REG_ECX );      /* ECX = low half moved to the top */
    SHRL_imm_r32( 16, REG_EAX );      /* EAX = high half moved to the bottom */
    ORL_r32_r32( REG_EAX, REG_ECX );  /* ECX = swapped halves combined */
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1442
/* TAS.B @Rn: test-and-set on the byte at address Rn. The byte is read,
 * tested against zero (SETE_t captures the result), bit 7 is then set and
 * the byte written back. The address is stashed in REG_SAVE1 before the
 * read and restored into EAX for the write. Cached T state is reset. */
TAS.B @Rn {:  
nkeynes@671
  1443
    COUNT_INST(I_TASB);
nkeynes@991
  1444
    load_reg( REG_EAX, Rn );
nkeynes@1292
  1445
    MOVL_r32_r32( REG_EAX, REG_SAVE1 );
nkeynes@991
  1446
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
nkeynes@991
  1447
    TESTB_r8_r8( REG_DL, REG_DL );
nkeynes@361
  1448
    SETE_t();
nkeynes@991
  1449
    ORB_imms_r8( 0x80, REG_DL );
nkeynes@1292
  1450
    MOVL_r32_r32( REG_SAVE1, REG_EAX );
nkeynes@991
  1451
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
nkeynes@417
  1452
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1453
:}
nkeynes@361
  1454
/* TST Rm, Rn: emit TEST of Rm against Rn and capture the "equal/zero"
 * condition with SETE_t(); no register is written. The cached T state
 * becomes TSTATE_E. */
TST Rm, Rn {:  
nkeynes@671
  1455
    COUNT_INST(I_TST);
nkeynes@991
  1456
    load_reg( REG_EAX, Rm );
nkeynes@991
  1457
    load_reg( REG_ECX, Rn );
nkeynes@991
  1458
    TESTL_r32_r32( REG_EAX, REG_ECX );
nkeynes@361
  1459
    SETE_t();
nkeynes@417
  1460
    sh4_x86.tstate = TSTATE_E;
nkeynes@361
  1461
:}
nkeynes@368
  1462
/* TST #imm, R0: emit TEST of R0 against imm and capture the "equal/zero"
 * condition with SETE_t(). The cached T state becomes TSTATE_E. */
TST #imm, R0 {:  
nkeynes@671
  1463
    COUNT_INST(I_TSTI);
nkeynes@991
  1464
    load_reg( REG_EAX, 0 );
nkeynes@991
  1465
    TESTL_imms_r32( imm, REG_EAX );
nkeynes@368
  1466
    SETE_t();
nkeynes@417
  1467
    sh4_x86.tstate = TSTATE_E;
nkeynes@368
  1468
:}
nkeynes@368
  1469
/* TST.B #imm, @(R0, GBR): read the byte at R0+GBR, TEST its low 8 bits
 * against imm and capture the "equal/zero" condition with SETE_t().
 * Memory is only read. The cached T state becomes TSTATE_E. */
TST.B #imm, @(R0, GBR) {:  
nkeynes@671
  1470
    COUNT_INST(I_TSTB);
nkeynes@991
  1471
    load_reg( REG_EAX, 0);
nkeynes@991
  1472
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
nkeynes@991
  1473
    MEM_READ_BYTE( REG_EAX, REG_EAX );
nkeynes@991
  1474
    TESTB_imms_r8( imm, REG_AL );
nkeynes@368
  1475
    SETE_t();
nkeynes@417
  1476
    sh4_x86.tstate = TSTATE_E;
nkeynes@368
  1477
:}
nkeynes@359
  1478
/* XOR Rm, Rn: emit Rn ^= Rm. Rm goes to EAX, Rn to ECX; the result is left
 * in ECX and written back to Rn. Resets the cached T state. */
XOR Rm, Rn {:  
nkeynes@671
  1479
    COUNT_INST(I_XOR);
nkeynes@991
  1480
    load_reg( REG_EAX, Rm );
nkeynes@991
  1481
    load_reg( REG_ECX, Rn );
nkeynes@991
  1482
    XORL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1483
    store_reg( REG_ECX, Rn );
nkeynes@417
  1484
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1485
:}
nkeynes@359
  1486
/* XOR #imm, R0: emit R0 ^= imm via EAX. Resets the cached T state. */
XOR #imm, R0 {:  
nkeynes@671
  1487
    COUNT_INST(I_XORI);
nkeynes@991
  1488
    load_reg( REG_EAX, 0 );
nkeynes@991
  1489
    XORL_imms_r32( imm, REG_EAX );
nkeynes@991
  1490
    store_reg( REG_EAX, 0 );
nkeynes@417
  1491
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1492
:}
nkeynes@359
  1493
/* XOR.B #imm, @(R0, GBR): read-modify-write of the byte at R0+GBR.
 * The effective address is stashed in REG_SAVE1 before the read and copied
 * back into EAX afterwards so the same address is used for the write. */
XOR.B #imm, @(R0, GBR) {:  
nkeynes@671
  1494
    COUNT_INST(I_XORB);
nkeynes@991
  1495
    load_reg( REG_EAX, 0 );
nkeynes@991
  1496
    ADDL_rbpdisp_r32( R_GBR, REG_EAX ); 
nkeynes@1292
  1497
    MOVL_r32_r32( REG_EAX, REG_SAVE1 );
nkeynes@991
  1498
    MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
nkeynes@1292
  1499
    MOVL_r32_r32( REG_SAVE1, REG_EAX );
nkeynes@991
  1500
    XORL_imms_r32( imm, REG_EDX );
nkeynes@991
  1501
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
nkeynes@417
  1502
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1503
:}
nkeynes@361
  1504
/* XTRCT Rm, Rn: Rn = (Rm << 16) | (Rn >> 16), i.e. the low half of Rm
 * becomes the high half of the result and the high half of Rn becomes the
 * low half. Resets the cached T state. */
XTRCT Rm, Rn {:
nkeynes@671
  1505
    COUNT_INST(I_XTRCT);
nkeynes@991
  1506
    load_reg( REG_EAX, Rm );
nkeynes@991
  1507
    load_reg( REG_ECX, Rn );
nkeynes@991
  1508
    SHLL_imm_r32( 16, REG_EAX );
nkeynes@991
  1509
    SHRL_imm_r32( 16, REG_ECX );
nkeynes@991
  1510
    ORL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1511
    store_reg( REG_ECX, Rn );
nkeynes@417
  1512
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1513
:}
nkeynes@359
  1514
nkeynes@359
  1515
/* Data move instructions */
nkeynes@359
  1516
/* MOV Rm, Rn: register-to-register copy through EAX. The cached T state
 * is left unchanged. */
MOV Rm, Rn {:  
nkeynes@671
  1517
    COUNT_INST(I_MOV);
nkeynes@991
  1518
    load_reg( REG_EAX, Rm );
nkeynes@991
  1519
    store_reg( REG_EAX, Rn );
nkeynes@359
  1520
:}
nkeynes@359
  1521
/* MOV #imm, Rn: load the decoded immediate into Rn through EAX. The cached
 * T state is left unchanged. */
MOV #imm, Rn {:  
nkeynes@671
  1522
    COUNT_INST(I_MOVI);
nkeynes@995
  1523
    MOVL_imm32_r32( imm, REG_EAX );
nkeynes@991
  1524
    store_reg( REG_EAX, Rn );
nkeynes@359
  1525
:}
nkeynes@359
  1526
/* MOV.B Rm, @Rn: byte store of Rm to the address in Rn
 * (EAX = address, EDX = value). Resets the cached T state. */
MOV.B Rm, @Rn {:  
nkeynes@671
  1527
    COUNT_INST(I_MOVB);
nkeynes@991
  1528
    load_reg( REG_EAX, Rn );
nkeynes@991
  1529
    load_reg( REG_EDX, Rm );
nkeynes@991
  1530
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
nkeynes@417
  1531
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1532
:}
nkeynes@359
  1533
/* MOV.B Rm, @-Rn: pre-decrement byte store. The write goes to Rn-1; the
 * in-memory copy of Rn is only decremented after MEM_WRITE_BYTE has been
 * emitted, and Rm is loaded before that update. */
MOV.B Rm, @-Rn {:  
nkeynes@671
  1534
    COUNT_INST(I_MOVB);
nkeynes@991
  1535
    load_reg( REG_EAX, Rn );
nkeynes@991
  1536
    LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
nkeynes@991
  1537
    load_reg( REG_EDX, Rm );
nkeynes@991
  1538
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
nkeynes@991
  1539
    ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
nkeynes@417
  1540
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1541
:}
nkeynes@359
  1542
/* MOV.B Rm, @(R0, Rn): byte store of Rm to address R0 + Rn.
 * Resets the cached T state. */
MOV.B Rm, @(R0, Rn) {:  
nkeynes@671
  1543
    COUNT_INST(I_MOVB);
nkeynes@991
  1544
    load_reg( REG_EAX, 0 );
nkeynes@991
  1545
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
nkeynes@991
  1546
    load_reg( REG_EDX, Rm );
nkeynes@991
  1547
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
nkeynes@417
  1548
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1549
:}
nkeynes@359
  1550
/* MOV.B R0, @(disp, GBR): byte store of R0 to address GBR + disp.
 * Resets the cached T state. */
MOV.B R0, @(disp, GBR) {:  
nkeynes@671
  1551
    COUNT_INST(I_MOVB);
nkeynes@995
  1552
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
nkeynes@991
  1553
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1554
    load_reg( REG_EDX, 0 );
nkeynes@991
  1555
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
nkeynes@417
  1556
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1557
:}
nkeynes@359
  1558
/* MOV.B R0, @(disp, Rn): byte store of R0 to address Rn + disp.
 * Resets the cached T state. */
MOV.B R0, @(disp, Rn) {:  
nkeynes@671
  1559
    COUNT_INST(I_MOVB);
nkeynes@991
  1560
    load_reg( REG_EAX, Rn );
nkeynes@991
  1561
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1562
    load_reg( REG_EDX, 0 );
nkeynes@991
  1563
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
nkeynes@417
  1564
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1565
:}
nkeynes@359
  1566
/* MOV.B @Rm, Rn: byte load from the address in Rm into Rn; EAX holds the
 * address going in and the loaded value coming out. Resets the cached T
 * state. */
MOV.B @Rm, Rn {:  
nkeynes@671
  1567
    COUNT_INST(I_MOVB);
nkeynes@991
  1568
    load_reg( REG_EAX, Rm );
nkeynes@991
  1569
    MEM_READ_BYTE( REG_EAX, REG_EAX );
nkeynes@991
  1570
    store_reg( REG_EAX, Rn );
nkeynes@417
  1571
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1572
:}
nkeynes@359
  1573
/* MOV.B @Rm+, Rn: post-increment byte load. The increment of Rm is emitted
 * after the read and is skipped when Rm and Rn are the same register, in
 * which case the loaded value is what ends up in the register. */
MOV.B @Rm+, Rn {:  
nkeynes@671
  1574
    COUNT_INST(I_MOVB);
nkeynes@991
  1575
    load_reg( REG_EAX, Rm );
nkeynes@991
  1576
    MEM_READ_BYTE( REG_EAX, REG_EAX );
nkeynes@939
  1577
    if( Rm != Rn ) {
nkeynes@991
  1578
    	ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
nkeynes@939
  1579
    }
nkeynes@991
  1580
    store_reg( REG_EAX, Rn );
nkeynes@417
  1581
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1582
:}
nkeynes@359
  1583
/* MOV.B @(R0, Rm), Rn: byte load from address R0 + Rm into Rn.
 * Resets the cached T state. */
MOV.B @(R0, Rm), Rn {:  
nkeynes@671
  1584
    COUNT_INST(I_MOVB);
nkeynes@991
  1585
    load_reg( REG_EAX, 0 );
nkeynes@991
  1586
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
nkeynes@991
  1587
    MEM_READ_BYTE( REG_EAX, REG_EAX );
nkeynes@991
  1588
    store_reg( REG_EAX, Rn );
nkeynes@417
  1589
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1590
:}
nkeynes@359
  1591
/* MOV.B @(disp, GBR), R0: byte load from address GBR + disp into R0.
 * Resets the cached T state. */
MOV.B @(disp, GBR), R0 {:  
nkeynes@671
  1592
    COUNT_INST(I_MOVB);
nkeynes@995
  1593
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
nkeynes@991
  1594
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1595
    MEM_READ_BYTE( REG_EAX, REG_EAX );
nkeynes@991
  1596
    store_reg( REG_EAX, 0 );
nkeynes@417
  1597
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1598
:}
nkeynes@359
  1599
/* MOV.B @(disp, Rm), R0: byte load from address Rm + disp into R0.
 * Resets the cached T state. */
MOV.B @(disp, Rm), R0 {:  
nkeynes@671
  1600
    COUNT_INST(I_MOVB);
nkeynes@991
  1601
    load_reg( REG_EAX, Rm );
nkeynes@991
  1602
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1603
    MEM_READ_BYTE( REG_EAX, REG_EAX );
nkeynes@991
  1604
    store_reg( REG_EAX, 0 );
nkeynes@417
  1605
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1606
:}
nkeynes@374
  1607
/* MOV.L Rm, @Rn: 32-bit store of Rm to the address in Rn, after a write
 * alignment check. Two emitted paths:
 *   - (addr & 0xFC000000) == 0xE0000000: the value is written straight
 *     into the store_queue field (EBP-relative), indexed by addr & 0x3C,
 *     bypassing MEM_WRITE_LONG;
 *   - otherwise (label notsq): a normal MEM_WRITE_LONG. */
MOV.L Rm, @Rn {:
nkeynes@671
  1608
    COUNT_INST(I_MOVL);
nkeynes@991
  1609
    load_reg( REG_EAX, Rn );
nkeynes@991
  1610
    check_walign32(REG_EAX);
nkeynes@991
  1611
    MOVL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1612
    ANDL_imms_r32( 0xFC000000, REG_ECX );
nkeynes@991
  1613
    CMPL_imms_r32( 0xE0000000, REG_ECX );
nkeynes@991
  1614
    JNE_label( notsq );
nkeynes@991
  1615
    ANDL_imms_r32( 0x3C, REG_EAX );
nkeynes@991
  1616
    load_reg( REG_EDX, Rm );
nkeynes@991
  1617
    MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
nkeynes@991
  1618
    JMP_label(end);
nkeynes@930
  1619
    JMP_TARGET(notsq);
nkeynes@991
  1620
    load_reg( REG_EDX, Rm );
nkeynes@991
  1621
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@930
  1622
    JMP_TARGET(end);
nkeynes@417
  1623
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1624
:}
nkeynes@361
  1625
/* MOV.L Rm, @-Rn: pre-decrement 32-bit store. The write goes to Rn-4
 * (alignment-checked); the in-memory copy of Rn is only decremented after
 * MEM_WRITE_LONG has been emitted, and Rm is loaded before that update. */
MOV.L Rm, @-Rn {:  
nkeynes@671
  1626
    COUNT_INST(I_MOVL);
nkeynes@991
  1627
    load_reg( REG_EAX, Rn );
nkeynes@991
  1628
    ADDL_imms_r32( -4, REG_EAX );
nkeynes@991
  1629
    check_walign32( REG_EAX );
nkeynes@991
  1630
    load_reg( REG_EDX, Rm );
nkeynes@991
  1631
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@991
  1632
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
nkeynes@417
  1633
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1634
:}
nkeynes@361
  1635
/* MOV.L Rm, @(R0, Rn): 32-bit store of Rm to address R0 + Rn, after a
 * write alignment check. Resets the cached T state. */
MOV.L Rm, @(R0, Rn) {:  
nkeynes@671
  1636
    COUNT_INST(I_MOVL);
nkeynes@991
  1637
    load_reg( REG_EAX, 0 );
nkeynes@991
  1638
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
nkeynes@991
  1639
    check_walign32( REG_EAX );
nkeynes@991
  1640
    load_reg( REG_EDX, Rm );
nkeynes@991
  1641
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@417
  1642
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1643
:}
nkeynes@361
  1644
/* MOV.L R0, @(disp, GBR): 32-bit store of R0 to address GBR + disp, after
 * a write alignment check. Resets the cached T state. */
MOV.L R0, @(disp, GBR) {:  
nkeynes@671
  1645
    COUNT_INST(I_MOVL);
nkeynes@995
  1646
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
nkeynes@991
  1647
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1648
    check_walign32( REG_EAX );
nkeynes@991
  1649
    load_reg( REG_EDX, 0 );
nkeynes@991
  1650
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@417
  1651
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1652
:}
nkeynes@361
  1653
/* MOV.L Rm, @(disp, Rn): 32-bit store of Rm to address Rn + disp, after a
 * write alignment check. Two emitted paths:
 *   - (addr & 0xFC000000) == 0xE0000000: the value is written straight
 *     into the store_queue field (EBP-relative), indexed by addr & 0x3C,
 *     bypassing MEM_WRITE_LONG;
 *   - otherwise (label notsq): a normal MEM_WRITE_LONG. */
MOV.L Rm, @(disp, Rn) {:  
nkeynes@671
  1654
    COUNT_INST(I_MOVL);
nkeynes@991
  1655
    load_reg( REG_EAX, Rn );
nkeynes@991
  1656
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1657
    check_walign32( REG_EAX );
nkeynes@991
  1658
    MOVL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1659
    ANDL_imms_r32( 0xFC000000, REG_ECX );
nkeynes@991
  1660
    CMPL_imms_r32( 0xE0000000, REG_ECX );
nkeynes@991
  1661
    JNE_label( notsq );
nkeynes@991
  1662
    ANDL_imms_r32( 0x3C, REG_EAX );
nkeynes@991
  1663
    load_reg( REG_EDX, Rm );
nkeynes@991
  1664
    MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
nkeynes@991
  1665
    JMP_label(end);
nkeynes@930
  1666
    JMP_TARGET(notsq);
nkeynes@991
  1667
    load_reg( REG_EDX, Rm );
nkeynes@991
  1668
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@930
  1669
    JMP_TARGET(end);
nkeynes@417
  1670
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1671
:}
nkeynes@361
  1672
/* MOV.L @Rm, Rn: 32-bit load from the address in Rm into Rn, after a read
 * alignment check. Resets the cached T state. */
MOV.L @Rm, Rn {:  
nkeynes@671
  1673
    COUNT_INST(I_MOVL);
nkeynes@991
  1674
    load_reg( REG_EAX, Rm );
nkeynes@991
  1675
    check_ralign32( REG_EAX );
nkeynes@991
  1676
    MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  1677
    store_reg( REG_EAX, Rn );
nkeynes@417
  1678
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1679
:}
nkeynes@361
  1680
/* MOV.L @Rm+, Rn: post-increment 32-bit load. The +4 update of Rm is
 * emitted after the read and is skipped when Rm and Rn are the same
 * register, in which case the loaded value is what ends up in it. */
MOV.L @Rm+, Rn {:  
nkeynes@671
  1681
    COUNT_INST(I_MOVL);
nkeynes@991
  1682
    load_reg( REG_EAX, Rm );
nkeynes@991
  1683
    check_ralign32( REG_EAX );
nkeynes@991
  1684
    MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@939
  1685
    if( Rm != Rn ) {
nkeynes@991
  1686
    	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
nkeynes@939
  1687
    }
nkeynes@991
  1688
    store_reg( REG_EAX, Rn );
nkeynes@417
  1689
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1690
:}
nkeynes@361
  1691
/* MOV.L @(R0, Rm), Rn: 32-bit load from address R0 + Rm into Rn, after a
 * read alignment check. Resets the cached T state. */
MOV.L @(R0, Rm), Rn {:  
nkeynes@671
  1692
    COUNT_INST(I_MOVL);
nkeynes@991
  1693
    load_reg( REG_EAX, 0 );
nkeynes@991
  1694
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
nkeynes@991
  1695
    check_ralign32( REG_EAX );
nkeynes@991
  1696
    MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  1697
    store_reg( REG_EAX, Rn );
nkeynes@417
  1698
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1699
:}
nkeynes@361
  1700
/* MOV.L @(disp, GBR), R0: 32-bit load from address GBR + disp into R0,
 * after a read alignment check. Resets the cached T state. */
MOV.L @(disp, GBR), R0 {:
nkeynes@671
  1701
    COUNT_INST(I_MOVL);
nkeynes@995
  1702
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
nkeynes@991
  1703
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1704
    check_ralign32( REG_EAX );
nkeynes@991
  1705
    MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  1706
    store_reg( REG_EAX, 0 );
nkeynes@417
  1707
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1708
:}
nkeynes@361
  1709
/* MOV.L @(disp, PC), Rn: PC-relative 32-bit load; illegal in a delay slot.
 * The target is (pc & ~3) + disp + 4. Two code paths are generated:
 *   - fastmem enabled and target inside the current icache page: the value
 *     is fetched with a direct host-pointer load (T state untouched);
 *   - otherwise: the address is rebuilt at run time from sh4r.pc plus the
 *     block-relative offset and read through MEM_READ_LONG. */
MOV.L @(disp, PC), Rn {:  
nkeynes@671
  1710
    COUNT_INST(I_MOVLPC);
nkeynes@374
  1711
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  1712
	SLOTILLEGAL();
nkeynes@374
  1713
    } else {
nkeynes@388
  1714
	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
nkeynes@1125
  1715
	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
nkeynes@586
  1716
	    // If the target address is in the same page as the code, it's
nkeynes@586
  1717
	    // pretty safe to just ref it directly and circumvent the whole
nkeynes@586
  1718
	    // memory subsystem. (this is a big performance win)
nkeynes@586
  1719
nkeynes@586
  1720
	    // FIXME: There's a corner-case that's not handled here when
nkeynes@586
  1721
	    // the current code-page is in the ITLB but not in the UTLB.
nkeynes@586
  1722
	    // (should generate a TLB miss although need to test SH4 
nkeynes@586
  1723
	    // behaviour to confirm) Unlikely to be anyone depending on this
nkeynes@586
  1724
	    // behaviour though.
nkeynes@586
  1725
	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
nkeynes@991
  1726
	    MOVL_moffptr_eax( ptr );
nkeynes@388
  1727
	} else {
nkeynes@586
  1728
	    // Note: we use sh4r.pc for the calc as we could be running at a
nkeynes@586
  1729
	    // different virtual address than the translation was done with,
nkeynes@586
  1730
	    // but we can safely assume that the low bits are the same.
nkeynes@995
  1731
	    MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
nkeynes@991
  1732
	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@991
  1733
	    MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@586
  1734
	    sh4_x86.tstate = TSTATE_NONE;
nkeynes@388
  1735
	}
nkeynes@991
  1736
	store_reg( REG_EAX, Rn );
nkeynes@374
  1737
    }
nkeynes@361
  1738
:}
nkeynes@361
  1739
/* MOV.L @(disp, Rm), Rn: 32-bit load from address Rm + disp into Rn, after
 * a read alignment check. Resets the cached T state. */
MOV.L @(disp, Rm), Rn {:  
nkeynes@671
  1740
    COUNT_INST(I_MOVL);
nkeynes@991
  1741
    load_reg( REG_EAX, Rm );
nkeynes@991
  1742
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1743
    check_ralign32( REG_EAX );
nkeynes@991
  1744
    MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  1745
    store_reg( REG_EAX, Rn );
nkeynes@417
  1746
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1747
:}
nkeynes@361
  1748
/* MOV.W Rm, @Rn: 16-bit store of Rm to the address in Rn, after a write
 * alignment check. Resets the cached T state. */
MOV.W Rm, @Rn {:  
nkeynes@671
  1749
    COUNT_INST(I_MOVW);
nkeynes@991
  1750
    load_reg( REG_EAX, Rn );
nkeynes@991
  1751
    check_walign16( REG_EAX );
nkeynes@991
  1752
    load_reg( REG_EDX, Rm );
nkeynes@991
  1753
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
nkeynes@417
  1754
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1755
:}
nkeynes@361
  1756
/* MOV.W Rm, @-Rn: pre-decrement 16-bit store. Here the alignment check is
 * emitted on Rn before the -2 adjustment; the write then goes to Rn-2 and
 * the in-memory copy of Rn is only decremented after MEM_WRITE_WORD. */
MOV.W Rm, @-Rn {:  
nkeynes@671
  1757
    COUNT_INST(I_MOVW);
nkeynes@991
  1758
    load_reg( REG_EAX, Rn );
nkeynes@991
  1759
    check_walign16( REG_EAX );
nkeynes@991
  1760
    LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
nkeynes@991
  1761
    load_reg( REG_EDX, Rm );
nkeynes@991
  1762
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
nkeynes@991
  1763
    ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
nkeynes@417
  1764
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1765
:}
nkeynes@361
  1766
/* MOV.W Rm, @(R0, Rn): 16-bit store of Rm to address R0 + Rn, after a
 * write alignment check. Resets the cached T state. */
MOV.W Rm, @(R0, Rn) {:  
nkeynes@671
  1767
    COUNT_INST(I_MOVW);
nkeynes@991
  1768
    load_reg( REG_EAX, 0 );
nkeynes@991
  1769
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
nkeynes@991
  1770
    check_walign16( REG_EAX );
nkeynes@991
  1771
    load_reg( REG_EDX, Rm );
nkeynes@991
  1772
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
nkeynes@417
  1773
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1774
:}
nkeynes@361
  1775
/* MOV.W R0, @(disp, GBR): 16-bit store of R0 to address GBR + disp, after
 * a write alignment check. Resets the cached T state. */
MOV.W R0, @(disp, GBR) {:  
nkeynes@671
  1776
    COUNT_INST(I_MOVW);
nkeynes@995
  1777
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
nkeynes@991
  1778
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1779
    check_walign16( REG_EAX );
nkeynes@991
  1780
    load_reg( REG_EDX, 0 );
nkeynes@991
  1781
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
nkeynes@417
  1782
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1783
:}
nkeynes@361
  1784
/* MOV.W R0, @(disp, Rn): 16-bit store of R0 to address Rn + disp, after a
 * write alignment check. Resets the cached T state. */
MOV.W R0, @(disp, Rn) {:  
nkeynes@671
  1785
    COUNT_INST(I_MOVW);
nkeynes@991
  1786
    load_reg( REG_EAX, Rn );
nkeynes@991
  1787
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1788
    check_walign16( REG_EAX );
nkeynes@991
  1789
    load_reg( REG_EDX, 0 );
nkeynes@991
  1790
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
nkeynes@417
  1791
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1792
:}
nkeynes@361
  1793
/* MOV.W @Rm, Rn: 16-bit load from the address in Rm into Rn, after a read
 * alignment check. Resets the cached T state. */
MOV.W @Rm, Rn {:  
nkeynes@671
  1794
    COUNT_INST(I_MOVW);
nkeynes@991
  1795
    load_reg( REG_EAX, Rm );
nkeynes@991
  1796
    check_ralign16( REG_EAX );
nkeynes@991
  1797
    MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@991
  1798
    store_reg( REG_EAX, Rn );
nkeynes@417
  1799
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1800
:}
nkeynes@361
  1801
/* MOV.W @Rm+, Rn: post-increment 16-bit load. The +2 update of Rm is
 * emitted after the read and is skipped when Rm and Rn are the same
 * register, in which case the loaded value is what ends up in it. */
MOV.W @Rm+, Rn {:  
nkeynes@671
  1802
    COUNT_INST(I_MOVW);
nkeynes@991
  1803
    load_reg( REG_EAX, Rm );
nkeynes@991
  1804
    check_ralign16( REG_EAX );
nkeynes@991
  1805
    MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@939
  1806
    if( Rm != Rn ) {
nkeynes@991
  1807
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
nkeynes@939
  1808
    }
nkeynes@991
  1809
    store_reg( REG_EAX, Rn );
nkeynes@417
  1810
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1811
:}
nkeynes@361
  1812
/* MOV.W @(R0, Rm), Rn: 16-bit load from address R0 + Rm into Rn, after a
 * read alignment check. Resets the cached T state. */
MOV.W @(R0, Rm), Rn {:  
nkeynes@671
  1813
    COUNT_INST(I_MOVW);
nkeynes@991
  1814
    load_reg( REG_EAX, 0 );
nkeynes@991
  1815
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
nkeynes@991
  1816
    check_ralign16( REG_EAX );
nkeynes@991
  1817
    MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@991
  1818
    store_reg( REG_EAX, Rn );
nkeynes@417
  1819
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1820
:}
nkeynes@361
  1821
/* MOV.W @(disp, GBR), R0: 16-bit load from address GBR + disp into R0,
 * after a read alignment check. Resets the cached T state. */
MOV.W @(disp, GBR), R0 {:  
nkeynes@671
  1822
    COUNT_INST(I_MOVW);
nkeynes@995
  1823
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
nkeynes@991
  1824
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1825
    check_ralign16( REG_EAX );
nkeynes@991
  1826
    MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@991
  1827
    store_reg( REG_EAX, 0 );
nkeynes@417
  1828
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1829
:}
nkeynes@361
  1830
/* MOV.W @(disp, PC), Rn: PC-relative 16-bit load; illegal in a delay slot.
 * The target is pc + disp + 4 (no masking of the low PC bits here).
 *   - fastmem enabled and target inside the current icache page: a direct
 *     32-bit host load followed by sign-extension of the low 16 bits;
 *   - otherwise: the address is rebuilt at run time from sh4r.pc plus the
 *     block-relative offset and read through MEM_READ_WORD.
 * NOTE(review): the fast path loads 4 bytes for a 2-byte value; confirm
 * this cannot read past the end of the icache page. */
MOV.W @(disp, PC), Rn {:  
nkeynes@671
  1831
    COUNT_INST(I_MOVW);
nkeynes@374
  1832
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  1833
	SLOTILLEGAL();
nkeynes@374
  1834
    } else {
nkeynes@586
  1835
	// See comments for MOV.L @(disp, PC), Rn
nkeynes@586
  1836
	uint32_t target = pc + disp + 4;
nkeynes@1125
  1837
	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
nkeynes@586
  1838
	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
nkeynes@991
  1839
	    MOVL_moffptr_eax( ptr );
nkeynes@991
  1840
	    MOVSXL_r16_r32( REG_EAX, REG_EAX );
nkeynes@586
  1841
	} else {
nkeynes@995
  1842
	    MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
nkeynes@991
  1843
	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@991
  1844
	    MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@586
  1845
	    sh4_x86.tstate = TSTATE_NONE;
nkeynes@586
  1846
	}
nkeynes@991
  1847
	store_reg( REG_EAX, Rn );
nkeynes@374
  1848
    }
nkeynes@361
  1849
:}
nkeynes@361
  1850
/* MOV.W @(disp, Rm), R0: 16-bit load from address Rm + disp into R0, after
 * a read alignment check. Resets the cached T state. */
MOV.W @(disp, Rm), R0 {:  
nkeynes@671
  1851
    COUNT_INST(I_MOVW);
nkeynes@991
  1852
    load_reg( REG_EAX, Rm );
nkeynes@991
  1853
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1854
    check_ralign16( REG_EAX );
nkeynes@991
  1855
    MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@991
  1856
    store_reg( REG_EAX, 0 );
nkeynes@417
  1857
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1858
:}
nkeynes@361
  1859
/* MOVA @(disp, PC), R0: store the PC-relative address itself into R0 (no
 * memory access); illegal in a delay slot. The address is computed at run
 * time as sh4r.pc plus the block-relative offset of (pc & ~3) + disp + 4. */
MOVA @(disp, PC), R0 {:  
nkeynes@671
  1860
    COUNT_INST(I_MOVA);
nkeynes@374
  1861
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  1862
	SLOTILLEGAL();
nkeynes@374
  1863
    } else {
nkeynes@995
  1864
	MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
nkeynes@991
  1865
	ADDL_rbpdisp_r32( R_PC, REG_ECX );
nkeynes@991
  1866
	store_reg( REG_ECX, 0 );
nkeynes@586
  1867
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@374
  1868
    }
nkeynes@361
  1869
:}
nkeynes@361
  1870
/* MOVCA.L R0, @Rn: translated here as a plain alignment-checked 32-bit
 * store of R0 to the address in Rn. Resets the cached T state. */
MOVCA.L R0, @Rn {:  
nkeynes@671
  1871
    COUNT_INST(I_MOVCA);
nkeynes@991
  1872
    load_reg( REG_EAX, Rn );
nkeynes@991
  1873
    check_walign32( REG_EAX );
nkeynes@991
  1874
    load_reg( REG_EDX, 0 );
nkeynes@991
  1875
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@417
  1876
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1877
:}
nkeynes@359
  1878
nkeynes@359
  1879
/* Control transfer instructions */
nkeynes@374
  1880
/* BF disp: branch if T is false, no delay slot; illegal in a delay slot.
 * Emits a jump-if-T over a block exit to pc + disp + 4, so execution falls
 * through to label nottaken when T is set. Returns 2 from the translator
 * in the non-delay-slot case. */
BF disp {:
nkeynes@671
  1881
    COUNT_INST(I_BF);
nkeynes@374
  1882
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  1883
	SLOTILLEGAL();
nkeynes@374
  1884
    } else {
nkeynes@586
  1885
	sh4vma_t target = disp + pc + 4;
nkeynes@991
  1886
	JT_label( nottaken );
nkeynes@586
  1887
	exit_block_rel(target, pc+2 );
nkeynes@380
  1888
	JMP_TARGET(nottaken);
nkeynes@408
  1889
	return 2;
nkeynes@374
  1890
    }
nkeynes@374
  1891
:}
nkeynes@374
  1892
/* BF/S disp: branch if T is false, with delay slot; illegal in a delay slot.
 *   - Delay-slot instruction untranslatable: compute the new PC at run time
 *     (pc + 4, plus disp unless T is set, relative to sh4r.pc), store it in
 *     R_NEW_PC and leave the block via exit_block_emu(); returns 2.
 *   - Otherwise: emit a conditional rel32 jump with a zero placeholder,
 *     translate the delay slot followed by the taken exit, back-patch the
 *     jump displacement to land just after that exit, restore the saved T
 *     state and translate the delay slot a second time for the not-taken
 *     path; returns 4. */
BF/S disp {:
nkeynes@671
  1893
    COUNT_INST(I_BFS);
nkeynes@374
  1894
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  1895
	SLOTILLEGAL();
nkeynes@374
  1896
    } else {
nkeynes@590
  1897
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@601
  1898
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@995
  1899
	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
nkeynes@991
  1900
	    JT_label(nottaken);
nkeynes@991
  1901
	    ADDL_imms_r32( disp, REG_EAX );
nkeynes@601
  1902
	    JMP_TARGET(nottaken);
nkeynes@991
  1903
	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@995
  1904
	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
nkeynes@601
  1905
	    exit_block_emu(pc+2);
nkeynes@601
  1906
	    sh4_x86.branch_taken = TRUE;
nkeynes@601
  1907
	    return 2;
nkeynes@601
  1908
	} else {
nkeynes@1197
  1909
	    LOAD_t();
nkeynes@601
  1910
	    sh4vma_t target = disp + pc + 4;
nkeynes@991
  1911
	    JCC_cc_rel32(sh4_x86.tstate,0);
nkeynes@991
  1912
	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
nkeynes@879
  1913
	    int save_tstate = sh4_x86.tstate;
nkeynes@601
  1914
	    sh4_translate_instruction(pc+2);
nkeynes@1091
  1915
            sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
nkeynes@601
  1916
	    exit_block_rel( target, pc+4 );
nkeynes@601
  1917
	    
nkeynes@601
  1918
	    // not taken
nkeynes@601
  1919
	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
nkeynes@879
  1920
	    sh4_x86.tstate = save_tstate;
nkeynes@601
  1921
	    sh4_translate_instruction(pc+2);
nkeynes@601
  1922
	    return 4;
nkeynes@417
  1923
	}
nkeynes@374
  1924
    }
nkeynes@374
  1925
:}
nkeynes@374
  1926
/* BRA disp: unconditional branch with delay slot; illegal in a delay slot.
 *   - Delay-slot instruction untranslatable: store sh4r.pc plus the
 *     block-relative offset of pc + disp + 4 into R_NEW_PC and leave via
 *     exit_block_emu(); returns 2.
 *   - Otherwise: translate the delay slot, then exit the block to
 *     pc + disp + 4; returns 4. */
BRA disp {:  
nkeynes@671
  1927
    COUNT_INST(I_BRA);
nkeynes@374
  1928
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  1929
	SLOTILLEGAL();
nkeynes@374
  1930
    } else {
nkeynes@590
  1931
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@409
  1932
	sh4_x86.branch_taken = TRUE;
nkeynes@601
  1933
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@995
  1934
	    MOVL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@991
  1935
	    ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
nkeynes@995
  1936
	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
nkeynes@601
  1937
	    exit_block_emu(pc+2);
nkeynes@601
  1938
	    return 2;
nkeynes@601
  1939
	} else {
nkeynes@601
  1940
	    sh4_translate_instruction( pc + 2 );
nkeynes@601
  1941
	    exit_block_rel( disp + pc + 4, pc+4 );
nkeynes@601
  1942
	    return 4;
nkeynes@601
  1943
	}
nkeynes@374
  1944
    }
nkeynes@374
  1945
:}
nkeynes@374
  1946
/*
 * BRAF Rn - delayed branch to (pc+4) + Rn.
 * The target is computed up front (R_PC + block-relative offset of pc+4 + Rn)
 * and stored in R_NEW_PC before the delay slot is handled. The flag cache is
 * reset (tstate = TSTATE_NONE) after the emitted ADDs. Exits via
 * exit_block_emu(pc+2) (return 2) when the slot instruction is UNTRANSLATABLE,
 * else translates it and exits via exit_block_newpcset(pc+4) (return 4).
 */
BRAF Rn {:  
nkeynes@671
  1947
    COUNT_INST(I_BRAF);
nkeynes@374
  1948
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  1949
	SLOTILLEGAL();
nkeynes@374
  1950
    } else {
nkeynes@995
  1951
	MOVL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@991
  1952
	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
nkeynes@991
  1953
	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
nkeynes@995
  1954
	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
nkeynes@590
  1955
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@417
  1956
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@409
  1957
	sh4_x86.branch_taken = TRUE;
nkeynes@601
  1958
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@601
  1959
	    exit_block_emu(pc+2);
nkeynes@601
  1960
	    return 2;
nkeynes@601
  1961
	} else {
nkeynes@601
  1962
	    sh4_translate_instruction( pc + 2 );
nkeynes@974
  1963
	    exit_block_newpcset(pc+4);
nkeynes@601
  1964
	    return 4;
nkeynes@601
  1965
	}
nkeynes@374
  1966
    }
nkeynes@374
  1967
:}
nkeynes@374
  1968
/*
 * BSR disp - delayed subroutine call to pc+4+disp.
 * The return address (R_PC + block-relative offset of pc+4) is stored in R_PR
 * first; EAX still holds that value afterwards, so the UNTRANSLATABLE path
 * only needs to add disp to obtain R_NEW_PC. Returns 2 via exit_block_emu()
 * on that path, or 4 after translating the slot instruction and emitting
 * exit_block_rel() to the target.
 */
BSR disp {:  
nkeynes@671
  1969
    COUNT_INST(I_BSR);
nkeynes@374
  1970
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  1971
	SLOTILLEGAL();
nkeynes@374
  1972
    } else {
nkeynes@995
  1973
	MOVL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@991
  1974
	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
nkeynes@995
  1975
	MOVL_r32_rbpdisp( REG_EAX, R_PR );
nkeynes@590
  1976
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@409
  1977
	sh4_x86.branch_taken = TRUE;
nkeynes@601
  1978
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@601
  1979
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@991
  1980
	    ADDL_imms_r32( disp, REG_EAX );
nkeynes@995
  1981
	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
nkeynes@601
  1982
	    exit_block_emu(pc+2);
nkeynes@601
  1983
	    return 2;
nkeynes@601
  1984
	} else {
nkeynes@601
  1985
	    sh4_translate_instruction( pc + 2 );
nkeynes@601
  1986
	    exit_block_rel( disp + pc + 4, pc+4 );
nkeynes@601
  1987
	    return 4;
nkeynes@601
  1988
	}
nkeynes@374
  1989
    }
nkeynes@374
  1990
:}
nkeynes@374
  1991
/*
 * BSRF Rn - delayed subroutine call to (pc+4) + Rn.
 * Stores the return address (R_PC + block-relative offset of pc+4) in R_PR,
 * then adds Rn to the same value and stores it in R_NEW_PC. The delay slot is
 * then handled like the other register-indirect branches: exit_block_emu(pc+2)
 * and return 2 if UNTRANSLATABLE, otherwise translate the slot instruction,
 * exit_block_newpcset(pc+4) and return 4.
 */
BSRF Rn {:  
nkeynes@671
  1992
    COUNT_INST(I_BSRF);
nkeynes@374
  1993
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  1994
	SLOTILLEGAL();
nkeynes@374
  1995
    } else {
nkeynes@995
  1996
	MOVL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@991
  1997
	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
nkeynes@995
  1998
	MOVL_r32_rbpdisp( REG_EAX, R_PR );
nkeynes@991
  1999
	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
nkeynes@995
  2000
	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
nkeynes@590
  2001
nkeynes@601
  2002
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@417
  2003
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@409
  2004
	sh4_x86.branch_taken = TRUE;
nkeynes@601
  2005
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@601
  2006
	    exit_block_emu(pc+2);
nkeynes@601
  2007
	    return 2;
nkeynes@601
  2008
	} else {
nkeynes@601
  2009
	    sh4_translate_instruction( pc + 2 );
nkeynes@974
  2010
	    exit_block_newpcset(pc+4);
nkeynes@601
  2011
	    return 4;
nkeynes@601
  2012
	}
nkeynes@374
  2013
    }
nkeynes@374
  2014
:}
nkeynes@374
  2015
/*
 * BT disp - conditional branch without delay slot.
 * Emits a JF_label skip around an exit_block_rel() to pc+4+disp, so the
 * block exit is only reached when the skip is not taken (presumably T set -
 * confirm against JF_label). Execution otherwise continues at the
 * 'nottaken' label. Rejected with SLOTILLEGAL() inside a delay slot.
 * Returns 2.
 */
BT disp {:
nkeynes@671
  2016
    COUNT_INST(I_BT);
nkeynes@374
  2017
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2018
	SLOTILLEGAL();
nkeynes@374
  2019
    } else {
nkeynes@586
  2020
	sh4vma_t target = disp + pc + 4;
nkeynes@991
  2021
	JF_label( nottaken );
nkeynes@586
  2022
	exit_block_rel(target, pc+2 );
nkeynes@380
  2023
	JMP_TARGET(nottaken);
nkeynes@408
  2024
	return 2;
nkeynes@374
  2025
    }
nkeynes@374
  2026
:}
nkeynes@374
  2027
/*
 * BT/S disp - conditional branch with delay slot; mirror image of BF/S.
 * The emulated path skips the "+disp" with JF_label, and the translated path
 * jumps on the inverted condition (tstate^1) to the not-taken copy.
 * Rejected with SLOTILLEGAL() inside a delay slot. Returns 2 on the
 * UNTRANSLATABLE path (exit via exit_block_emu), 4 otherwise, where the slot
 * instruction is translated once per path.
 */
BT/S disp {:
nkeynes@671
  2028
    COUNT_INST(I_BTS);
nkeynes@374
  2029
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2030
	SLOTILLEGAL();
nkeynes@374
  2031
    } else {
nkeynes@590
  2032
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@601
  2033
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@995
  2034
	    /* EAX = offset of the fall-through address (pc+4) from the block start */
	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
nkeynes@991
  2035
	    JF_label(nottaken);
nkeynes@991
  2036
	    ADDL_imms_r32( disp, REG_EAX );
nkeynes@601
  2037
	    JMP_TARGET(nottaken);
nkeynes@991
  2038
	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@995
  2039
	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
nkeynes@601
  2040
	    exit_block_emu(pc+2);
nkeynes@601
  2041
	    sh4_x86.branch_taken = TRUE;
nkeynes@601
  2042
	    return 2;
nkeynes@601
  2043
	} else {
nkeynes@1197
  2044
		LOAD_t();
nkeynes@991
  2045
	    /* Inverted-condition jump with a zero rel32 placeholder; patched below */
	    JCC_cc_rel32(sh4_x86.tstate^1,0);
nkeynes@991
  2046
	    /* Points at the 4-byte displacement that was just emitted */
	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
nkeynes@991
  2047
nkeynes@879
  2048
	    int save_tstate = sh4_x86.tstate;
nkeynes@601
  2049
	    sh4_translate_instruction(pc+2);
nkeynes@1091
  2050
            sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
nkeynes@601
  2051
	    exit_block_rel( disp + pc + 4, pc+4 );
nkeynes@601
  2052
	    // not taken
nkeynes@601
  2053
	    /* Displacement is measured from the end of the rel32 field, hence -4 */
	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
nkeynes@879
  2054
	    sh4_x86.tstate = save_tstate;
nkeynes@601
  2055
	    sh4_translate_instruction(pc+2);
nkeynes@601
  2056
	    return 4;
nkeynes@417
  2057
	}
nkeynes@374
  2058
    }
nkeynes@374
  2059
:}
nkeynes@374
  2060
/*
 * JMP @Rn - delayed jump to the address held in Rn.
 * Rn is copied to R_NEW_PC before the delay slot is processed. Rejected with
 * SLOTILLEGAL() inside a delay slot. Returns 2 via exit_block_emu(pc+2) when
 * the slot instruction is UNTRANSLATABLE, else translates it, emits
 * exit_block_newpcset(pc+4) and returns 4.
 */
JMP @Rn {:  
nkeynes@671
  2061
    COUNT_INST(I_JMP);
nkeynes@374
  2062
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2063
	SLOTILLEGAL();
nkeynes@374
  2064
    } else {
nkeynes@991
  2065
	load_reg( REG_ECX, Rn );
nkeynes@995
  2066
	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
nkeynes@590
  2067
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@409
  2068
	sh4_x86.branch_taken = TRUE;
nkeynes@601
  2069
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@601
  2070
	    exit_block_emu(pc+2);
nkeynes@601
  2071
	    return 2;
nkeynes@601
  2072
	} else {
nkeynes@601
  2073
	    sh4_translate_instruction(pc+2);
nkeynes@974
  2074
	    exit_block_newpcset(pc+4);
nkeynes@601
  2075
	    return 4;
nkeynes@601
  2076
	}
nkeynes@374
  2077
    }
nkeynes@374
  2078
:}
nkeynes@374
  2079
/*
 * JSR @Rn - delayed subroutine call to the address held in Rn.
 * Stores the return address (R_PC + block-relative offset of pc+4) in R_PR,
 * then copies Rn to R_NEW_PC. The flag cache is reset after the emitted ADD.
 * Delay-slot handling and return values (2 / 4) match JMP.
 */
JSR @Rn {:  
nkeynes@671
  2080
    COUNT_INST(I_JSR);
nkeynes@374
  2081
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2082
	SLOTILLEGAL();
nkeynes@374
  2083
    } else {
nkeynes@995
  2084
	MOVL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@991
  2085
	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
nkeynes@995
  2086
	MOVL_r32_rbpdisp( REG_EAX, R_PR );
nkeynes@991
  2087
	load_reg( REG_ECX, Rn );
nkeynes@995
  2088
	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
nkeynes@601
  2089
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@409
  2090
	sh4_x86.branch_taken = TRUE;
nkeynes@601
  2091
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@601
  2092
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@601
  2093
	    exit_block_emu(pc+2);
nkeynes@601
  2094
	    return 2;
nkeynes@601
  2095
	} else {
nkeynes@601
  2096
	    sh4_translate_instruction(pc+2);
nkeynes@974
  2097
	    exit_block_newpcset(pc+4);
nkeynes@601
  2098
	    return 4;
nkeynes@601
  2099
	}
nkeynes@374
  2100
    }
nkeynes@374
  2101
:}
nkeynes@374
  2102
/*
 * RTE - return from exception (delayed).
 * After check_priv(), copies SPC to R_NEW_PC and passes SSR to
 * sh4_write_sr(). Because SR has just been rewritten, the cached translator
 * state is invalidated: fpuen_checked is cleared, tstate is reset and
 * sh4_mode becomes SH4_MODE_UNKNOWN. Delay-slot handling and return values
 * (2 / 4) match the other register-indirect branches.
 */
RTE {:  
nkeynes@671
  2103
    COUNT_INST(I_RTE);
nkeynes@374
  2104
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2105
	SLOTILLEGAL();
nkeynes@374
  2106
    } else {
nkeynes@408
  2107
	check_priv();
nkeynes@995
  2108
	MOVL_rbpdisp_r32( R_SPC, REG_ECX );
nkeynes@995
  2109
	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
nkeynes@995
  2110
	MOVL_rbpdisp_r32( R_SSR, REG_EAX );
nkeynes@995
  2111
	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
nkeynes@590
  2112
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@377
  2113
	sh4_x86.fpuen_checked = FALSE;
nkeynes@417
  2114
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@409
  2115
	sh4_x86.branch_taken = TRUE;
nkeynes@1112
  2116
    sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
nkeynes@601
  2117
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@601
  2118
	    exit_block_emu(pc+2);
nkeynes@601
  2119
	    return 2;
nkeynes@601
  2120
	} else {
nkeynes@601
  2121
	    sh4_translate_instruction(pc+2);
nkeynes@974
  2122
	    exit_block_newpcset(pc+4);
nkeynes@601
  2123
	    return 4;
nkeynes@601
  2124
	}
nkeynes@374
  2125
    }
nkeynes@374
  2126
:}
nkeynes@374
  2127
/*
 * RTS - return from subroutine (delayed).
 * Copies PR to R_NEW_PC, then handles the delay slot: exit_block_emu(pc+2)
 * and return 2 if the slot instruction is UNTRANSLATABLE, otherwise translate
 * it, emit exit_block_newpcset(pc+4) and return 4. Rejected with
 * SLOTILLEGAL() inside a delay slot.
 */
RTS {:  
nkeynes@671
  2128
    COUNT_INST(I_RTS);
nkeynes@374
  2129
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2130
	SLOTILLEGAL();
nkeynes@374
  2131
    } else {
nkeynes@995
  2132
	MOVL_rbpdisp_r32( R_PR, REG_ECX );
nkeynes@995
  2133
	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
nkeynes@590
  2134
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@409
  2135
	sh4_x86.branch_taken = TRUE;
nkeynes@601
  2136
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@601
  2137
	    exit_block_emu(pc+2);
nkeynes@601
  2138
	    return 2;
nkeynes@601
  2139
	} else {
nkeynes@601
  2140
	    sh4_translate_instruction(pc+2);
nkeynes@974
  2141
	    exit_block_newpcset(pc+4);
nkeynes@601
  2142
	    return 4;
nkeynes@601
  2143
	}
nkeynes@374
  2144
    }
nkeynes@374
  2145
:}
nkeynes@374
  2146
/*
 * TRAPA #imm - software trap.
 * Advances the stored PC by the block-relative offset of the following
 * instruction (pc+2), calls sh4_raise_trap(imm) and ends the block with
 * exit_block_pcset(pc+2). Rejected with SLOTILLEGAL() inside a delay slot.
 * Returns 2.
 */
TRAPA #imm {:  
nkeynes@671
  2147
    COUNT_INST(I_TRAPA);
nkeynes@374
  2148
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2149
	SLOTILLEGAL();
nkeynes@374
  2150
    } else {
nkeynes@995
  2151
	MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX );   // 5
nkeynes@991
  2152
	ADDL_r32_rbpdisp( REG_ECX, R_PC );
nkeynes@995
  2153
	MOVL_imm32_r32( imm, REG_EAX );
nkeynes@995
  2154
	CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
nkeynes@417
  2155
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@974
  2156
	exit_block_pcset(pc+2);
nkeynes@409
  2157
	sh4_x86.branch_taken = TRUE;
nkeynes@408
  2158
	return 2;
nkeynes@374
  2159
    }
nkeynes@374
  2160
:}
nkeynes@374
  2161
/*
 * UNDEF - undefined opcode.
 * Emits an exception exit: EXC_SLOT_ILLEGAL (reported against pc-2) when in
 * a delay slot, EXC_ILLEGAL at pc otherwise.
 * NOTE(review): unlike every other rule here, the delay-slot branch calls
 * exit_block_exc() directly instead of SLOTILLEGAL() and has no return, so
 * control falls out of the action - confirm this difference is intentional.
 */
UNDEF {:  
nkeynes@671
  2162
    COUNT_INST(I_UNDEF);
nkeynes@374
  2163
    if( sh4_x86.in_delay_slot ) {
nkeynes@1191
  2164
	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4);    
nkeynes@374
  2165
    } else {
nkeynes@1191
  2166
	exit_block_exc(EXC_ILLEGAL, pc, 2);    
nkeynes@408
  2167
	return 2;
nkeynes@374
  2168
    }
nkeynes@368
  2169
:}
nkeynes@374
  2170
nkeynes@374
  2171
/*
 * CLRMAC - clear the MAC register pair.
 * Zeroes EAX with XOR and stores it to both R_MACL and R_MACH; the flag
 * cache is reset afterwards.
 */
CLRMAC {:  
nkeynes@671
  2172
    COUNT_INST(I_CLRMAC);
nkeynes@991
  2173
    XORL_r32_r32(REG_EAX, REG_EAX);
nkeynes@995
  2174
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
nkeynes@995
  2175
    MOVL_r32_rbpdisp( REG_EAX, R_MACH );
nkeynes@417
  2176
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@368
  2177
:}
nkeynes@374
  2178
/*
 * CLRS - clear the S flag.
 * Clears the x86 carry flag, then stores the carry condition as a byte into
 * R_S (i.e. writes 0). The flag cache is reset.
 */
CLRS {:
nkeynes@671
  2179
    COUNT_INST(I_CLRS);
nkeynes@374
  2180
    CLC();
nkeynes@991
  2181
    SETCCB_cc_rbpdisp(X86_COND_C, R_S);
nkeynes@872
  2182
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@368
  2183
:}
nkeynes@374
  2184
/*
 * CLRT - clear the T flag.
 * Clears the x86 carry flag and stores it to T via SETC_t(); tstate is set
 * to TSTATE_C to record that carry now mirrors T.
 */
CLRT {:  
nkeynes@671
  2185
    COUNT_INST(I_CLRT);
nkeynes@374
  2186
    CLC();
nkeynes@374
  2187
    SETC_t();
nkeynes@417
  2188
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  2189
:}
nkeynes@374
  2190
/*
 * SETS - set the S flag.
 * Sets the x86 carry flag, then stores the carry condition as a byte into
 * R_S (i.e. writes 1). The flag cache is reset.
 */
SETS {:  
nkeynes@671
  2191
    COUNT_INST(I_SETS);
nkeynes@374
  2192
    STC();
nkeynes@991
  2193
    SETCCB_cc_rbpdisp(X86_COND_C, R_S);
nkeynes@872
  2194
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  2195
:}
nkeynes@374
  2196
/*
 * SETT - set the T flag.
 * Sets the x86 carry flag and stores it to T via SETC_t(); tstate is set to
 * TSTATE_C to record that carry now mirrors T.
 */
SETT {:  
nkeynes@671
  2197
    COUNT_INST(I_SETT);
nkeynes@374
  2198
    STC();
nkeynes@374
  2199
    SETC_t();
nkeynes@417
  2200
    sh4_x86.tstate = TSTATE_C;
nkeynes@374
  2201
:}
nkeynes@359
  2202
nkeynes@375
  2203
/* Floating point moves */
nkeynes@375
  2204
/*
 * FMOV FRm, FRn - register-to-register FP move.
 * With sh4_x86.double_size set, both 32-bit halves are copied (load_dr0 /
 * load_dr1 into EAX / ECX, then stored); otherwise a single 32-bit value is
 * copied through EAX.
 */
FMOV FRm, FRn {:  
nkeynes@671
  2205
    COUNT_INST(I_FMOV1);
nkeynes@377
  2206
    check_fpuen();
nkeynes@901
  2207
    if( sh4_x86.double_size ) {
nkeynes@991
  2208
        load_dr0( REG_EAX, FRm );
nkeynes@991
  2209
        load_dr1( REG_ECX, FRm );
nkeynes@991
  2210
        store_dr0( REG_EAX, FRn );
nkeynes@991
  2211
        store_dr1( REG_ECX, FRn );
nkeynes@901
  2212
    } else {
nkeynes@991
  2213
        load_fr( REG_EAX, FRm ); // SZ=0 branch
nkeynes@991
  2214
        store_fr( REG_EAX, FRn );
nkeynes@901
  2215
    }
nkeynes@375
  2216
:}
nkeynes@416
  2217
/*
 * FMOV FRm, @Rn - store an FP register to memory at Rn.
 * double_size: 64-bit alignment check, then two 32-bit writes at Rn and
 * Rn+4. The address is reloaded from Rn before the second write rather than
 * reused from EAX (presumably MEM_WRITE_LONG may clobber it - confirm).
 * Otherwise: 32-bit alignment check and one 32-bit write.
 */
FMOV FRm, @Rn {: 
nkeynes@671
  2218
    COUNT_INST(I_FMOV2);
nkeynes@586
  2219
    check_fpuen();
nkeynes@991
  2220
    load_reg( REG_EAX, Rn );
nkeynes@901
  2221
    if( sh4_x86.double_size ) {
nkeynes@991
  2222
        check_walign64( REG_EAX );
nkeynes@991
  2223
        load_dr0( REG_EDX, FRm );
nkeynes@991
  2224
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@991
  2225
        load_reg( REG_EAX, Rn );
nkeynes@991
  2226
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
nkeynes@991
  2227
        load_dr1( REG_EDX, FRm );
nkeynes@991
  2228
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@901
  2229
    } else {
nkeynes@991
  2230
        check_walign32( REG_EAX );
nkeynes@991
  2231
        load_fr( REG_EDX, FRm );
nkeynes@991
  2232
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@901
  2233
    }
nkeynes@417
  2234
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@375
  2235
:}
nkeynes@375
  2236
/*
 * FMOV @Rm, FRn - load an FP register from memory at Rm.
 * double_size: 64-bit alignment check, then two 32-bit reads at Rm and Rm+4
 * stored with store_dr0 / store_dr1 (Rm is reloaded before the second read
 * because EAX was overwritten by the first result). Otherwise: 32-bit
 * alignment check and one 32-bit read.
 */
FMOV @Rm, FRn {:  
nkeynes@671
  2237
    COUNT_INST(I_FMOV5);
nkeynes@586
  2238
    check_fpuen();
nkeynes@991
  2239
    load_reg( REG_EAX, Rm );
nkeynes@901
  2240
    if( sh4_x86.double_size ) {
nkeynes@991
  2241
        check_ralign64( REG_EAX );
nkeynes@991
  2242
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2243
        store_dr0( REG_EAX, FRn );
nkeynes@991
  2244
        load_reg( REG_EAX, Rm );
nkeynes@991
  2245
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
nkeynes@991
  2246
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2247
        store_dr1( REG_EAX, FRn );
nkeynes@901
  2248
    } else {
nkeynes@991
  2249
        check_ralign32( REG_EAX );
nkeynes@991
  2250
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2251
        store_fr( REG_EAX, FRn );
nkeynes@901
  2252
    }
nkeynes@417
  2253
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@375
  2254
:}
nkeynes@377
  2255
/*
 * FMOV FRm, @-Rn - pre-decrement store of an FP register.
 * double_size: writes the two halves at Rn-8 and Rn-4, then subtracts 8
 * from Rn; otherwise writes one 32-bit value at Rn-4 and subtracts 4.
 * The alignment check is applied to the undecremented address, and Rn itself
 * is only updated after the write(s) have been emitted.
 */
FMOV FRm, @-Rn {:  
nkeynes@671
  2256
    COUNT_INST(I_FMOV3);
nkeynes@586
  2257
    check_fpuen();
nkeynes@991
  2258
    load_reg( REG_EAX, Rn );
nkeynes@901
  2259
    if( sh4_x86.double_size ) {
nkeynes@991
  2260
        check_walign64( REG_EAX );
nkeynes@991
  2261
        LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
nkeynes@991
  2262
        load_dr0( REG_EDX, FRm );
nkeynes@991
  2263
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@991
  2264
        load_reg( REG_EAX, Rn );
nkeynes@991
  2265
        LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
nkeynes@991
  2266
        load_dr1( REG_EDX, FRm );
nkeynes@991
  2267
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@991
  2268
        ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
nkeynes@901
  2269
    } else {
nkeynes@991
  2270
        check_walign32( REG_EAX );
nkeynes@991
  2271
        LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
nkeynes@991
  2272
        load_fr( REG_EDX, FRm );
nkeynes@991
  2273
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@991
  2274
        ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
nkeynes@901
  2275
    }
nkeynes@417
  2276
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2277
:}
nkeynes@416
  2278
/*
 * FMOV @Rm+, FRn - post-increment load of an FP register.
 * double_size: reads two 32-bit halves at Rm and Rm+4, then adds 8 to Rm;
 * otherwise reads one 32-bit value at Rm and adds 4. Rm is only updated
 * after the read(s) have been emitted.
 */
FMOV @Rm+, FRn {:
nkeynes@671
  2279
    COUNT_INST(I_FMOV6);
nkeynes@586
  2280
    check_fpuen();
nkeynes@991
  2281
    load_reg( REG_EAX, Rm );
nkeynes@901
  2282
    if( sh4_x86.double_size ) {
nkeynes@991
  2283
        check_ralign64( REG_EAX );
nkeynes@991
  2284
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2285
        store_dr0( REG_EAX, FRn );
nkeynes@991
  2286
        load_reg( REG_EAX, Rm );
nkeynes@991
  2287
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
nkeynes@991
  2288
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2289
        store_dr1( REG_EAX, FRn );
nkeynes@991
  2290
        ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
nkeynes@901
  2291
    } else {
nkeynes@991
  2292
        check_ralign32( REG_EAX );
nkeynes@991
  2293
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2294
        store_fr( REG_EAX, FRn );
nkeynes@991
  2295
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
nkeynes@901
  2296
    }
nkeynes@417
  2297
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2298
:}
nkeynes@377
  2299
/*
 * FMOV FRm, @(R0, Rn) - indexed store of an FP register at address Rn + R0.
 * double_size: 64-bit alignment check and two 32-bit writes at the address
 * and address+4 (the address is recomputed from Rn and R0 before the second
 * write); otherwise a 32-bit alignment check and one write.
 */
FMOV FRm, @(R0, Rn) {:  
nkeynes@671
  2300
    COUNT_INST(I_FMOV4);
nkeynes@586
  2301
    check_fpuen();
nkeynes@991
  2302
    load_reg( REG_EAX, Rn );
nkeynes@991
  2303
    ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
nkeynes@901
  2304
    if( sh4_x86.double_size ) {
nkeynes@991
  2305
        check_walign64( REG_EAX );
nkeynes@991
  2306
        load_dr0( REG_EDX, FRm );
nkeynes@991
  2307
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@991
  2308
        load_reg( REG_EAX, Rn );
nkeynes@991
  2309
        ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
nkeynes@991
  2310
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
nkeynes@991
  2311
        load_dr1( REG_EDX, FRm );
nkeynes@991
  2312
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@901
  2313
    } else {
nkeynes@991
  2314
        check_walign32( REG_EAX );
nkeynes@991
  2315
        load_fr( REG_EDX, FRm );
nkeynes@991
  2316
        MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
nkeynes@901
  2317
    }
nkeynes@417
  2318
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2319
:}
nkeynes@377
  2320
/*
 * FMOV @(R0, Rm), FRn - indexed load of an FP register from address Rm + R0.
 * double_size: 64-bit alignment check and two 32-bit reads at the address
 * and address+4 (the address is recomputed before the second read because
 * EAX holds the first result); otherwise a 32-bit alignment check and one
 * read.
 */
FMOV @(R0, Rm), FRn {:  
nkeynes@671
  2321
    COUNT_INST(I_FMOV7);
nkeynes@586
  2322
    check_fpuen();
nkeynes@991
  2323
    load_reg( REG_EAX, Rm );
nkeynes@991
  2324
    ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
nkeynes@901
  2325
    if( sh4_x86.double_size ) {
nkeynes@991
  2326
        check_ralign64( REG_EAX );
nkeynes@991
  2327
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2328
        store_dr0( REG_EAX, FRn );
nkeynes@991
  2329
        load_reg( REG_EAX, Rm );
nkeynes@991
  2330
        ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
nkeynes@991
  2331
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
nkeynes@991
  2332
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2333
        store_dr1( REG_EAX, FRn );
nkeynes@901
  2334
    } else {
nkeynes@991
  2335
        check_ralign32( REG_EAX );
nkeynes@991
  2336
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2337
        store_fr( REG_EAX, FRn );
nkeynes@901
  2338
    }
nkeynes@417
  2339
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2340
:}
nkeynes@377
  2341
/*
 * FLDI0 FRn - load the bit pattern 0 (+0.0f) into FRn.
 * Code is only emitted when double_prec == 0; in double-precision mode the
 * instruction emits nothing beyond the check_fpuen() sequence. The XOR used
 * to zero EAX is followed by a reset of the flag cache.
 */
FLDI0 FRn {:  /* IFF PR=0 */
nkeynes@671
  2342
    COUNT_INST(I_FLDI0);
nkeynes@377
  2343
    check_fpuen();
nkeynes@901
  2344
    if( sh4_x86.double_prec == 0 ) {
nkeynes@991
  2345
        XORL_r32_r32( REG_EAX, REG_EAX );
nkeynes@991
  2346
        store_fr( REG_EAX, FRn );
nkeynes@901
  2347
    }
nkeynes@417
  2348
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2349
:}
nkeynes@377
  2350
/*
 * FLDI1 FRn - load the bit pattern 0x3F800000 (IEEE-754 single 1.0f) into
 * FRn. Code is only emitted when double_prec == 0. tstate is left untouched
 * here, unlike FLDI0, which zeroes EAX with XOR.
 */
FLDI1 FRn {:  /* IFF PR=0 */
nkeynes@671
  2351
    COUNT_INST(I_FLDI1);
nkeynes@377
  2352
    check_fpuen();
nkeynes@901
  2353
    if( sh4_x86.double_prec == 0 ) {
nkeynes@995
  2354
        MOVL_imm32_r32( 0x3F800000, REG_EAX );
nkeynes@991
  2355
        store_fr( REG_EAX, FRn );
nkeynes@901
  2356
    }
nkeynes@377
  2357
:}
nkeynes@377
  2358
nkeynes@377
  2359
/*
 * FLOAT FPUL, FRn - convert the integer in FPUL to floating point.
 * FILD loads FPUL onto the x87 stack; the value is then popped into the
 * destination as a double (pop_dr) or single (pop_fr) depending on
 * double_prec.
 */
FLOAT FPUL, FRn {:  
nkeynes@671
  2360
    COUNT_INST(I_FLOAT);
nkeynes@377
  2361
    check_fpuen();
nkeynes@991
  2362
    FILD_rbpdisp(R_FPUL);
nkeynes@901
  2363
    if( sh4_x86.double_prec ) {
nkeynes@901
  2364
        pop_dr( FRn );
nkeynes@901
  2365
    } else {
nkeynes@901
  2366
        pop_fr( FRn );
nkeynes@901
  2367
    }
nkeynes@377
  2368
:}
nkeynes@377
  2369
/*
 * FTRC FRm, FPUL - convert FRm/DRm to an integer in FPUL with saturation.
 * The operand is pushed on the x87 stack and compared against min_int and
 * max_int. Out-of-range or unordered operands jump to the shared saturation
 * tail, which stores the integer ECX currently points at (min_int for
 * sat/sat2, max_int for sat3) and pops the operand. In-range operands are
 * stored with FISTP under the trunc_fcw control word, with the previous
 * control word saved to save_fcw and restored afterwards.
 */
FTRC FRm, FPUL {:  
nkeynes@671
  2370
    COUNT_INST(I_FTRC);
nkeynes@377
  2371
    check_fpuen();
nkeynes@901
  2372
    if( sh4_x86.double_prec ) {
nkeynes@901
  2373
        push_dr( FRm );
nkeynes@901
  2374
    } else {
nkeynes@901
  2375
        push_fr( FRm );
nkeynes@901
  2376
    }
nkeynes@1197
  2377
    /* Compare min_int (ST0) against the operand (ST1) */
    MOVP_immptr_rptr( &min_int, REG_ECX );
nkeynes@1197
  2378
    FILD_r32disp( REG_ECX, 0 );
nkeynes@1197
  2379
    FCOMIP_st(1);              
nkeynes@1197
  2380
    JAE_label( sat );     
nkeynes@1197
  2381
    /* Parity set => unordered compare (NaN operand); ECX still points at min_int */
    JP_label( sat2 );       
nkeynes@995
  2382
    /* Compare max_int (ST0) against the operand (ST1) */
    MOVP_immptr_rptr( &max_int, REG_ECX );
nkeynes@991
  2383
    FILD_r32disp( REG_ECX, 0 );
nkeynes@388
  2384
    FCOMIP_st(1);
nkeynes@1197
  2385
    JNA_label( sat3 );
nkeynes@995
  2386
    /* In range: save FCW, switch to trunc_fcw, store integer, restore FCW */
    MOVP_immptr_rptr( &save_fcw, REG_EAX );
nkeynes@991
  2387
    FNSTCW_r32disp( REG_EAX, 0 );
nkeynes@995
  2388
    MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
nkeynes@991
  2389
    FLDCW_r32disp( REG_EDX, 0 );
nkeynes@995
  2390
    FISTP_rbpdisp(R_FPUL);             
nkeynes@991
  2391
    FLDCW_r32disp( REG_EAX, 0 );
nkeynes@995
  2392
    JMP_label(end);             
nkeynes@388
  2393
nkeynes@388
  2394
    /* Saturation tail shared by all three out-of-range exits */
    JMP_TARGET(sat);
nkeynes@388
  2395
    JMP_TARGET(sat2);
nkeynes@1197
  2396
    JMP_TARGET(sat3);
nkeynes@991
  2397
    MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
nkeynes@995
  2398
    MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
nkeynes@388
  2399
    FPOP_st();
nkeynes@388
  2400
    JMP_TARGET(end);
nkeynes@417
  2401
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2402
:}
nkeynes@377
  2403
/*
 * FLDS FRm, FPUL - copy the 32-bit contents of FRm to FPUL through EAX
 * (a raw move; no x87 conversion is emitted).
 */
FLDS FRm, FPUL {:  
nkeynes@671
  2404
    COUNT_INST(I_FLDS);
nkeynes@377
  2405
    check_fpuen();
nkeynes@991
  2406
    load_fr( REG_EAX, FRm );
nkeynes@995
  2407
    MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
nkeynes@377
  2408
:}
nkeynes@377
  2409
/*
 * FSTS FPUL, FRn - copy the 32-bit contents of FPUL to FRn through EAX
 * (a raw move; no x87 conversion is emitted).
 */
FSTS FPUL, FRn {:  
nkeynes@671
  2410
    COUNT_INST(I_FSTS);
nkeynes@377
  2411
    check_fpuen();
nkeynes@995
  2412
    MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
nkeynes@991
  2413
    store_fr( REG_EAX, FRn );
nkeynes@377
  2414
:}
nkeynes@377
  2415
/*
 * FCNVDS FRm, FPUL - convert double to single.
 * Only emitted when double_prec is set: the double operand is pushed on the
 * x87 stack and popped into FPUL via pop_fpul(). Nothing beyond the
 * check_fpuen() sequence is emitted otherwise.
 */
FCNVDS FRm, FPUL {:  
nkeynes@671
  2416
    COUNT_INST(I_FCNVDS);
nkeynes@377
  2417
    check_fpuen();
nkeynes@901
  2418
    if( sh4_x86.double_prec ) {
nkeynes@901
  2419
        push_dr( FRm );
nkeynes@901
  2420
        pop_fpul();
nkeynes@901
  2421
    }
nkeynes@377
  2422
:}
nkeynes@377
  2423
/*
 * FCNVSD FPUL, FRn - convert single to double.
 * Only emitted when double_prec is set: FPUL is pushed on the x87 stack via
 * push_fpul() and popped into the double destination with pop_dr().
 */
FCNVSD FPUL, FRn {:  
nkeynes@671
  2424
    COUNT_INST(I_FCNVSD);
nkeynes@377
  2425
    check_fpuen();
nkeynes@901
  2426
    if( sh4_x86.double_prec ) {
nkeynes@901
  2427
        push_fpul();
nkeynes@901
  2428
        pop_dr( FRn );
nkeynes@901
  2429
    }
nkeynes@377
  2430
:}
nkeynes@375
  2431
nkeynes@359
  2432
/* Floating point instructions */
nkeynes@374
  2433
/*
 * FABS FRn - absolute value in place.
 * Pushes the operand on the x87 stack, applies FABS and pops it back, using
 * the double (push_dr/pop_dr) or single (push_fr/pop_fr) accessors according
 * to double_prec.
 */
FABS FRn {:  
nkeynes@671
  2434
    COUNT_INST(I_FABS);
nkeynes@377
  2435
    check_fpuen();
nkeynes@901
  2436
    if( sh4_x86.double_prec ) {
nkeynes@901
  2437
        push_dr(FRn);
nkeynes@901
  2438
        FABS_st0();
nkeynes@901
  2439
        pop_dr(FRn);
nkeynes@901
  2440
    } else {
nkeynes@901
  2441
        push_fr(FRn);
nkeynes@901
  2442
        FABS_st0();
nkeynes@901
  2443
        pop_fr(FRn);
nkeynes@901
  2444
    }
nkeynes@374
  2445
:}
nkeynes@377
  2446
/*
 * FADD FRm, FRn - FRn += FRm.
 * Both operands are pushed on the x87 stack, added with FADDP and the result
 * popped into FRn; double or single accessors are chosen by double_prec.
 */
FADD FRm, FRn {:  
nkeynes@671
  2447
    COUNT_INST(I_FADD);
nkeynes@377
  2448
    check_fpuen();
nkeynes@901
  2449
    if( sh4_x86.double_prec ) {
nkeynes@901
  2450
        push_dr(FRm);
nkeynes@901
  2451
        push_dr(FRn);
nkeynes@901
  2452
        FADDP_st(1);
nkeynes@901
  2453
        pop_dr(FRn);
nkeynes@901
  2454
    } else {
nkeynes@901
  2455
        push_fr(FRm);
nkeynes@901
  2456
        push_fr(FRn);
nkeynes@901
  2457
        FADDP_st(1);
nkeynes@901
  2458
        pop_fr(FRn);
nkeynes@901
  2459
    }
nkeynes@375
  2460
:}
nkeynes@377
  2461
/*
 * FDIV FRm, FRn - FRn /= FRm.
 * Operand order matters: FRn is pushed first (ending up in ST1) and FRm
 * second (ST0) before FDIVP_st(1); the quotient is popped into FRn. Double
 * or single accessors are chosen by double_prec.
 */
FDIV FRm, FRn {:  
nkeynes@671
  2462
    COUNT_INST(I_FDIV);
nkeynes@377
  2463
    check_fpuen();
nkeynes@901
  2464
    if( sh4_x86.double_prec ) {
nkeynes@901
  2465
        push_dr(FRn);
nkeynes@901
  2466
        push_dr(FRm);
nkeynes@901
  2467
        FDIVP_st(1);
nkeynes@901
  2468
        pop_dr(FRn);
nkeynes@901
  2469
    } else {
nkeynes@901
  2470
        push_fr(FRn);
nkeynes@901
  2471
        push_fr(FRm);
nkeynes@901
  2472
        FDIVP_st(1);
nkeynes@901
  2473
        pop_fr(FRn);
nkeynes@901
  2474
    }
nkeynes@375
  2475
:}
nkeynes@375
  2476
/*
 * FMAC FR0, FRm, FRn - FRn = FR0 * FRm + FRn.
 * Register 0 and FRm are pushed and multiplied, FRn is pushed and added, and
 * the sum is popped into FRn. Double or single accessors are chosen by
 * double_prec.
 */
FMAC FR0, FRm, FRn {:  
nkeynes@671
  2477
    COUNT_INST(I_FMAC);
nkeynes@377
  2478
    check_fpuen();
nkeynes@901
  2479
    if( sh4_x86.double_prec ) {
nkeynes@901
  2480
        push_dr( 0 );
nkeynes@901
  2481
        push_dr( FRm );
nkeynes@901
  2482
        FMULP_st(1);
nkeynes@901
  2483
        push_dr( FRn );
nkeynes@901
  2484
        FADDP_st(1);
nkeynes@901
  2485
        pop_dr( FRn );
nkeynes@901
  2486
    } else {
nkeynes@901
  2487
        push_fr( 0 );
nkeynes@901
  2488
        push_fr( FRm );
nkeynes@901
  2489
        FMULP_st(1);
nkeynes@901
  2490
        push_fr( FRn );
nkeynes@901
  2491
        FADDP_st(1);
nkeynes@901
  2492
        pop_fr( FRn );
nkeynes@901
  2493
    }
nkeynes@375
  2494
:}
nkeynes@375
  2495
nkeynes@377
  2496
/*
 * FMUL FRm, FRn - FRn *= FRm.
 * Both operands are pushed on the x87 stack, multiplied with FMULP and the
 * product popped into FRn; double or single accessors are chosen by
 * double_prec.
 */
FMUL FRm, FRn {:  
nkeynes@671
  2497
    COUNT_INST(I_FMUL);
nkeynes@377
  2498
    check_fpuen();
nkeynes@901
  2499
    if( sh4_x86.double_prec ) {
nkeynes@901
  2500
        push_dr(FRm);
nkeynes@901
  2501
        push_dr(FRn);
nkeynes@901
  2502
        FMULP_st(1);
nkeynes@901
  2503
        pop_dr(FRn);
nkeynes@901
  2504
    } else {
nkeynes@901
  2505
        push_fr(FRm);
nkeynes@901
  2506
        push_fr(FRn);
nkeynes@901
  2507
        FMULP_st(1);
nkeynes@901
  2508
        pop_fr(FRn);
nkeynes@901
  2509
    }
nkeynes@377
  2510
:}
nkeynes@377
  2511
/*
 * FNEG FRn - negate in place.
 * Pushes the operand on the x87 stack, applies FCHS and pops it back; double
 * or single accessors are chosen by double_prec.
 */
FNEG FRn {:  
nkeynes@671
  2512
    COUNT_INST(I_FNEG);
nkeynes@377
  2513
    check_fpuen();
nkeynes@901
  2514
    if( sh4_x86.double_prec ) {
nkeynes@901
  2515
        push_dr(FRn);
nkeynes@901
  2516
        FCHS_st0();
nkeynes@901
  2517
        pop_dr(FRn);
nkeynes@901
  2518
    } else {
nkeynes@901
  2519
        push_fr(FRn);
nkeynes@901
  2520
        FCHS_st0();
nkeynes@901
  2521
        pop_fr(FRn);
nkeynes@901
  2522
    }
nkeynes@377
  2523
:}
nkeynes@377
  2524
/*
 * FSRRA FRn - reciprocal square root, FRn = 1 / sqrt(FRn).
 * Only emitted when double_prec == 0. The constant 1.0 is loaded first so
 * that it sits in ST1 beneath sqrt(FRn) in ST0 when FDIVP_st(1) executes.
 */
FSRRA FRn {:  
nkeynes@671
  2525
    COUNT_INST(I_FSRRA);
nkeynes@377
  2526
    check_fpuen();
nkeynes@901
  2527
    if( sh4_x86.double_prec == 0 ) {
nkeynes@901
  2528
        FLD1_st0();
nkeynes@901
  2529
        push_fr(FRn);
nkeynes@901
  2530
        FSQRT_st0();
nkeynes@901
  2531
        FDIVP_st(1);
nkeynes@901
  2532
        pop_fr(FRn);
nkeynes@901
  2533
    }
nkeynes@377
  2534
:}
nkeynes@377
  2535
/*
 * FSQRT FRn - square root in place.
 * Pushes the operand on the x87 stack, applies FSQRT and pops it back;
 * double or single accessors are chosen by double_prec.
 */
FSQRT FRn {:  
nkeynes@671
  2536
    COUNT_INST(I_FSQRT);
nkeynes@377
  2537
    check_fpuen();
nkeynes@901
  2538
    if( sh4_x86.double_prec ) {
nkeynes@901
  2539
        push_dr(FRn);
nkeynes@901
  2540
        FSQRT_st0();
nkeynes@901
  2541
        pop_dr(FRn);
nkeynes@901
  2542
    } else {
nkeynes@901
  2543
        push_fr(FRn);
nkeynes@901
  2544
        FSQRT_st0();
nkeynes@901
  2545
        pop_fr(FRn);
nkeynes@901
  2546
    }
nkeynes@377
  2547
:}
nkeynes@377
  2548
/*
 * FSUB FRm, FRn - FRn -= FRm.
 * Operand order matters: FRn is pushed first (ST1) and FRm second (ST0)
 * before FSUBP_st(1); the difference is popped into FRn. Double or single
 * accessors are chosen by double_prec.
 */
FSUB FRm, FRn {:  
nkeynes@671
  2549
    COUNT_INST(I_FSUB);
nkeynes@377
  2550
    check_fpuen();
nkeynes@901
  2551
    if( sh4_x86.double_prec ) {
nkeynes@901
  2552
        push_dr(FRn);
nkeynes@901
  2553
        push_dr(FRm);
nkeynes@901
  2554
        FSUBP_st(1);
nkeynes@901
  2555
        pop_dr(FRn);
nkeynes@901
  2556
    } else {
nkeynes@901
  2557
        push_fr(FRn);
nkeynes@901
  2558
        push_fr(FRm);
nkeynes@901
  2559
        FSUBP_st(1);
nkeynes@901
  2560
        pop_fr(FRn);
nkeynes@901
  2561
    }
nkeynes@377
  2562
:}
nkeynes@377
  2563
nkeynes@377
  2564
/*
 * FCMP/EQ FRm, FRn - set T when FRn == FRm.
 * EAX and EDX are zeroed before the compare (XOR would otherwise destroy the
 * flags FCOMIP produces). After FCOMIP, DL becomes 1 when the parity flag is
 * clear (ordered compare), and EAX takes EDX only when the equal condition
 * holds, so T ends up 1 only for an ordered, equal result. The remaining
 * stack operand is popped and the flag cache reset.
 */
FCMP/EQ FRm, FRn {:  
nkeynes@671
  2565
    COUNT_INST(I_FCMPEQ);
nkeynes@377
  2566
    check_fpuen();
nkeynes@901
  2567
    if( sh4_x86.double_prec ) {
nkeynes@901
  2568
        push_dr(FRm);
nkeynes@901
  2569
        push_dr(FRn);
nkeynes@901
  2570
    } else {
nkeynes@901
  2571
        push_fr(FRm);
nkeynes@901
  2572
        push_fr(FRn);
nkeynes@901
  2573
    }
nkeynes@1197
  2574
    XORL_r32_r32(REG_EAX, REG_EAX);
nkeynes@1197
  2575
    XORL_r32_r32(REG_EDX, REG_EDX);
nkeynes@377
  2576
    FCOMIP_st(1);
nkeynes@1197
  2577
    SETCCB_cc_r8(X86_COND_NP, REG_DL);
nkeynes@1197
  2578
    CMOVCCL_cc_r32_r32(X86_COND_E, REG_EDX, REG_EAX);
nkeynes@1197
  2579
    MOVL_r32_rbpdisp(REG_EAX, R_T);
nkeynes@377
  2580
    FPOP_st();
nkeynes@1197
  2581
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2582
:}
nkeynes@377
  2583
/*
 * FCMP/GT FRm, FRn - set T when FRn > FRm.
 * FRm is pushed first and FRn second, so FCOMIP_st(1) compares FRn (ST0)
 * against FRm (ST1); SETA_t() stores the "above" condition into T and tstate
 * records TSTATE_A. The remaining stack operand is popped with FPOP_st().
 */
FCMP/GT FRm, FRn {:  
nkeynes@671
  2584
    COUNT_INST(I_FCMPGT);
nkeynes@377
  2585
    check_fpuen();
nkeynes@901
  2586
    if( sh4_x86.double_prec ) {
nkeynes@901
  2587
        push_dr(FRm);
nkeynes@901
  2588
        push_dr(FRn);
nkeynes@901
  2589
    } else {
nkeynes@901
  2590
        push_fr(FRm);
nkeynes@901
  2591
        push_fr(FRn);
nkeynes@901
  2592
    }
nkeynes@377
  2593
    FCOMIP_st(1);
nkeynes@377
  2594
    SETA_t();
nkeynes@377
  2595
    FPOP_st();
nkeynes@901
  2596
    sh4_x86.tstate = TSTATE_A;
nkeynes@377
  2597
:}
nkeynes@377
  2598
nkeynes@377
  2599
/*
 * FSCA FPUL, FRn - sine/cosine approximation, delegated to sh4_fsca().
 * Only emitted when double_prec == 0. The helper is called with the FPUL
 * value and a pointer to the even-aligned register pair fr[0][FRn & 0x0E].
 * The flag cache is reset in either case.
 */
FSCA FPUL, FRn {:  
nkeynes@671
  2600
    COUNT_INST(I_FSCA);
nkeynes@377
  2601
    check_fpuen();
nkeynes@901
  2602
    if( sh4_x86.double_prec == 0 ) {
nkeynes@991
  2603
        LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
nkeynes@995
  2604
        MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
nkeynes@995
  2605
        CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
nkeynes@901
  2606
    }
nkeynes@417
  2607
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2608
:}
nkeynes@377
  2609
/*
 * FIPR FVm, FVn - 4-element inner product of vectors FVm and FVn.
 * Only emitted when double_prec == 0.
 *  - SSE3 path: loads FVm into xmm4, multiplies element-wise by FVn, sums
 *    the four products with two HADDPS and stores the low scalar.
 *  - x87 path: multiplies and accumulates the four element pairs one at a
 *    time, then pops the sum.
 * NOTE(review): the SSE3 path stores to array index (FVn<<2)+2 while the x87
 * path pops to register (FVn<<2)+3; presumably pop_fr remaps the index to
 * the same slot - confirm against the push_fr/pop_fr definitions.
 */
FIPR FVm, FVn {:  
nkeynes@671
  2610
    COUNT_INST(I_FIPR);
nkeynes@377
  2611
    check_fpuen();
nkeynes@901
  2612
    if( sh4_x86.double_prec == 0 ) {
nkeynes@904
  2613
        if( sh4_x86.sse3_enabled ) {
nkeynes@991
  2614
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
nkeynes@991
  2615
            MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
nkeynes@903
  2616
            HADDPS_xmm_xmm( 4, 4 ); 
nkeynes@903
  2617
            HADDPS_xmm_xmm( 4, 4 );
nkeynes@991
  2618
            MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
nkeynes@903
  2619
        } else {
nkeynes@904
  2620
            push_fr( FVm<<2 );
nkeynes@903
  2621
            push_fr( FVn<<2 );
nkeynes@903
  2622
            FMULP_st(1);
nkeynes@903
  2623
            push_fr( (FVm<<2)+1);
nkeynes@903
  2624
            push_fr( (FVn<<2)+1);
nkeynes@903
  2625
            FMULP_st(1);
nkeynes@903
  2626
            FADDP_st(1);
nkeynes@903
  2627
            push_fr( (FVm<<2)+2);
nkeynes@903
  2628
            push_fr( (FVn<<2)+2);
nkeynes@903
  2629
            FMULP_st(1);
nkeynes@903
  2630
            FADDP_st(1);
nkeynes@903
  2631
            push_fr( (FVm<<2)+3);
nkeynes@903
  2632
            push_fr( (FVn<<2)+3);
nkeynes@903
  2633
            FMULP_st(1);
nkeynes@903
  2634
            FADDP_st(1);
nkeynes@903
  2635
            pop_fr( (FVn<<2)+3);
nkeynes@904
  2636
        }
nkeynes@901
  2637
    }
nkeynes@377
  2638
:}
nkeynes@377
  2639
FTRV XMTRX, FVn {:  
nkeynes@671
  2640
    COUNT_INST(I_FTRV);
nkeynes@377
  2641
    check_fpuen();
nkeynes@901
  2642
    if( sh4_x86.double_prec == 0 ) {
nkeynes@1194
  2643
        if( sh4_x86.sse3_enabled && sh4_x86.begin_callback == NULL ) {
nkeynes@1194
  2644
        	/* FIXME: For now, disable this inlining when we're running in shadow mode -
nkeynes@1194
  2645
        	 * it gives slightly different results from the emu core. Need to
nkeynes@1194
  2646
        	 * fix the precision so both give the right results.
nkeynes@1194
  2647
        	 */
nkeynes@991
  2648
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
nkeynes@991
  2649
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
nkeynes@991
  2650
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
nkeynes@991
  2651
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
nkeynes@903
  2652
nkeynes@991
  2653
            MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
nkeynes@991
  2654
            MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
nkeynes@991
  2655
            MOV_xmm_xmm( 4, 6 );
nkeynes@991
  2656
            MOV_xmm_xmm( 5, 7 );
nkeynes@903
  2657
            MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
nkeynes@903
  2658
            MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
nkeynes@903
  2659
            MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
nkeynes@903
  2660
            MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
nkeynes@903
  2661
            MULPS_xmm_xmm( 0, 4 );
nkeynes@903
  2662
            MULPS_xmm_xmm( 1, 5 );
nkeynes@903
  2663
            MULPS_xmm_xmm( 2, 6 );
nkeynes@903
  2664
            MULPS_xmm_xmm( 3, 7 );
nkeynes@903
  2665
            ADDPS_xmm_xmm( 5, 4 );
nkeynes@903
  2666
            ADDPS_xmm_xmm( 7, 6 );
nkeynes@903
  2667
            ADDPS_xmm_xmm( 6, 4 );
nkeynes@991
  2668
            MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
nkeynes@903
  2669
        } else {
nkeynes@991
  2670
            LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );