4 * SH4 => x86 translation. This version does no real optimization, it just
5 * outputs straight-line x86 code - it mainly exists to provide a baseline
6 * to test the optimizing versions against.
8 * Copyright (c) 2007 Nathan Keynes.
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
28 #include "sh4/xltcache.h"
29 #include "sh4/sh4core.h"
30 #include "sh4/sh4trans.h"
31 #include "sh4/sh4mmio.h"
32 #include "sh4/x86op.h"
35 #define DEFAULT_BACKPATCH_SIZE 4096
37 struct backpatch_record {
39 uint32_t fixup_icount;
44 * Struct to manage internal translation state. This state is not saved -
45 * it is only valid between calls to sh4_translate_begin_block() and
46 * sh4_translate_end_block()
48 struct sh4_x86_state {
49 gboolean in_delay_slot;
50 gboolean priv_checked; /* true if we've already checked the cpu mode. */
51 gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
52 gboolean branch_taken; /* true if we branched unconditionally */
53 uint32_t block_start_pc;
54 uint32_t stack_posn; /* Trace stack height for alignment purposes */
58 gboolean tlb_on; /* True if tlb translation is active */
60 /* Allocated memory for the (block-wide) back-patch list */
61 struct backpatch_record *backpatch_list;
62 uint32_t backpatch_posn;
63 uint32_t backpatch_size;
66 #define TSTATE_NONE -1
76 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
77 #define JT_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
78 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
79 OP(0x70+sh4_x86.tstate); OP(rel8); \
81 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
82 #define JF_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
83 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
84 OP(0x70+ (sh4_x86.tstate^1)); OP(rel8); \
87 static struct sh4_x86_state sh4_x86;
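/* Explanatory note: the tstate field caches which x86 condition code currently
 * reflects the SH4 T bit. While it is TSTATE_NONE the JT/JF macros above fall
 * back to an explicit CMP against sh4r.t; otherwise the conditional jump is
 * formed directly as opcode 0x70+tstate (and 0x70+(tstate^1) for the inverted
 * test, since flipping the low bit of an x86 condition code inverts it),
 * avoiding a redundant compare. */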
89 static uint32_t max_int = 0x7FFFFFFF;
90 static uint32_t min_int = 0x80000000;
91 static uint32_t save_fcw; /* save value for fpu control word */
92 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
96 sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
97 sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
101 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
103 if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
104 sh4_x86.backpatch_size <<= 1;
105 sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
106 sh4_x86.backpatch_size * sizeof(struct backpatch_record));
107 assert( sh4_x86.backpatch_list != NULL );
109 if( sh4_x86.in_delay_slot ) {
112 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_addr = (uint32_t *)fixup_addr;
113 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
114 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
115 sh4_x86.backpatch_posn++;
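/* Minimal sketch of how these records are typically consumed (an assumption
 * for illustration, not code from this file): when the block is finalised the
 * emitter walks the list, points each recorded rel32 at an out-of-line
 * exception stub, and lets the stub recover the SH4 pc from fixup_icount and
 * raise exc_code. Roughly:
 *
 *     for( i = 0; i < sh4_x86.backpatch_posn; i++ ) {
 *         struct backpatch_record *rec = &sh4_x86.backpatch_list[i];
 *         uint8_t *jump_end = (uint8_t *)rec->fixup_addr + 4;
 *         *rec->fixup_addr = (uint32_t)(stub_for(rec) - jump_end); // patch rel32
 *     }
 *
 * where stub_for() is a hypothetical helper returning the address of the
 * generated exception-raising code for that record. */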
119 * Emit an instruction to load an SH4 reg into a real register
121 static inline void load_reg( int x86reg, int sh4reg )
123 /* mov [bp+n], reg */
125 OP(0x45 + (x86reg<<3));
126 OP(REG_OFFSET(r[sh4reg]));
129 static inline void load_reg16s( int x86reg, int sh4reg )
133 MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
136 static inline void load_reg16u( int x86reg, int sh4reg )
140 MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
144 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
145 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
147 * Emit an instruction to load an immediate value into a register
149 static inline void load_imm32( int x86reg, uint32_t value ) {
150 /* mov #value, reg */
156 * Load an immediate 64-bit quantity (note: x86-64 only)
158 static inline void load_imm64( int x86reg, uint64_t value ) {
159 /* mov #value, reg */
167 * Emit an instruction to store an x86 register back into an SH4 reg (Rn)
169 static inline void store_reg( int x86reg, int sh4reg ) {
170 /* mov reg, [bp+n] */
172 OP(0x45 + (x86reg<<3));
173 OP(REG_OFFSET(r[sh4reg]));
176 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
179 * Load an FR register (single-precision floating point) into an integer x86
180 * register (eg for register-to-register moves)
182 static inline void load_fr( int bankreg, int x86reg, int frm )
184 OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
188 * Store the contents of an integer x86 register into an FR register
189 * (single-precision floating point), eg for register-to-register moves
191 static inline void store_fr( int bankreg, int x86reg, int frn )
193 OP(0x89); OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
198 * Load a pointer to the back fp bank into the specified x86 register. The
199 * bankreg must have been previously loaded with FPSCR.
202 static inline void load_xf_bank( int bankreg )
205 SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
206 AND_imm8s_r32( 0x40, bankreg ); // Complete extraction
207 OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
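/* Explanatory note: FPSCR bit 21 is the FR (bank select) flag. Shifting right
 * by 21-6 = 15 moves it to bit position 6, and the AND with 0x40 isolates it,
 * giving either 0 or 64 - one bank of 16 single-precision registers is
 * 16*4 = 64 bytes. The LEA then adds that offset to REG_OFFSET(fr) to form a
 * pointer to the appropriate bank. */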
211 * Update the fr_bank pointer based on the current fpscr value.
213 static inline void update_fr_bank( int fpscrreg )
215 SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
216 AND_imm8s_r32( 0x40, fpscrreg ); // Complete extraction
217 OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
218 store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
221 * Push FPUL (as a 32-bit float) onto the FPU stack
223 static inline void push_fpul( )
225 OP(0xD9); OP(0x45); OP(R_FPUL);
229 * Pop FPUL (as a 32-bit float) from the FPU stack
231 static inline void pop_fpul( )
233 OP(0xD9); OP(0x5D); OP(R_FPUL);
237 * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
238 * with the location of the current fp bank.
240 static inline void push_fr( int bankreg, int frm )
242 OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2); // FLD.S [bankreg + frm^1*4]
246 * Pop a 32-bit float from the FPU stack and store it back into the fp bank,
247 * with bankreg previously loaded with the location of the current fp bank.
249 static inline void pop_fr( int bankreg, int frm )
251 OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
255 * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
256 * with the location of the current fp bank.
258 static inline void push_dr( int bankreg, int frm )
260 OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
263 static inline void pop_dr( int bankreg, int frm )
265 OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
268 /* Exception checks - Note that all exception checks will clobber EAX */
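/* Explanatory note: the priv_checked / fpuen_checked flags ensure that the
 * SR.MD (privilege) and SR.FD (FPU disable) tests are emitted at most once per
 * translated block; inside a delay slot the slot-illegal variants of the
 * exceptions are raised instead of the ordinary ones. */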
270 #define check_priv( ) \
271 if( !sh4_x86.priv_checked ) { \
272 sh4_x86.priv_checked = TRUE;\
273 load_spreg( R_EAX, R_SR );\
274 AND_imm32_r32( SR_MD, R_EAX );\
275 if( sh4_x86.in_delay_slot ) {\
276 JE_exc( EXC_SLOT_ILLEGAL );\
278 JE_exc( EXC_ILLEGAL );\
282 #define check_fpuen( ) \
283 if( !sh4_x86.fpuen_checked ) {\
284 sh4_x86.fpuen_checked = TRUE;\
285 load_spreg( R_EAX, R_SR );\
286 AND_imm32_r32( SR_FD, R_EAX );\
287 if( sh4_x86.in_delay_slot ) {\
288 JNE_exc(EXC_SLOT_FPU_DISABLED);\
290 JNE_exc(EXC_FPU_DISABLED);\
294 #define check_ralign16( x86reg ) \
295 TEST_imm32_r32( 0x00000001, x86reg ); \
296 JNE_exc(EXC_DATA_ADDR_READ)
298 #define check_walign16( x86reg ) \
299 TEST_imm32_r32( 0x00000001, x86reg ); \
300 JNE_exc(EXC_DATA_ADDR_WRITE);
302 #define check_ralign32( x86reg ) \
303 TEST_imm32_r32( 0x00000003, x86reg ); \
304 JNE_exc(EXC_DATA_ADDR_READ)
306 #define check_walign32( x86reg ) \
307 TEST_imm32_r32( 0x00000003, x86reg ); \
308 JNE_exc(EXC_DATA_ADDR_WRITE);
311 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
312 #define MEM_READ_BYTE_PHYS( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
313 #define MEM_READ_WORD_PHYS( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
314 #define MEM_READ_LONG_PHYS( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
315 #define MEM_WRITE_BYTE_PHYS( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
316 #define MEM_WRITE_WORD_PHYS( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
317 #define MEM_WRITE_LONG_PHYS( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
319 #define MEM_READ_BYTE_VMA( addr_reg, value_reg ) call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); call_func1(sh4_read_byte, R_EAX); MEM_RESULT(value_reg)
320 #define MEM_READ_WORD_VMA( addr_reg, value_reg ) call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); call_func1(sh4_read_word, R_EAX); MEM_RESULT(value_reg)
321 #define MEM_READ_LONG_VMA( addr_reg, value_reg ) call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); call_func1(sh4_read_long, R_EAX); MEM_RESULT(value_reg)
322 #define MEM_WRITE_BYTE_VMA( addr_reg, value_reg ) call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); call_func2(sh4_write_byte, R_EAX, value_reg)
323 #define MEM_WRITE_WORD_VMA( addr_reg, value_reg ) call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); call_func2(sh4_write_word, R_EAX, value_reg)
324 #define MEM_WRITE_LONG_VMA( addr_reg, value_reg ) call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); call_func2(sh4_write_long, R_EAX, value_reg)
326 #define MEM_READ_BYTE( addr_reg, value_reg ) if(sh4_x86.tlb_on){MEM_READ_BYTE_VMA(addr_reg,value_reg);}else{MEM_READ_BYTE_PHYS(addr_reg, value_reg);}
327 #define MEM_READ_WORD( addr_reg, value_reg ) if(sh4_x86.tlb_on){MEM_READ_WORD_VMA(addr_reg,value_reg);}else{MEM_READ_WORD_PHYS(addr_reg, value_reg);}
328 #define MEM_READ_LONG( addr_reg, value_reg ) if(sh4_x86.tlb_on){MEM_READ_LONG_VMA(addr_reg,value_reg);}else{MEM_READ_LONG_PHYS(addr_reg, value_reg);}
329 #define MEM_WRITE_BYTE( addr_reg, value_reg ) if(sh4_x86.tlb_on){MEM_WRITE_BYTE_VMA(addr_reg,value_reg);}else{MEM_WRITE_BYTE_PHYS(addr_reg, value_reg);}
330 #define MEM_WRITE_WORD( addr_reg, value_reg ) if(sh4_x86.tlb_on){MEM_WRITE_WORD_VMA(addr_reg,value_reg);}else{MEM_WRITE_WORD_PHYS(addr_reg, value_reg);}
331 #define MEM_WRITE_LONG( addr_reg, value_reg ) if(sh4_x86.tlb_on){MEM_WRITE_LONG_VMA(addr_reg,value_reg);}else{MEM_WRITE_LONG_PHYS(addr_reg, value_reg);}
333 #define MEM_READ_SIZE_PHYS (CALL_FUNC1_SIZE)
334 #define MEM_WRITE_SIZE_PHYS (CALL_FUNC2_SIZE)
335 #define MEM_READ_SIZE_VMA (CALL_FUNC1_SIZE + CALL_FUNC1_SIZE + 12)
336 #define MEM_WRITE_SIZE_VMA (CALL_FUNC1_SIZE + CALL_FUNC2_SIZE + 12)
338 #define MEM_READ_SIZE (sh4_x86.tlb_on?MEM_READ_SIZE_VMA:MEM_READ_SIZE_PHYS)
339 #define MEM_WRITE_SIZE (sh4_x86.tlb_on?MEM_WRITE_SIZE_VMA:MEM_WRITE_SIZE_PHYS)
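/* Explanatory note: the _PHYS variants call the sh4_read/sh4_write handlers
 * directly, while the _VMA variants first translate the virtual address via
 * mmu_vma_to_phys_read/write, branch to the exception exit if the result is
 * MMU_VMA_ERROR, and otherwise perform the access on the returned physical
 * address. MEM_READ_SIZE / MEM_WRITE_SIZE expose the emitted code size of
 * whichever variant is active so callers can compute short forward-branch
 * displacements over a memory access (the extra 12 bytes in the _VMA sizes
 * cover the CMP/JE error check). */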
341 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
343 /****** Import appropriate calling conventions ******/
344 #if SH4_TRANSLATOR == TARGET_X86_64
345 #include "sh4/ia64abi.h"
346 #else /* SH4_TRANSLATOR == TARGET_X86 */
348 #include "sh4/ia32mac.h"
350 #include "sh4/ia32abi.h"
356 * Translate a single instruction. Delayed branches are handled specially
357 * by translating both branch and delayed instruction as a single unit (as
360 * @return true if the instruction marks the end of a basic block
363 uint32_t sh4_translate_instruction( sh4addr_t pc )
366 /* Read instruction */
367 if( IS_IN_ICACHE(pc) ) {
368 ir = *(uint16_t *)GET_ICACHE_PTR(pc);
370 ir = sh4_read_word(pc);
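/* Explanatory note: when the page containing pc is already mapped in the
 * translator's icache, the instruction word is read straight from host memory
 * via GET_ICACHE_PTR(); otherwise it falls back to a full sh4_read_word()
 * through the memory subsystem. */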
375 load_reg( R_EAX, Rm );
376 load_reg( R_ECX, Rn );
377 ADD_r32_r32( R_EAX, R_ECX );
378 store_reg( R_ECX, Rn );
379 sh4_x86.tstate = TSTATE_NONE;
382 load_reg( R_EAX, Rn );
383 ADD_imm8s_r32( imm, R_EAX );
384 store_reg( R_EAX, Rn );
385 sh4_x86.tstate = TSTATE_NONE;
388 if( sh4_x86.tstate != TSTATE_C ) {
391 load_reg( R_EAX, Rm );
392 load_reg( R_ECX, Rn );
393 ADC_r32_r32( R_EAX, R_ECX );
394 store_reg( R_ECX, Rn );
396 sh4_x86.tstate = TSTATE_C;
399 load_reg( R_EAX, Rm );
400 load_reg( R_ECX, Rn );
401 ADD_r32_r32( R_EAX, R_ECX );
402 store_reg( R_ECX, Rn );
404 sh4_x86.tstate = TSTATE_O;
407 load_reg( R_EAX, Rm );
408 load_reg( R_ECX, Rn );
409 AND_r32_r32( R_EAX, R_ECX );
410 store_reg( R_ECX, Rn );
411 sh4_x86.tstate = TSTATE_NONE;
414 load_reg( R_EAX, 0 );
415 AND_imm32_r32(imm, R_EAX);
416 store_reg( R_EAX, 0 );
417 sh4_x86.tstate = TSTATE_NONE;
419 AND.B #imm, @(R0, GBR) {:
420 load_reg( R_EAX, 0 );
421 load_spreg( R_ECX, R_GBR );
422 ADD_r32_r32( R_EAX, R_ECX );
423 PUSH_realigned_r32(R_ECX);
424 MEM_READ_BYTE( R_ECX, R_EAX );
425 POP_realigned_r32(R_ECX);
426 AND_imm32_r32(imm, R_EAX );
427 MEM_WRITE_BYTE( R_ECX, R_EAX );
428 sh4_x86.tstate = TSTATE_NONE;
431 load_reg( R_EAX, Rm );
432 load_reg( R_ECX, Rn );
433 CMP_r32_r32( R_EAX, R_ECX );
435 sh4_x86.tstate = TSTATE_E;
438 load_reg( R_EAX, 0 );
439 CMP_imm8s_r32(imm, R_EAX);
441 sh4_x86.tstate = TSTATE_E;
444 load_reg( R_EAX, Rm );
445 load_reg( R_ECX, Rn );
446 CMP_r32_r32( R_EAX, R_ECX );
448 sh4_x86.tstate = TSTATE_GE;
451 load_reg( R_EAX, Rm );
452 load_reg( R_ECX, Rn );
453 CMP_r32_r32( R_EAX, R_ECX );
455 sh4_x86.tstate = TSTATE_G;
458 load_reg( R_EAX, Rm );
459 load_reg( R_ECX, Rn );
460 CMP_r32_r32( R_EAX, R_ECX );
462 sh4_x86.tstate = TSTATE_A;
465 load_reg( R_EAX, Rm );
466 load_reg( R_ECX, Rn );
467 CMP_r32_r32( R_EAX, R_ECX );
469 sh4_x86.tstate = TSTATE_AE;
472 load_reg( R_EAX, Rn );
473 CMP_imm8s_r32( 0, R_EAX );
475 sh4_x86.tstate = TSTATE_G;
478 load_reg( R_EAX, Rn );
479 CMP_imm8s_r32( 0, R_EAX );
481 sh4_x86.tstate = TSTATE_GE;
484 load_reg( R_EAX, Rm );
485 load_reg( R_ECX, Rn );
486 XOR_r32_r32( R_ECX, R_EAX );
487 TEST_r8_r8( R_AL, R_AL );
488 JE_rel8(13, target1);
489 TEST_r8_r8( R_AH, R_AH ); // 2
491 SHR_imm8_r32( 16, R_EAX ); // 3
492 TEST_r8_r8( R_AL, R_AL ); // 2
494 TEST_r8_r8( R_AH, R_AH ); // 2
499 sh4_x86.tstate = TSTATE_E;
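/* Explanatory note: CMP/STR sets T when any byte of Rm equals the
 * corresponding byte of Rn. XORing the two registers turns matching bytes into
 * zero bytes, so the emitted code just tests AL and AH, shifts the upper half
 * down, and tests again, branching out early as soon as a zero byte is found. */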
502 load_reg( R_EAX, Rm );
503 load_reg( R_ECX, Rn );
504 SHR_imm8_r32( 31, R_EAX );
505 SHR_imm8_r32( 31, R_ECX );
506 store_spreg( R_EAX, R_M );
507 store_spreg( R_ECX, R_Q );
508 CMP_r32_r32( R_EAX, R_ECX );
510 sh4_x86.tstate = TSTATE_NE;
513 XOR_r32_r32( R_EAX, R_EAX );
514 store_spreg( R_EAX, R_Q );
515 store_spreg( R_EAX, R_M );
516 store_spreg( R_EAX, R_T );
517 sh4_x86.tstate = TSTATE_C; // works for DIV1
520 load_spreg( R_ECX, R_M );
521 load_reg( R_EAX, Rn );
522 if( sh4_x86.tstate != TSTATE_C ) {
526 SETC_r8( R_DL ); // Q'
527 CMP_sh4r_r32( R_Q, R_ECX );
529 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
532 SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
534 store_reg( R_EAX, Rn ); // Done with Rn now
535 SETC_r8(R_AL); // tmp1
536 XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
537 XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
538 store_spreg( R_ECX, R_Q );
539 XOR_imm8s_r32( 1, R_AL ); // T = !Q'
540 MOVZX_r8_r32( R_AL, R_EAX );
541 store_spreg( R_EAX, R_T );
542 sh4_x86.tstate = TSTATE_NONE;
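/* Explanatory note: DIV1 is a single step of 1-bit non-restoring division.
 * The previous T bit is rotated into Rn, the divisor Rm is added or subtracted
 * depending on whether the Q and M flags currently agree, and Q and T are then
 * recomputed from the resulting carries. */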
545 load_reg( R_EAX, Rm );
546 load_reg( R_ECX, Rn );
548 store_spreg( R_EDX, R_MACH );
549 store_spreg( R_EAX, R_MACL );
550 sh4_x86.tstate = TSTATE_NONE;
553 load_reg( R_EAX, Rm );
554 load_reg( R_ECX, Rn );
556 store_spreg( R_EDX, R_MACH );
557 store_spreg( R_EAX, R_MACL );
558 sh4_x86.tstate = TSTATE_NONE;
561 load_reg( R_EAX, Rn );
562 ADD_imm8s_r32( -1, R_EAX );
563 store_reg( R_EAX, Rn );
565 sh4_x86.tstate = TSTATE_E;
568 load_reg( R_EAX, Rm );
569 MOVSX_r8_r32( R_EAX, R_EAX );
570 store_reg( R_EAX, Rn );
573 load_reg( R_EAX, Rm );
574 MOVSX_r16_r32( R_EAX, R_EAX );
575 store_reg( R_EAX, Rn );
578 load_reg( R_EAX, Rm );
579 MOVZX_r8_r32( R_EAX, R_EAX );
580 store_reg( R_EAX, Rn );
583 load_reg( R_EAX, Rm );
584 MOVZX_r16_r32( R_EAX, R_EAX );
585 store_reg( R_EAX, Rn );
588 load_reg( R_ECX, Rm );
589 check_ralign32( R_ECX );
590 load_reg( R_ECX, Rn );
591 check_ralign32( R_ECX );
592 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
593 MEM_READ_LONG( R_ECX, R_EAX );
594 PUSH_realigned_r32( R_EAX );
595 load_reg( R_ECX, Rm );
596 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
597 MEM_READ_LONG( R_ECX, R_EAX );
598 POP_realigned_r32( R_ECX );
600 ADD_r32_sh4r( R_EAX, R_MACL );
601 ADC_r32_sh4r( R_EDX, R_MACH );
603 load_spreg( R_ECX, R_S );
604 TEST_r32_r32(R_ECX, R_ECX);
605 JE_rel8( CALL_FUNC0_SIZE, nosat );
606 call_func0( signsat48 );
608 sh4_x86.tstate = TSTATE_NONE;
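/* Explanatory note: MAC.L accumulates the full 64-bit product into MACH:MACL;
 * when the S flag is set the result is additionally clamped to 48 bits, which
 * is what the out-of-line signsat48() call handles. */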
611 load_reg( R_ECX, Rm );
612 check_ralign16( R_ECX );
613 load_reg( R_ECX, Rn );
614 check_ralign16( R_ECX );
615 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
616 MEM_READ_WORD( R_ECX, R_EAX );
617 PUSH_realigned_r32( R_EAX );
618 load_reg( R_ECX, Rm );
619 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
620 MEM_READ_WORD( R_ECX, R_EAX );
621 POP_realigned_r32( R_ECX );
624 load_spreg( R_ECX, R_S );
625 TEST_r32_r32( R_ECX, R_ECX );
626 JE_rel8( 47, nosat );
628 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
629 JNO_rel8( 51, end ); // 2
630 load_imm32( R_EDX, 1 ); // 5
631 store_spreg( R_EDX, R_MACH ); // 6
632 JS_rel8( 13, positive ); // 2
633 load_imm32( R_EAX, 0x80000000 );// 5
634 store_spreg( R_EAX, R_MACL ); // 6
635 JMP_rel8( 25, end2 ); // 2
637 JMP_TARGET(positive);
638 load_imm32( R_EAX, 0x7FFFFFFF );// 5
639 store_spreg( R_EAX, R_MACL ); // 6
640 JMP_rel8( 12, end3); // 2
643 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
644 ADC_r32_sh4r( R_EDX, R_MACH ); // 6
648 sh4_x86.tstate = TSTATE_NONE;
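/* Explanatory note: with the S flag set MAC.W saturates to 32 bits - on signed
 * overflow MACL is clamped to 0x80000000 or 0x7FFFFFFF and MACH is set to 1 -
 * whereas with S clear the product is accumulated into the full 64-bit
 * MACH:MACL pair. */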
651 load_spreg( R_EAX, R_T );
652 store_reg( R_EAX, Rn );
655 load_reg( R_EAX, Rm );
656 load_reg( R_ECX, Rn );
658 store_spreg( R_EAX, R_MACL );
659 sh4_x86.tstate = TSTATE_NONE;
662 load_reg16s( R_EAX, Rm );
663 load_reg16s( R_ECX, Rn );
665 store_spreg( R_EAX, R_MACL );
666 sh4_x86.tstate = TSTATE_NONE;
669 load_reg16u( R_EAX, Rm );
670 load_reg16u( R_ECX, Rn );
672 store_spreg( R_EAX, R_MACL );
673 sh4_x86.tstate = TSTATE_NONE;
676 load_reg( R_EAX, Rm );
678 store_reg( R_EAX, Rn );
679 sh4_x86.tstate = TSTATE_NONE;
682 load_reg( R_EAX, Rm );
683 XOR_r32_r32( R_ECX, R_ECX );
685 SBB_r32_r32( R_EAX, R_ECX );
686 store_reg( R_ECX, Rn );
688 sh4_x86.tstate = TSTATE_C;
691 load_reg( R_EAX, Rm );
693 store_reg( R_EAX, Rn );
694 sh4_x86.tstate = TSTATE_NONE;
697 load_reg( R_EAX, Rm );
698 load_reg( R_ECX, Rn );
699 OR_r32_r32( R_EAX, R_ECX );
700 store_reg( R_ECX, Rn );
701 sh4_x86.tstate = TSTATE_NONE;
704 load_reg( R_EAX, 0 );
705 OR_imm32_r32(imm, R_EAX);
706 store_reg( R_EAX, 0 );
707 sh4_x86.tstate = TSTATE_NONE;
709 OR.B #imm, @(R0, GBR) {:
710 load_reg( R_EAX, 0 );
711 load_spreg( R_ECX, R_GBR );
712 ADD_r32_r32( R_EAX, R_ECX );
713 PUSH_realigned_r32(R_ECX);
714 MEM_READ_BYTE( R_ECX, R_EAX );
715 POP_realigned_r32(R_ECX);
716 OR_imm32_r32(imm, R_EAX );
717 MEM_WRITE_BYTE( R_ECX, R_EAX );
718 sh4_x86.tstate = TSTATE_NONE;
721 load_reg( R_EAX, Rn );
722 if( sh4_x86.tstate != TSTATE_C ) {
726 store_reg( R_EAX, Rn );
728 sh4_x86.tstate = TSTATE_C;
731 load_reg( R_EAX, Rn );
732 if( sh4_x86.tstate != TSTATE_C ) {
736 store_reg( R_EAX, Rn );
738 sh4_x86.tstate = TSTATE_C;
741 load_reg( R_EAX, Rn );
743 store_reg( R_EAX, Rn );
745 sh4_x86.tstate = TSTATE_C;
748 load_reg( R_EAX, Rn );
750 store_reg( R_EAX, Rn );
752 sh4_x86.tstate = TSTATE_C;
755 /* Annoyingly enough, not directly convertible */
756 load_reg( R_EAX, Rn );
757 load_reg( R_ECX, Rm );
758 CMP_imm32_r32( 0, R_ECX );
761 NEG_r32( R_ECX ); // 2
762 AND_imm8_r8( 0x1F, R_CL ); // 3
763 JE_rel8( 4, emptysar); // 2
764 SAR_r32_CL( R_EAX ); // 2
765 JMP_rel8(10, end); // 2
767 JMP_TARGET(emptysar);
768 SAR_imm8_r32(31, R_EAX ); // 3
772 AND_imm8_r8( 0x1F, R_CL ); // 3
773 SHL_r32_CL( R_EAX ); // 2
776 store_reg( R_EAX, Rn );
777 sh4_x86.tstate = TSTATE_NONE;
780 load_reg( R_EAX, Rn );
781 load_reg( R_ECX, Rm );
782 CMP_imm32_r32( 0, R_ECX );
785 NEG_r32( R_ECX ); // 2
786 AND_imm8_r8( 0x1F, R_CL ); // 3
787 JE_rel8( 4, emptyshr );
788 SHR_r32_CL( R_EAX ); // 2
789 JMP_rel8(9, end); // 2
791 JMP_TARGET(emptyshr);
792 XOR_r32_r32( R_EAX, R_EAX );
796 AND_imm8_r8( 0x1F, R_CL ); // 3
797 SHL_r32_CL( R_EAX ); // 2
800 store_reg( R_EAX, Rn );
801 sh4_x86.tstate = TSTATE_NONE;
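/* Explanatory note for SHAD/SHLD above: a positive Rm shifts Rn left and a
 * negative Rm shifts it right by -Rm. Because x86 masks shift counts to 5
 * bits, a right-shift count whose low 5 bits are zero is special-cased: SHAD
 * must fill with the sign bit (hence the SAR by 31 on the emptysar path),
 * while SHLD clears the register entirely (the emptyshr path). */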
804 load_reg( R_EAX, Rn );
807 store_reg( R_EAX, Rn );
808 sh4_x86.tstate = TSTATE_C;
811 load_reg( R_EAX, Rn );
814 store_reg( R_EAX, Rn );
815 sh4_x86.tstate = TSTATE_C;
818 load_reg( R_EAX, Rn );
821 store_reg( R_EAX, Rn );
822 sh4_x86.tstate = TSTATE_C;
825 load_reg( R_EAX, Rn );
826 SHL_imm8_r32( 2, R_EAX );
827 store_reg( R_EAX, Rn );
828 sh4_x86.tstate = TSTATE_NONE;
831 load_reg( R_EAX, Rn );
832 SHL_imm8_r32( 8, R_EAX );
833 store_reg( R_EAX, Rn );
834 sh4_x86.tstate = TSTATE_NONE;
837 load_reg( R_EAX, Rn );
838 SHL_imm8_r32( 16, R_EAX );
839 store_reg( R_EAX, Rn );
840 sh4_x86.tstate = TSTATE_NONE;
843 load_reg( R_EAX, Rn );
846 store_reg( R_EAX, Rn );
847 sh4_x86.tstate = TSTATE_C;
850 load_reg( R_EAX, Rn );
851 SHR_imm8_r32( 2, R_EAX );
852 store_reg( R_EAX, Rn );
853 sh4_x86.tstate = TSTATE_NONE;
856 load_reg( R_EAX, Rn );
857 SHR_imm8_r32( 8, R_EAX );
858 store_reg( R_EAX, Rn );
859 sh4_x86.tstate = TSTATE_NONE;
862 load_reg( R_EAX, Rn );
863 SHR_imm8_r32( 16, R_EAX );
864 store_reg( R_EAX, Rn );
865 sh4_x86.tstate = TSTATE_NONE;
868 load_reg( R_EAX, Rm );
869 load_reg( R_ECX, Rn );
870 SUB_r32_r32( R_EAX, R_ECX );
871 store_reg( R_ECX, Rn );
872 sh4_x86.tstate = TSTATE_NONE;
875 load_reg( R_EAX, Rm );
876 load_reg( R_ECX, Rn );
877 if( sh4_x86.tstate != TSTATE_C ) {
880 SBB_r32_r32( R_EAX, R_ECX );
881 store_reg( R_ECX, Rn );
883 sh4_x86.tstate = TSTATE_C;
886 load_reg( R_EAX, Rm );
887 load_reg( R_ECX, Rn );
888 SUB_r32_r32( R_EAX, R_ECX );
889 store_reg( R_ECX, Rn );
891 sh4_x86.tstate = TSTATE_O;
894 load_reg( R_EAX, Rm );
895 XCHG_r8_r8( R_AL, R_AH );
896 store_reg( R_EAX, Rn );
899 load_reg( R_EAX, Rm );
900 MOV_r32_r32( R_EAX, R_ECX );
901 SHL_imm8_r32( 16, R_ECX );
902 SHR_imm8_r32( 16, R_EAX );
903 OR_r32_r32( R_EAX, R_ECX );
904 store_reg( R_ECX, Rn );
905 sh4_x86.tstate = TSTATE_NONE;
908 load_reg( R_ECX, Rn );
909 MEM_READ_BYTE( R_ECX, R_EAX );
910 TEST_r8_r8( R_AL, R_AL );
912 OR_imm8_r8( 0x80, R_AL );
913 load_reg( R_ECX, Rn );
914 MEM_WRITE_BYTE( R_ECX, R_EAX );
915 sh4_x86.tstate = TSTATE_NONE;
918 load_reg( R_EAX, Rm );
919 load_reg( R_ECX, Rn );
920 TEST_r32_r32( R_EAX, R_ECX );
922 sh4_x86.tstate = TSTATE_E;
925 load_reg( R_EAX, 0 );
926 TEST_imm32_r32( imm, R_EAX );
928 sh4_x86.tstate = TSTATE_E;
930 TST.B #imm, @(R0, GBR) {:
932 load_spreg( R_ECX, R_GBR );
933 ADD_r32_r32( R_EAX, R_ECX );
934 MEM_READ_BYTE( R_ECX, R_EAX );
935 TEST_imm8_r8( imm, R_AL );
937 sh4_x86.tstate = TSTATE_E;
940 load_reg( R_EAX, Rm );
941 load_reg( R_ECX, Rn );
942 XOR_r32_r32( R_EAX, R_ECX );
943 store_reg( R_ECX, Rn );
944 sh4_x86.tstate = TSTATE_NONE;
947 load_reg( R_EAX, 0 );
948 XOR_imm32_r32( imm, R_EAX );
949 store_reg( R_EAX, 0 );
950 sh4_x86.tstate = TSTATE_NONE;
952 XOR.B #imm, @(R0, GBR) {:
953 load_reg( R_EAX, 0 );
954 load_spreg( R_ECX, R_GBR );
955 ADD_r32_r32( R_EAX, R_ECX );
956 PUSH_realigned_r32(R_ECX);
957 MEM_READ_BYTE(R_ECX, R_EAX);
958 POP_realigned_r32(R_ECX);
959 XOR_imm32_r32( imm, R_EAX );
960 MEM_WRITE_BYTE( R_ECX, R_EAX );
961 sh4_x86.tstate = TSTATE_NONE;
964 load_reg( R_EAX, Rm );
965 load_reg( R_ECX, Rn );
966 SHL_imm8_r32( 16, R_EAX );
967 SHR_imm8_r32( 16, R_ECX );
968 OR_r32_r32( R_EAX, R_ECX );
969 store_reg( R_ECX, Rn );
970 sh4_x86.tstate = TSTATE_NONE;
973 /* Data move instructions */
975 load_reg( R_EAX, Rm );
976 store_reg( R_EAX, Rn );
979 load_imm32( R_EAX, imm );
980 store_reg( R_EAX, Rn );
983 load_reg( R_EAX, Rm );
984 load_reg( R_ECX, Rn );
985 MEM_WRITE_BYTE( R_ECX, R_EAX );
986 sh4_x86.tstate = TSTATE_NONE;
989 load_reg( R_EAX, Rm );
990 load_reg( R_ECX, Rn );
991 ADD_imm8s_r32( -1, R_ECX );
992 store_reg( R_ECX, Rn );
993 MEM_WRITE_BYTE( R_ECX, R_EAX );
994 sh4_x86.tstate = TSTATE_NONE;
996 MOV.B Rm, @(R0, Rn) {:
997 load_reg( R_EAX, 0 );
998 load_reg( R_ECX, Rn );
999 ADD_r32_r32( R_EAX, R_ECX );
1000 load_reg( R_EAX, Rm );
1001 MEM_WRITE_BYTE( R_ECX, R_EAX );
1002 sh4_x86.tstate = TSTATE_NONE;
1004 MOV.B R0, @(disp, GBR) {:
1005 load_reg( R_EAX, 0 );
1006 load_spreg( R_ECX, R_GBR );
1007 ADD_imm32_r32( disp, R_ECX );
1008 MEM_WRITE_BYTE( R_ECX, R_EAX );
1009 sh4_x86.tstate = TSTATE_NONE;
1011 MOV.B R0, @(disp, Rn) {:
1012 load_reg( R_EAX, 0 );
1013 load_reg( R_ECX, Rn );
1014 ADD_imm32_r32( disp, R_ECX );
1015 MEM_WRITE_BYTE( R_ECX, R_EAX );
1016 sh4_x86.tstate = TSTATE_NONE;
1019 load_reg( R_ECX, Rm );
1020 MEM_READ_BYTE( R_ECX, R_EAX );
1021 store_reg( R_EAX, Rn );
1022 sh4_x86.tstate = TSTATE_NONE;
1025 load_reg( R_ECX, Rm );
1026 MOV_r32_r32( R_ECX, R_EAX );
1027 ADD_imm8s_r32( 1, R_EAX );
1028 store_reg( R_EAX, Rm );
1029 MEM_READ_BYTE( R_ECX, R_EAX );
1030 store_reg( R_EAX, Rn );
1031 sh4_x86.tstate = TSTATE_NONE;
1033 MOV.B @(R0, Rm), Rn {:
1034 load_reg( R_EAX, 0 );
1035 load_reg( R_ECX, Rm );
1036 ADD_r32_r32( R_EAX, R_ECX );
1037 MEM_READ_BYTE( R_ECX, R_EAX );
1038 store_reg( R_EAX, Rn );
1039 sh4_x86.tstate = TSTATE_NONE;
1041 MOV.B @(disp, GBR), R0 {:
1042 load_spreg( R_ECX, R_GBR );
1043 ADD_imm32_r32( disp, R_ECX );
1044 MEM_READ_BYTE( R_ECX, R_EAX );
1045 store_reg( R_EAX, 0 );
1046 sh4_x86.tstate = TSTATE_NONE;
1048 MOV.B @(disp, Rm), R0 {:
1049 load_reg( R_ECX, Rm );
1050 ADD_imm32_r32( disp, R_ECX );
1051 MEM_READ_BYTE( R_ECX, R_EAX );
1052 store_reg( R_EAX, 0 );
1053 sh4_x86.tstate = TSTATE_NONE;
1056 load_reg( R_EAX, Rm );
1057 load_reg( R_ECX, Rn );
1058 check_walign32(R_ECX);
1059 MEM_WRITE_LONG( R_ECX, R_EAX );
1060 sh4_x86.tstate = TSTATE_NONE;
1063 load_reg( R_EAX, Rm );
1064 load_reg( R_ECX, Rn );
1065 check_walign32( R_ECX );
1066 ADD_imm8s_r32( -4, R_ECX );
1067 store_reg( R_ECX, Rn );
1068 MEM_WRITE_LONG( R_ECX, R_EAX );
1069 sh4_x86.tstate = TSTATE_NONE;
1071 MOV.L Rm, @(R0, Rn) {:
1072 load_reg( R_EAX, 0 );
1073 load_reg( R_ECX, Rn );
1074 ADD_r32_r32( R_EAX, R_ECX );
1075 check_walign32( R_ECX );
1076 load_reg( R_EAX, Rm );
1077 MEM_WRITE_LONG( R_ECX, R_EAX );
1078 sh4_x86.tstate = TSTATE_NONE;
1080 MOV.L R0, @(disp, GBR) {:
1081 load_spreg( R_ECX, R_GBR );
1082 load_reg( R_EAX, 0 );
1083 ADD_imm32_r32( disp, R_ECX );
1084 check_walign32( R_ECX );
1085 MEM_WRITE_LONG( R_ECX, R_EAX );
1086 sh4_x86.tstate = TSTATE_NONE;
1088 MOV.L Rm, @(disp, Rn) {:
1089 load_reg( R_ECX, Rn );
1090 load_reg( R_EAX, Rm );
1091 ADD_imm32_r32( disp, R_ECX );
1092 check_walign32( R_ECX );
1093 MEM_WRITE_LONG( R_ECX, R_EAX );
1094 sh4_x86.tstate = TSTATE_NONE;
1097 load_reg( R_ECX, Rm );
1098 check_ralign32( R_ECX );
1099 MEM_READ_LONG( R_ECX, R_EAX );
1100 store_reg( R_EAX, Rn );
1101 sh4_x86.tstate = TSTATE_NONE;
1104 load_reg( R_EAX, Rm );
1105 check_ralign32( R_EAX );
1106 MOV_r32_r32( R_EAX, R_ECX );
1107 ADD_imm8s_r32( 4, R_EAX );
1108 store_reg( R_EAX, Rm );
1109 MEM_READ_LONG( R_ECX, R_EAX );
1110 store_reg( R_EAX, Rn );
1111 sh4_x86.tstate = TSTATE_NONE;
1113 MOV.L @(R0, Rm), Rn {:
1114 load_reg( R_EAX, 0 );
1115 load_reg( R_ECX, Rm );
1116 ADD_r32_r32( R_EAX, R_ECX );
1117 check_ralign32( R_ECX );
1118 MEM_READ_LONG( R_ECX, R_EAX );
1119 store_reg( R_EAX, Rn );
1120 sh4_x86.tstate = TSTATE_NONE;
1122 MOV.L @(disp, GBR), R0 {:
1123 load_spreg( R_ECX, R_GBR );
1124 ADD_imm32_r32( disp, R_ECX );
1125 check_ralign32( R_ECX );
1126 MEM_READ_LONG( R_ECX, R_EAX );
1127 store_reg( R_EAX, 0 );
1128 sh4_x86.tstate = TSTATE_NONE;
1130 MOV.L @(disp, PC), Rn {:
1131 if( sh4_x86.in_delay_slot ) {
1134 uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
1135 if( IS_IN_ICACHE(target) ) {
1136 // If the target address is in the same page as the code, it's
1137 // pretty safe to just ref it directly and circumvent the whole
1138 // memory subsystem. (this is a big performance win)
1140 // FIXME: There's a corner-case that's not handled here when
1141 // the current code-page is in the ITLB but not in the UTLB.
1142 // (should generate a TLB miss although need to test SH4
1143 // behaviour to confirm) Unlikely to be anyone depending on this
1144 // behaviour though.
1145 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1146 MOV_moff32_EAX( ptr );
1148 // Note: we use sh4r.pc for the calc as we could be running at a
1149 // different virtual address than the translation was done with,
1150 // but we can safely assume that the low bits are the same.
1151 load_imm32( R_ECX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1152 ADD_sh4r_r32( R_PC, R_ECX );
1153 MEM_READ_LONG( R_ECX, R_EAX );
1154 sh4_x86.tstate = TSTATE_NONE;
1156 store_reg( R_EAX, Rn );
1159 MOV.L @(disp, Rm), Rn {:
1160 load_reg( R_ECX, Rm );
1161 ADD_imm8s_r32( disp, R_ECX );
1162 check_ralign32( R_ECX );
1163 MEM_READ_LONG( R_ECX, R_EAX );
1164 store_reg( R_EAX, Rn );
1165 sh4_x86.tstate = TSTATE_NONE;
1168 load_reg( R_ECX, Rn );
1169 check_walign16( R_ECX );
1170 load_reg( R_EAX, Rm );
1171 MEM_WRITE_WORD( R_ECX, R_EAX );
1172 sh4_x86.tstate = TSTATE_NONE;
1175 load_reg( R_ECX, Rn );
1176 check_walign16( R_ECX );
1177 load_reg( R_EAX, Rm );
1178 ADD_imm8s_r32( -2, R_ECX );
1179 store_reg( R_ECX, Rn );
1180 MEM_WRITE_WORD( R_ECX, R_EAX );
1181 sh4_x86.tstate = TSTATE_NONE;
1183 MOV.W Rm, @(R0, Rn) {:
1184 load_reg( R_EAX, 0 );
1185 load_reg( R_ECX, Rn );
1186 ADD_r32_r32( R_EAX, R_ECX );
1187 check_walign16( R_ECX );
1188 load_reg( R_EAX, Rm );
1189 MEM_WRITE_WORD( R_ECX, R_EAX );
1190 sh4_x86.tstate = TSTATE_NONE;
1192 MOV.W R0, @(disp, GBR) {:
1193 load_spreg( R_ECX, R_GBR );
1194 load_reg( R_EAX, 0 );
1195 ADD_imm32_r32( disp, R_ECX );
1196 check_walign16( R_ECX );
1197 MEM_WRITE_WORD( R_ECX, R_EAX );
1198 sh4_x86.tstate = TSTATE_NONE;
1200 MOV.W R0, @(disp, Rn) {:
1201 load_reg( R_ECX, Rn );
1202 load_reg( R_EAX, 0 );
1203 ADD_imm32_r32( disp, R_ECX );
1204 check_walign16( R_ECX );
1205 MEM_WRITE_WORD( R_ECX, R_EAX );
1206 sh4_x86.tstate = TSTATE_NONE;
1209 load_reg( R_ECX, Rm );
1210 check_ralign16( R_ECX );
1211 MEM_READ_WORD( R_ECX, R_EAX );
1212 store_reg( R_EAX, Rn );
1213 sh4_x86.tstate = TSTATE_NONE;
1216 load_reg( R_EAX, Rm );
1217 check_ralign16( R_EAX );
1218 MOV_r32_r32( R_EAX, R_ECX );
1219 ADD_imm8s_r32( 2, R_EAX );
1220 store_reg( R_EAX, Rm );
1221 MEM_READ_WORD( R_ECX, R_EAX );
1222 store_reg( R_EAX, Rn );
1223 sh4_x86.tstate = TSTATE_NONE;
1225 MOV.W @(R0, Rm), Rn {:
1226 load_reg( R_EAX, 0 );
1227 load_reg( R_ECX, Rm );
1228 ADD_r32_r32( R_EAX, R_ECX );
1229 check_ralign16( R_ECX );
1230 MEM_READ_WORD( R_ECX, R_EAX );
1231 store_reg( R_EAX, Rn );
1232 sh4_x86.tstate = TSTATE_NONE;
1234 MOV.W @(disp, GBR), R0 {:
1235 load_spreg( R_ECX, R_GBR );
1236 ADD_imm32_r32( disp, R_ECX );
1237 check_ralign16( R_ECX );
1238 MEM_READ_WORD( R_ECX, R_EAX );
1239 store_reg( R_EAX, 0 );
1240 sh4_x86.tstate = TSTATE_NONE;
1242 MOV.W @(disp, PC), Rn {:
1243 if( sh4_x86.in_delay_slot ) {
1246 // See comments for MOV.L @(disp, PC), Rn
1247 uint32_t target = pc + disp + 4;
1248 if( IS_IN_ICACHE(target) ) {
1249 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1250 MOV_moff32_EAX( ptr );
1251 MOVSX_r16_r32( R_EAX, R_EAX );
1253 load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 );
1254 ADD_sh4r_r32( R_PC, R_ECX );
1255 MEM_READ_WORD( R_ECX, R_EAX );
1256 sh4_x86.tstate = TSTATE_NONE;
1258 store_reg( R_EAX, Rn );
1261 MOV.W @(disp, Rm), R0 {:
1262 load_reg( R_ECX, Rm );
1263 ADD_imm32_r32( disp, R_ECX );
1264 check_ralign16( R_ECX );
1265 MEM_READ_WORD( R_ECX, R_EAX );
1266 store_reg( R_EAX, 0 );
1267 sh4_x86.tstate = TSTATE_NONE;
1269 MOVA @(disp, PC), R0 {:
1270 if( sh4_x86.in_delay_slot ) {
1273 load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1274 ADD_sh4r_r32( R_PC, R_ECX );
1275 store_reg( R_ECX, 0 );
1279 load_reg( R_EAX, 0 );
1280 load_reg( R_ECX, Rn );
1281 check_walign32( R_ECX );
1282 MEM_WRITE_LONG( R_ECX, R_EAX );
1283 sh4_x86.tstate = TSTATE_NONE;
1286 /* Control transfer instructions */
1288 if( sh4_x86.in_delay_slot ) {
1291 JT_rel8( EXIT_BLOCK_SIZE, nottaken );
1292 exit_block( disp + pc + 4, pc+2 );
1293 JMP_TARGET(nottaken);
1298 if( sh4_x86.in_delay_slot ) {
1301 sh4_x86.in_delay_slot = TRUE;
1302 if( sh4_x86.tstate == TSTATE_NONE ) {
1303 CMP_imm8s_sh4r( 1, R_T );
1304 sh4_x86.tstate = TSTATE_E;
1306 OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JNE rel32
1307 sh4_translate_instruction(pc+2);
1308 exit_block( disp + pc + 4, pc+4 );
1310 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1311 sh4_translate_instruction(pc+2);
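/* Explanatory note: for the delayed conditional branch the delay-slot
 * instruction is translated twice - once on the taken path ahead of
 * exit_block() and once more on the fall-through path - and the rel32 of the
 * conditional jump emitted above is patched in once the size of the
 * taken-path code is known. */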
1316 if( sh4_x86.in_delay_slot ) {
1319 sh4_x86.in_delay_slot = TRUE;
1320 sh4_translate_instruction( pc + 2 );
1321 exit_block( disp + pc + 4, pc+4 );
1322 sh4_x86.branch_taken = TRUE;
1327 if( sh4_x86.in_delay_slot ) {
1330 load_reg( R_EAX, Rn );
1331 ADD_imm32_r32( pc + 4, R_EAX );
1332 store_spreg( R_EAX, REG_OFFSET(pc) );
1333 sh4_x86.in_delay_slot = TRUE;
1334 sh4_x86.tstate = TSTATE_NONE;
1335 sh4_translate_instruction( pc + 2 );
1336 exit_block_pcset(pc+2);
1337 sh4_x86.branch_taken = TRUE;
1342 if( sh4_x86.in_delay_slot ) {
1345 load_imm32( R_EAX, pc + 4 );
1346 store_spreg( R_EAX, R_PR );
1347 sh4_x86.in_delay_slot = TRUE;
1348 sh4_translate_instruction( pc + 2 );
1349 exit_block( disp + pc + 4, pc+4 );
1350 sh4_x86.branch_taken = TRUE;
1355 if( sh4_x86.in_delay_slot ) {
1358 load_imm32( R_ECX, pc + 4 );
1359 store_spreg( R_ECX, R_PR );
1360 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_ECX );
1361 store_spreg( R_ECX, REG_OFFSET(pc) );
1362 sh4_x86.in_delay_slot = TRUE;
1363 sh4_x86.tstate = TSTATE_NONE;
1364 sh4_translate_instruction( pc + 2 );
1365 exit_block_pcset(pc+2);
1366 sh4_x86.branch_taken = TRUE;
1371 if( sh4_x86.in_delay_slot ) {
1374 JF_rel8( EXIT_BLOCK_SIZE, nottaken );
1375 exit_block( disp + pc + 4, pc+2 );
1376 JMP_TARGET(nottaken);
1381 if( sh4_x86.in_delay_slot ) {
1384 sh4_x86.in_delay_slot = TRUE;
1385 if( sh4_x86.tstate == TSTATE_NONE ) {
1386 CMP_imm8s_sh4r( 1, R_T );
1387 sh4_x86.tstate = TSTATE_E;
1389 OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JE rel32
1390 sh4_translate_instruction(pc+2);
1391 exit_block( disp + pc + 4, pc+4 );
1393 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1394 sh4_translate_instruction(pc+2);
1399 if( sh4_x86.in_delay_slot ) {
1402 load_reg( R_ECX, Rn );
1403 store_spreg( R_ECX, REG_OFFSET(pc) );
1404 sh4_x86.in_delay_slot = TRUE;
1405 sh4_translate_instruction(pc+2);
1406 exit_block_pcset(pc+2);
1407 sh4_x86.branch_taken = TRUE;
1412 if( sh4_x86.in_delay_slot ) {
1415 load_imm32( R_EAX, pc + 4 );
1416 store_spreg( R_EAX, R_PR );
1417 load_reg( R_ECX, Rn );
1418 store_spreg( R_ECX, REG_OFFSET(pc) );
1419 sh4_x86.in_delay_slot = TRUE;
1420 sh4_translate_instruction(pc+2);
1421 exit_block_pcset(pc+2);
1422 sh4_x86.branch_taken = TRUE;
1427 if( sh4_x86.in_delay_slot ) {
1431 load_spreg( R_ECX, R_SPC );
1432 store_spreg( R_ECX, REG_OFFSET(pc) );
1433 load_spreg( R_EAX, R_SSR );
1434 call_func1( sh4_write_sr, R_EAX );
1435 sh4_x86.in_delay_slot = TRUE;
1436 sh4_x86.priv_checked = FALSE;
1437 sh4_x86.fpuen_checked = FALSE;
1438 sh4_x86.tstate = TSTATE_NONE;
1439 sh4_translate_instruction(pc+2);
1440 exit_block_pcset(pc+2);
1441 sh4_x86.branch_taken = TRUE;
1446 if( sh4_x86.in_delay_slot ) {
1449 load_spreg( R_ECX, R_PR );
1450 store_spreg( R_ECX, REG_OFFSET(pc) );
1451 sh4_x86.in_delay_slot = TRUE;
1452 sh4_translate_instruction(pc+2);
1453 exit_block_pcset(pc+2);
1454 sh4_x86.branch_taken = TRUE;
1459 if( sh4_x86.in_delay_slot ) {
1462 load_imm32( R_ECX, pc+2 );
1463 store_spreg( R_ECX, REG_OFFSET(pc) );
1464 load_imm32( R_EAX, imm );
1465 call_func1( sh4_raise_trap, R_EAX );
1466 sh4_x86.tstate = TSTATE_NONE;
1467 exit_block_pcset(pc);
1468 sh4_x86.branch_taken = TRUE;
1473 if( sh4_x86.in_delay_slot ) {
1476 JMP_exc(EXC_ILLEGAL);
1482 XOR_r32_r32(R_EAX, R_EAX);
1483 store_spreg( R_EAX, R_MACL );
1484 store_spreg( R_EAX, R_MACH );
1485 sh4_x86.tstate = TSTATE_NONE;
1490 sh4_x86.tstate = TSTATE_C;
1495 sh4_x86.tstate = TSTATE_C;
1500 sh4_x86.tstate = TSTATE_C;
1505 sh4_x86.tstate = TSTATE_C;
1508 /* Floating point moves */
1510 /* As horrible as this looks, it's actually covering 5 separate cases:
1511 * 1. 32-bit fr-to-fr (PR=0)
1512 * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )
1513 * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )
1514 * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
1515 * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
1518 load_spreg( R_ECX, R_FPSCR );
1519 load_fr_bank( R_EDX );
1520 TEST_imm32_r32( FPSCR_SZ, R_ECX );
1521 JNE_rel8(8, doublesize);
1522 load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
1523 store_fr( R_EDX, R_EAX, FRn );
1526 JMP_TARGET(doublesize);
1527 load_xf_bank( R_ECX );
1528 load_fr( R_ECX, R_EAX, FRm-1 );
1530 load_fr( R_ECX, R_EDX, FRm );
1531 store_fr( R_ECX, R_EAX, FRn-1 );
1532 store_fr( R_ECX, R_EDX, FRn );
1533 } else /* FRn&1 == 0 */ {
1534 load_fr( R_ECX, R_ECX, FRm );
1535 store_fr( R_EDX, R_EAX, FRn );
1536 store_fr( R_EDX, R_ECX, FRn+1 );
1539 } else /* FRm&1 == 0 */ {
1542 load_xf_bank( R_ECX );
1543 load_fr( R_EDX, R_EAX, FRm );
1544 load_fr( R_EDX, R_EDX, FRm+1 );
1545 store_fr( R_ECX, R_EAX, FRn-1 );
1546 store_fr( R_ECX, R_EDX, FRn );
1548 } else /* FRn&1 == 0 */ {
1550 load_fr( R_EDX, R_EAX, FRm );
1551 load_fr( R_EDX, R_ECX, FRm+1 );
1552 store_fr( R_EDX, R_EAX, FRn );
1553 store_fr( R_EDX, R_ECX, FRn+1 );
1557 sh4_x86.tstate = TSTATE_NONE;
1561 load_reg( R_ECX, Rn );
1562 check_walign32( R_ECX );
1563 load_spreg( R_EDX, R_FPSCR );
1564 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1565 JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
1566 load_fr_bank( R_EDX );
1567 load_fr( R_EDX, R_EAX, FRm );
1568 MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
1570 JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
1571 JMP_TARGET(doublesize);
1572 load_xf_bank( R_EDX );
1573 load_fr( R_EDX, R_EAX, FRm&0x0E );
1574 load_fr( R_EDX, R_EDX, FRm|0x01 );
1575 MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
1578 JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
1579 JMP_TARGET(doublesize);
1580 load_fr_bank( R_EDX );
1581 load_fr( R_EDX, R_EAX, FRm&0x0E );
1582 load_fr( R_EDX, R_EDX, FRm|0x01 );
1583 MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
1586 sh4_x86.tstate = TSTATE_NONE;
1590 load_reg( R_ECX, Rm );
1591 check_ralign32( R_ECX );
1592 load_spreg( R_EDX, R_FPSCR );
1593 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1594 JNE_rel8(8 + MEM_READ_SIZE, doublesize);
1595 MEM_READ_LONG( R_ECX, R_EAX );
1596 load_fr_bank( R_EDX );
1597 store_fr( R_EDX, R_EAX, FRn );
1599 JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
1600 JMP_TARGET(doublesize);
1601 MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
1602 load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
1603 load_xf_bank( R_EDX );
1604 store_fr( R_EDX, R_EAX, FRn&0x0E );
1605 store_fr( R_EDX, R_ECX, FRn|0x01 );
1608 JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
1609 JMP_TARGET(doublesize);
1610 MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
1611 load_fr_bank( R_EDX );
1612 store_fr( R_EDX, R_EAX, FRn&0x0E );
1613 store_fr( R_EDX, R_ECX, FRn|0x01 );
1616 sh4_x86.tstate = TSTATE_NONE;
1620 load_reg( R_ECX, Rn );
1621 check_walign32( R_ECX );
1622 load_spreg( R_EDX, R_FPSCR );
1623 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1624 JNE_rel8(14 + MEM_WRITE_SIZE, doublesize);
1625 load_fr_bank( R_EDX );
1626 load_fr( R_EDX, R_EAX, FRm );
1627 ADD_imm8s_r32(-4,R_ECX);
1628 store_reg( R_ECX, Rn );
1629 MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
1631 JMP_rel8( 24 + MEM_WRITE_DOUBLE_SIZE, end );
1632 JMP_TARGET(doublesize);
1633 load_xf_bank( R_EDX );
1634 load_fr( R_EDX, R_EAX, FRm&0x0E );
1635 load_fr( R_EDX, R_EDX, FRm|0x01 );
1636 ADD_imm8s_r32(-8,R_ECX);
1637 store_reg( R_ECX, Rn );
1638 MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
1641 JMP_rel8( 15 + MEM_WRITE_DOUBLE_SIZE, end );
1642 JMP_TARGET(doublesize);
1643 load_fr_bank( R_EDX );
1644 load_fr( R_EDX, R_EAX, FRm&0x0E );
1645 load_fr( R_EDX, R_EDX, FRm|0x01 );
1646 ADD_imm8s_r32(-8,R_ECX);
1647 store_reg( R_ECX, Rn );
1648 MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
1651 sh4_x86.tstate = TSTATE_NONE;
1655 load_reg( R_ECX, Rm );
1656 check_ralign32( R_ECX );
1657 MOV_r32_r32( R_ECX, R_EAX );
1658 load_spreg( R_EDX, R_FPSCR );
1659 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1660 JNE_rel8(14 + MEM_READ_SIZE, doublesize);
1661 ADD_imm8s_r32( 4, R_EAX );
1662 store_reg( R_EAX, Rm );
1663 MEM_READ_LONG( R_ECX, R_EAX );
1664 load_fr_bank( R_EDX );
1665 store_fr( R_EDX, R_EAX, FRn );
1667 JMP_rel8(27 + MEM_READ_DOUBLE_SIZE, end);
1668 JMP_TARGET(doublesize);
1669 ADD_imm8s_r32( 8, R_EAX );
1670 store_reg(R_EAX, Rm);
1671 MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
1672 load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
1673 load_xf_bank( R_EDX );
1674 store_fr( R_EDX, R_EAX, FRn&0x0E );
1675 store_fr( R_EDX, R_ECX, FRn|0x01 );
1678 JMP_rel8(15 + MEM_READ_DOUBLE_SIZE, end);
1679 ADD_imm8s_r32( 8, R_EAX );
1680 store_reg(R_EAX, Rm);
1681 MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
1682 load_fr_bank( R_EDX );
1683 store_fr( R_EDX, R_EAX, FRn&0x0E );
1684 store_fr( R_EDX, R_ECX, FRn|0x01 );
1687 sh4_x86.tstate = TSTATE_NONE;
1689 FMOV FRm, @(R0, Rn) {:
1691 load_reg( R_ECX, Rn );
1692 ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX );
1693 check_walign32( R_ECX );
1694 load_spreg( R_EDX, R_FPSCR );
1695 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1696 JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
1697 load_fr_bank( R_EDX );
1698 load_fr( R_EDX, R_EAX, FRm );
1699 MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
1701 JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
1702 JMP_TARGET(doublesize);
1703 load_xf_bank( R_EDX );
1704 load_fr( R_EDX, R_EAX, FRm&0x0E );
1705 load_fr( R_EDX, R_EDX, FRm|0x01 );
1706 MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
1709 JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
1710 JMP_TARGET(doublesize);
1711 load_fr_bank( R_EDX );
1712 load_fr( R_EDX, R_EAX, FRm&0x0E );
1713 load_fr( R_EDX, R_EDX, FRm|0x01 );
1714 MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
1717 sh4_x86.tstate = TSTATE_NONE;
1719 FMOV @(R0, Rm), FRn {:
1721 load_reg( R_ECX, Rm );
1722 ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX );
1723 check_ralign32( R_ECX );
1724 load_spreg( R_EDX, R_FPSCR );
1725 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1726 JNE_rel8(8 + MEM_READ_SIZE, doublesize);
1727 MEM_READ_LONG( R_ECX, R_EAX );
1728 load_fr_bank( R_EDX );
1729 store_fr( R_EDX, R_EAX, FRn );
1731 JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
1732 JMP_TARGET(doublesize);
1733 MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
1734 load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
1735 load_xf_bank( R_EDX );
1736 store_fr( R_EDX, R_EAX, FRn&0x0E );
1737 store_fr( R_EDX, R_ECX, FRn|0x01 );
1740 JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
1741 JMP_TARGET(doublesize);
1742 MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
1743 load_fr_bank( R_EDX );
1744 store_fr( R_EDX, R_EAX, FRn&0x0E );
1745 store_fr( R_EDX, R_ECX, FRn|0x01 );
1748 sh4_x86.tstate = TSTATE_NONE;
1750 FLDI0 FRn {: /* IFF PR=0 */
1752 load_spreg( R_ECX, R_FPSCR );
1753 TEST_imm32_r32( FPSCR_PR, R_ECX );
1755 XOR_r32_r32( R_EAX, R_EAX );
1756 load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1757 store_fr( R_ECX, R_EAX, FRn );
1759 sh4_x86.tstate = TSTATE_NONE;
1761 FLDI1 FRn {: /* IFF PR=0 */
1763 load_spreg( R_ECX, R_FPSCR );
1764 TEST_imm32_r32( FPSCR_PR, R_ECX );
1766 load_imm32(R_EAX, 0x3F800000);
1767 load_spreg( R_ECX, REG_OFFSET(fr_bank) );
1768 store_fr( R_ECX, R_EAX, FRn );
1770 sh4_x86.tstate = TSTATE_NONE;
1775 load_spreg( R_ECX, R_FPSCR );
1776 load_spreg(R_EDX, REG_OFFSET(fr_bank));
1778 TEST_imm32_r32( FPSCR_PR, R_ECX );
1779 JNE_rel8(5, doubleprec);
1780 pop_fr( R_EDX, FRn );
1782 JMP_TARGET(doubleprec);
1783 pop_dr( R_EDX, FRn );
1785 sh4_x86.tstate = TSTATE_NONE;
1789 load_spreg( R_ECX, R_FPSCR );
1790 load_fr_bank( R_EDX );
1791 TEST_imm32_r32( FPSCR_PR, R_ECX );
1792 JNE_rel8(5, doubleprec);
1793 push_fr( R_EDX, FRm );
1795 JMP_TARGET(doubleprec);
1796 push_dr( R_EDX, FRm );
1798 load_imm32( R_ECX, (uint32_t)&max_int );
1799 FILD_r32ind( R_ECX );
1801 JNA_rel8( 32, sat );
1802 load_imm32( R_ECX, (uint32_t)&min_int ); // 5
1803 FILD_r32ind( R_ECX ); // 2
1805 JAE_rel8( 21, sat2 ); // 2
1806 load_imm32( R_EAX, (uint32_t)&save_fcw );
1807 FNSTCW_r32ind( R_EAX );
1808 load_imm32( R_EDX, (uint32_t)&trunc_fcw );
1809 FLDCW_r32ind( R_EDX );
1810 FISTP_sh4r(R_FPUL); // 3
1811 FLDCW_r32ind( R_EAX );
1812 JMP_rel8( 9, end ); // 2
1816 MOV_r32ind_r32( R_ECX, R_ECX ); // 2
1817 store_spreg( R_ECX, R_FPUL );
1820 sh4_x86.tstate = TSTATE_NONE;
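/* Explanatory note: FTRC requires round-toward-zero, while the host FPU
 * defaults to round-to-nearest. The emitted code therefore saves the current
 * control word, loads trunc_fcw (rounding control = truncate), performs the
 * FISTP into FPUL, and restores the saved control word; values outside the
 * signed 32-bit range are clamped to max_int / min_int beforehand. */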
1824 load_fr_bank( R_ECX );
1825 load_fr( R_ECX, R_EAX, FRm );
1826 store_spreg( R_EAX, R_FPUL );
1827 sh4_x86.tstate = TSTATE_NONE;
1831 load_fr_bank( R_ECX );
1832 load_spreg( R_EAX, R_FPUL );
1833 store_fr( R_ECX, R_EAX, FRn );
1834 sh4_x86.tstate = TSTATE_NONE;
1838 load_spreg( R_ECX, R_FPSCR );
1839 TEST_imm32_r32( FPSCR_PR, R_ECX );
1840 JE_rel8(9, end); // only when PR=1
1841 load_fr_bank( R_ECX );
1842 push_dr( R_ECX, FRm );
1845 sh4_x86.tstate = TSTATE_NONE;
1849 load_spreg( R_ECX, R_FPSCR );
1850 TEST_imm32_r32( FPSCR_PR, R_ECX );
1851 JE_rel8(9, end); // only when PR=1
1852 load_fr_bank( R_ECX );
1854 pop_dr( R_ECX, FRn );
1856 sh4_x86.tstate = TSTATE_NONE;
1859 /* Floating point instructions */
1862 load_spreg( R_ECX, R_FPSCR );
1863 load_fr_bank( R_EDX );
1864 TEST_imm32_r32( FPSCR_PR, R_ECX );
1865 JNE_rel8(10, doubleprec);
1866 push_fr(R_EDX, FRn); // 3
1868 pop_fr( R_EDX, FRn); //3
1869 JMP_rel8(8,end); // 2
1870 JMP_TARGET(doubleprec);
1871 push_dr(R_EDX, FRn);
1875 sh4_x86.tstate = TSTATE_NONE;
1879 load_spreg( R_ECX, R_FPSCR );
1880 TEST_imm32_r32( FPSCR_PR, R_ECX );
1881 load_fr_bank( R_EDX );
1882 JNE_rel8(13,doubleprec);
1883 push_fr(R_EDX, FRm);
1884 push_fr(R_EDX, FRn);
1888 JMP_TARGET(doubleprec);
1889 push_dr(R_EDX, FRm);
1890 push_dr(R_EDX, FRn);
1894 sh4_x86.tstate = TSTATE_NONE;
1898 load_spreg( R_ECX, R_FPSCR );
1899 TEST_imm32_r32( FPSCR_PR, R_ECX );
1900 load_fr_bank( R_EDX );
1901 JNE_rel8(13, doubleprec);
1902 push_fr(R_EDX, FRn);
1903 push_fr(R_EDX, FRm);
1907 JMP_TARGET(doubleprec);
1908 push_dr(R_EDX, FRn);
1909 push_dr(R_EDX, FRm);
1913 sh4_x86.tstate = TSTATE_NONE;
1915 FMAC FR0, FRm, FRn {:
1917 load_spreg( R_ECX, R_FPSCR );
1918 load_spreg( R_EDX, REG_OFFSET(fr_bank));
1919 TEST_imm32_r32( FPSCR_PR, R_ECX );
1920 JNE_rel8(18, doubleprec);
1921 push_fr( R_EDX, 0 );
1922 push_fr( R_EDX, FRm );
1924 push_fr( R_EDX, FRn );
1926 pop_fr( R_EDX, FRn );
1928 JMP_TARGET(doubleprec);
1929 push_dr( R_EDX, 0 );
1930 push_dr( R_EDX, FRm );
1932 push_dr( R_EDX, FRn );
1934 pop_dr( R_EDX, FRn );
1936 sh4_x86.tstate = TSTATE_NONE;
1941 load_spreg( R_ECX, R_FPSCR );
1942 TEST_imm32_r32( FPSCR_PR, R_ECX );
1943 load_fr_bank( R_EDX );
1944 JNE_rel8(13, doubleprec);
1945 push_fr(R_EDX, FRm);
1946 push_fr(R_EDX, FRn);
1950 JMP_TARGET(doubleprec);
1951 push_dr(R_EDX, FRm);
1952 push_dr(R_EDX, FRn);
1956 sh4_x86.tstate = TSTATE_NONE;
1960 load_spreg( R_ECX, R_FPSCR );
1961 TEST_imm32_r32( FPSCR_PR, R_ECX );
1962 load_fr_bank( R_EDX );
1963 JNE_rel8(10, doubleprec);
1964 push_fr(R_EDX, FRn);
1968 JMP_TARGET(doubleprec);
1969 push_dr(R_EDX, FRn);
1973 sh4_x86.tstate = TSTATE_NONE;
1977 load_spreg( R_ECX, R_FPSCR );
1978 TEST_imm32_r32( FPSCR_PR, R_ECX );
1979 load_fr_bank( R_EDX );
1980 JNE_rel8(12, end); // PR=0 only
1982 push_fr(R_EDX, FRn);
1987 sh4_x86.tstate = TSTATE_NONE;
1991 load_spreg( R_ECX, R_FPSCR );
1992 TEST_imm32_r32( FPSCR_PR, R_ECX );
1993 load_fr_bank( R_EDX );
1994 JNE_rel8(10, doubleprec);
1995 push_fr(R_EDX, FRn);
1999 JMP_TARGET(doubleprec);
2000 push_dr(R_EDX, FRn);
2004 sh4_x86.tstate = TSTATE_NONE;
2008 load_spreg( R_ECX, R_FPSCR );
2009 TEST_imm32_r32( FPSCR_PR, R_ECX );
2010 load_fr_bank( R_EDX );
2011 JNE_rel8(13, doubleprec);
2012 push_fr(R_EDX, FRn);
2013 push_fr(R_EDX, FRm);
2017 JMP_TARGET(doubleprec);
2018 push_dr(R_EDX, FRn);
2019 push_dr(R_EDX, FRm);
2023 sh4_x86.tstate = TSTATE_NONE;
2028 load_spreg( R_ECX, R_FPSCR );
2029 TEST_imm32_r32( FPSCR_PR, R_ECX );
2030 load_fr_bank( R_EDX );
2031 JNE_rel8(8, doubleprec);
2032 push_fr(R_EDX, FRm);
2033 push_fr(R_EDX, FRn);
2035 JMP_TARGET(doubleprec);
2036 push_dr(R_EDX, FRm);
2037 push_dr(R_EDX, FRn);
2042 sh4_x86.tstate = TSTATE_NONE;
2046 load_spreg( R_ECX, R_FPSCR );
2047 TEST_imm32_r32( FPSCR_PR, R_ECX );
2048 load_fr_bank( R_EDX );
2049 JNE_rel8(8, doubleprec);
2050 push_fr(R_EDX, FRm);
2051 push_fr(R_EDX, FRn);
2053 JMP_TARGET(doubleprec);
2054 push_dr(R_EDX, FRm);
2055 push_dr(R_EDX, FRn);
2060 sh4_x86.tstate = TSTATE_NONE;
2065 load_spreg( R_ECX, R_FPSCR );
2066 TEST_imm32_r32( FPSCR_PR, R_ECX );
2067 JNE_rel8( CALL_FUNC2_SIZE + 9, doubleprec );
2068 load_fr_bank( R_ECX );
2069 ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
2070 load_spreg( R_EDX, R_FPUL );
2071 call_func2( sh4_fsca, R_EDX, R_ECX );
2072 JMP_TARGET(doubleprec);
2073 sh4_x86.tstate = TSTATE_NONE;
2077 load_spreg( R_ECX, R_FPSCR );
2078 TEST_imm32_r32( FPSCR_PR, R_ECX );
2079 JNE_rel8(44, doubleprec);
2081 load_fr_bank( R_ECX );
2082 push_fr( R_ECX, FVm<<2 );
2083 push_fr( R_ECX, FVn<<2 );
2085 push_fr( R_ECX, (FVm<<2)+1);
2086 push_fr( R_ECX, (FVn<<2)+1);
2089 push_fr( R_ECX, (FVm<<2)+2);
2090 push_fr( R_ECX, (FVn<<2)+2);
2093 push_fr( R_ECX, (FVm<<2)+3);
2094 push_fr( R_ECX, (FVn<<2)+3);
2097 pop_fr( R_ECX, (FVn<<2)+3);
2098 JMP_TARGET(doubleprec);
2099 sh4_x86.tstate = TSTATE_NONE;
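/* Explanatory note: FIPR computes the inner product of the two 4-element
 * vectors FVm and FVn (single precision only). Each pair of elements is pushed
 * and multiplied on the x87 stack, the partial products are summed, and the
 * result is popped back into FR[(FVn<<2)+3]. */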
2103 load_spreg( R_ECX, R_FPSCR );
2104 TEST_imm32_r32( FPSCR_PR, R_ECX );
2105 JNE_rel8( 18 + CALL_FUNC2_SIZE, doubleprec );
2106 load_fr_bank( R_EDX ); // 3
2107 ADD_imm8s_r32( FVn<<4, R_EDX ); // 3
2108 load_xf_bank( R_ECX ); // 12
2109 call_func2( sh4_ftrv, R_EDX, R_ECX ); // 12
2110 JMP_TARGET(doubleprec);
2111 sh4_x86.tstate = TSTATE_NONE;
2116 load_spreg( R_ECX, R_FPSCR );
2117 XOR_imm32_r32( FPSCR_FR, R_ECX );
2118 store_spreg( R_ECX, R_FPSCR );
2119 update_fr_bank( R_ECX );
2120 sh4_x86.tstate = TSTATE_NONE;
2124 load_spreg( R_ECX, R_FPSCR );
2125 XOR_imm32_r32( FPSCR_SZ, R_ECX );
2126 store_spreg( R_ECX, R_FPSCR );
2127 sh4_x86.tstate = TSTATE_NONE;
2130 /* Processor control instructions */
2132 if( sh4_x86.in_delay_slot ) {
2136 load_reg( R_EAX, Rm );
2137 call_func1( sh4_write_sr, R_EAX );
2138 sh4_x86.priv_checked = FALSE;
2139 sh4_x86.fpuen_checked = FALSE;
2140 sh4_x86.tstate = TSTATE_NONE;
2144 load_reg( R_EAX, Rm );
2145 store_spreg( R_EAX, R_GBR );
2149 load_reg( R_EAX, Rm );
2150 store_spreg( R_EAX, R_VBR );
2151 sh4_x86.tstate = TSTATE_NONE;
2155 load_reg( R_EAX, Rm );
2156 store_spreg( R_EAX, R_SSR );
2157 sh4_x86.tstate = TSTATE_NONE;
2161 load_reg( R_EAX, Rm );
2162 store_spreg( R_EAX, R_SGR );
2163 sh4_x86.tstate = TSTATE_NONE;
2167 load_reg( R_EAX, Rm );
2168 store_spreg( R_EAX, R_SPC );
2169 sh4_x86.tstate = TSTATE_NONE;
2173 load_reg( R_EAX, Rm );
2174 store_spreg( R_EAX, R_DBR );
2175 sh4_x86.tstate = TSTATE_NONE;
2179 load_reg( R_EAX, Rm );
2180 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2181 sh4_x86.tstate = TSTATE_NONE;
2184 load_reg( R_EAX, Rm );
2185 check_ralign32( R_EAX );
2186 MOV_r32_r32( R_EAX, R_ECX );
2187 ADD_imm8s_r32( 4, R_EAX );
2188 store_reg( R_EAX, Rm );
2189 MEM_READ_LONG( R_ECX, R_EAX );
2190 store_spreg( R_EAX, R_GBR );
2191 sh4_x86.tstate = TSTATE_NONE;
2194 if( sh4_x86.in_delay_slot ) {
2198 load_reg( R_EAX, Rm );
2199 check_ralign32( R_EAX );
2200 MOV_r32_r32( R_EAX, R_ECX );
2201 ADD_imm8s_r32( 4, R_EAX );
2202 store_reg( R_EAX, Rm );
2203 MEM_READ_LONG( R_ECX, R_EAX );
2204 call_func1( sh4_write_sr, R_EAX );
2205 sh4_x86.priv_checked = FALSE;
2206 sh4_x86.fpuen_checked = FALSE;
2207 sh4_x86.tstate = TSTATE_NONE;
2212 load_reg( R_EAX, Rm );
2213 check_ralign32( R_EAX );
2214 MOV_r32_r32( R_EAX, R_ECX );
2215 ADD_imm8s_r32( 4, R_EAX );
2216 store_reg( R_EAX, Rm );
2217 MEM_READ_LONG( R_ECX, R_EAX );
2218 store_spreg( R_EAX, R_VBR );
2219 sh4_x86.tstate = TSTATE_NONE;
2223 load_reg( R_EAX, Rm );
2224 check_ralign32( R_EAX );
2225 MOV_r32_r32( R_EAX, R_ECX );
2226 ADD_imm8s_r32( 4, R_EAX );
2227 store_reg( R_EAX, Rm );
2228 MEM_READ_LONG( R_ECX, R_EAX );
2229 store_spreg( R_EAX, R_SSR );
2230 sh4_x86.tstate = TSTATE_NONE;
2234 load_reg( R_EAX, Rm );
2235 check_ralign32( R_EAX );
2236 MOV_r32_r32( R_EAX, R_ECX );
2237 ADD_imm8s_r32( 4, R_EAX );
2238 store_reg( R_EAX, Rm );
2239 MEM_READ_LONG( R_ECX, R_EAX );
2240 store_spreg( R_EAX, R_SGR );
2241 sh4_x86.tstate = TSTATE_NONE;
2245 load_reg( R_EAX, Rm );
2246 check_ralign32( R_EAX );
2247 MOV_r32_r32( R_EAX, R_ECX );
2248 ADD_imm8s_r32( 4, R_EAX );
2249 store_reg( R_EAX, Rm );
2250 MEM_READ_LONG( R_ECX, R_EAX );
2251 store_spreg( R_EAX, R_SPC );
2252 sh4_x86.tstate = TSTATE_NONE;
2256 load_reg( R_EAX, Rm );
2257 check_ralign32( R_EAX );
2258 MOV_r32_r32( R_EAX, R_ECX );
2259 ADD_imm8s_r32( 4, R_EAX );
2260 store_reg( R_EAX, Rm );
2261 MEM_READ_LONG( R_ECX, R_EAX );
2262 store_spreg( R_EAX, R_DBR );
2263 sh4_x86.tstate = TSTATE_NONE;
2265 LDC.L @Rm+, Rn_BANK {:
2267 load_reg( R_EAX, Rm );
2268 check_ralign32( R_EAX );
2269 MOV_r32_r32( R_EAX, R_ECX );
2270 ADD_imm8s_r32( 4, R_EAX );
2271 store_reg( R_EAX, Rm );
2272 MEM_READ_LONG( R_ECX, R_EAX );
2273 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2274 sh4_x86.tstate = TSTATE_NONE;
2277 load_reg( R_EAX, Rm );
2278 store_spreg( R_EAX, R_FPSCR );
2279 update_fr_bank( R_EAX );
2280 sh4_x86.tstate = TSTATE_NONE;
2282 LDS.L @Rm+, FPSCR {:
2283 load_reg( R_EAX, Rm );
2284 check_ralign32( R_EAX );
2285 MOV_r32_r32( R_EAX, R_ECX );
2286 ADD_imm8s_r32( 4, R_EAX );
2287 store_reg( R_EAX, Rm );
2288 MEM_READ_LONG( R_ECX, R_EAX );
2289 store_spreg( R_EAX, R_FPSCR );
2290 update_fr_bank( R_EAX );
2291 sh4_x86.tstate = TSTATE_NONE;
2294 load_reg( R_EAX, Rm );
2295 store_spreg( R_EAX, R_FPUL );
2298 load_reg( R_EAX, Rm );
2299 check_ralign32( R_EAX );
2300 MOV_r32_r32( R_EAX, R_ECX );
2301 ADD_imm8s_r32( 4, R_EAX );
2302 store_reg( R_EAX, Rm );
2303 MEM_READ_LONG( R_ECX, R_EAX );
2304 store_spreg( R_EAX, R_FPUL );
2305 sh4_x86.tstate = TSTATE_NONE;
2308 load_reg( R_EAX, Rm );
2309 store_spreg( R_EAX, R_MACH );
2312 load_reg( R_EAX, Rm );
2313 check_ralign32( R_EAX );
2314 MOV_r32_r32( R_EAX, R_ECX );
2315 ADD_imm8s_r32( 4, R_EAX );
2316 store_reg( R_EAX, Rm );
2317 MEM_READ_LONG( R_ECX, R_EAX );
2318 store_spreg( R_EAX, R_MACH );
2319 sh4_x86.tstate = TSTATE_NONE;
2322 load_reg( R_EAX, Rm );
2323 store_spreg( R_EAX, R_MACL );
2326 load_reg( R_EAX, Rm );
2327 check_ralign32( R_EAX );
2328 MOV_r32_r32( R_EAX, R_ECX );
2329 ADD_imm8s_r32( 4, R_EAX );
2330 store_reg( R_EAX, Rm );
2331 MEM_READ_LONG( R_ECX, R_EAX );
2332 store_spreg( R_EAX, R_MACL );
2333 sh4_x86.tstate = TSTATE_NONE;
2336 load_reg( R_EAX, Rm );
2337 store_spreg( R_EAX, R_PR );
2340 load_reg( R_EAX, Rm );
2341 check_ralign32( R_EAX );
2342 MOV_r32_r32( R_EAX, R_ECX );
2343 ADD_imm8s_r32( 4, R_EAX );
2344 store_reg( R_EAX, Rm );
2345 MEM_READ_LONG( R_ECX, R_EAX );
2346 store_spreg( R_EAX, R_PR );
2347 sh4_x86.tstate = TSTATE_NONE;
2350 call_func0( MMU_ldtlb );
2356 load_reg( R_EAX, Rn );
2357 MOV_r32_r32( R_EAX, R_ECX );
2358 AND_imm32_r32( 0xFC000000, R_EAX );
2359 CMP_imm32_r32( 0xE0000000, R_EAX );
2360 JNE_rel8(CALL_FUNC1_SIZE, end);
2361 call_func1( sh4_flush_store_queue, R_ECX );
2363 sh4_x86.tstate = TSTATE_NONE;
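/* Explanatory note: PREF only needs special handling when the address falls in
 * the store-queue region (0xE0000000-0xE3FFFFFF); in that case the queued data
 * is flushed via sh4_flush_store_queue(), and all other prefetch hints are
 * simply ignored. */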
2367 call_func0( sh4_sleep );
2368 sh4_x86.tstate = TSTATE_NONE;
2369 sh4_x86.in_delay_slot = FALSE;
2374 call_func0(sh4_read_sr);
2375 store_reg( R_EAX, Rn );
2376 sh4_x86.tstate = TSTATE_NONE;
2379 load_spreg( R_EAX, R_GBR );
2380 store_reg( R_EAX, Rn );
2384 load_spreg( R_EAX, R_VBR );
2385 store_reg( R_EAX, Rn );
2386 sh4_x86.tstate = TSTATE_NONE;
2390 load_spreg( R_EAX, R_SSR );
2391 store_reg( R_EAX, Rn );
2392 sh4_x86.tstate = TSTATE_NONE;
2396 load_spreg( R_EAX, R_SPC );
2397 store_reg( R_EAX, Rn );
2398 sh4_x86.tstate = TSTATE_NONE;
2402 load_spreg( R_EAX, R_SGR );
2403 store_reg( R_EAX, Rn );
2404 sh4_x86.tstate = TSTATE_NONE;
2408 load_spreg( R_EAX, R_DBR );
2409 store_reg( R_EAX, Rn );
2410 sh4_x86.tstate = TSTATE_NONE;
2414 load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
2415 store_reg( R_EAX, Rn );
2416 sh4_x86.tstate = TSTATE_NONE;
2420 call_func0( sh4_read_sr );
2421 load_reg( R_ECX, Rn );
2422 check_walign32( R_ECX );
2423 ADD_imm8s_r32( -4, R_ECX );
2424 store_reg( R_ECX, Rn );
2425 MEM_WRITE_LONG( R_ECX, R_EAX );
2426 sh4_x86.tstate = TSTATE_NONE;
2430 load_reg( R_ECX, Rn );
2431 check_walign32( R_ECX );
2432 ADD_imm8s_r32( -4, R_ECX );
2433 store_reg( R_ECX, Rn );
2434 load_spreg( R_EAX, R_VBR );
2435 MEM_WRITE_LONG( R_ECX, R_EAX );
2436 sh4_x86.tstate = TSTATE_NONE;
2440 load_reg( R_ECX, Rn );
2441 check_walign32( R_ECX );
2442 ADD_imm8s_r32( -4, R_ECX );
2443 store_reg( R_ECX, Rn );
2444 load_spreg( R_EAX, R_SSR );
2445 MEM_WRITE_LONG( R_ECX, R_EAX );
2446 sh4_x86.tstate = TSTATE_NONE;
2450 load_reg( R_ECX, Rn );
2451 check_walign32( R_ECX );
2452 ADD_imm8s_r32( -4, R_ECX );
2453 store_reg( R_ECX, Rn );
2454 load_spreg( R_EAX, R_SPC );
2455 MEM_WRITE_LONG( R_ECX, R_EAX );
2456 sh4_x86.tstate = TSTATE_NONE;
2460 load_reg( R_ECX, Rn );
2461 check_walign32( R_ECX );
2462 ADD_imm8s_r32( -4, R_ECX );
2463 store_reg( R_ECX, Rn );
2464 load_spreg( R_EAX, R_SGR );
2465 MEM_WRITE_LONG( R_ECX, R_EAX );
2466 sh4_x86.tstate = TSTATE_NONE;
2470 load_reg( R_ECX, Rn );
2471 check_walign32( R_ECX );
2472 ADD_imm8s_r32( -4, R_ECX );
2473 store_reg( R_ECX, Rn );
2474 load_spreg( R_EAX, R_DBR );
2475 MEM_WRITE_LONG( R_ECX, R_EAX );
2476 sh4_x86.tstate = TSTATE_NONE;
2478 STC.L Rm_BANK, @-Rn {:
2480 load_reg( R_ECX, Rn );
2481 check_walign32( R_ECX );
2482 ADD_imm8s_r32( -4, R_ECX );
2483 store_reg( R_ECX, Rn );
2484 load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
2485 MEM_WRITE_LONG( R_ECX, R_EAX );
2486 sh4_x86.tstate = TSTATE_NONE;
2489 load_reg( R_ECX, Rn );
2490 check_walign32( R_ECX );
2491 ADD_imm8s_r32( -4, R_ECX );
2492 store_reg( R_ECX, Rn );
2493 load_spreg( R_EAX, R_GBR );
2494 MEM_WRITE_LONG( R_ECX, R_EAX );
2495 sh4_x86.tstate = TSTATE_NONE;
2498 load_spreg( R_EAX, R_FPSCR );
2499 store_reg( R_EAX, Rn );
2501 STS.L FPSCR, @-Rn {:
2502 load_reg( R_ECX, Rn );
2503 check_walign32( R_ECX );
2504 ADD_imm8s_r32( -4, R_ECX );
2505 store_reg( R_ECX, Rn );
2506 load_spreg( R_EAX, R_FPSCR );
2507 MEM_WRITE_LONG( R_ECX, R_EAX );
2508 sh4_x86.tstate = TSTATE_NONE;
2511 load_spreg( R_EAX, R_FPUL );
2512 store_reg( R_EAX, Rn );
2515 load_reg( R_ECX, Rn );
2516 check_walign32( R_ECX );
2517 ADD_imm8s_r32( -4, R_ECX );
2518 store_reg( R_ECX, Rn );
2519 load_spreg( R_EAX, R_FPUL );
2520 MEM_WRITE_LONG( R_ECX, R_EAX );
2521 sh4_x86.tstate = TSTATE_NONE;
2524 load_spreg( R_EAX, R_MACH );
2525 store_reg( R_EAX, Rn );
2528 load_reg( R_ECX, Rn );
2529 check_walign32( R_ECX );
2530 ADD_imm8s_r32( -4, R_ECX );
2531 store_reg( R_ECX, Rn );
2532 load_spreg( R_EAX, R_MACH );
2533 MEM_WRITE_LONG( R_ECX, R_EAX );
2534 sh4_x86.tstate = TSTATE_NONE;
2537 load_spreg( R_EAX, R_MACL );
2538 store_reg( R_EAX, Rn );
2541 load_reg( R_ECX, Rn );
2542 check_walign32( R_ECX );
2543 ADD_imm8s_r32( -4, R_ECX );
2544 store_reg( R_ECX, Rn );
2545 load_spreg( R_EAX, R_MACL );
2546 MEM_WRITE_LONG( R_ECX, R_EAX );
2547 sh4_x86.tstate = TSTATE_NONE;
2550 load_spreg( R_EAX, R_PR );
2551 store_reg( R_EAX, Rn );
2554 load_reg( R_ECX, Rn );
2555 check_walign32( R_ECX );
2556 ADD_imm8s_r32( -4, R_ECX );
2557 store_reg( R_ECX, Rn );
2558 load_spreg( R_EAX, R_PR );
2559 MEM_WRITE_LONG( R_ECX, R_EAX );
2560 sh4_x86.tstate = TSTATE_NONE;
2563 NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
2565 sh4_x86.in_delay_slot = FALSE;