Search
lxdream.org :: lxdream/src/sh4/ia32abi.h
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/ia32abi.h
changeset 669:ab344e42bca9
prev604:1024c3a9cb88
next736:a02d1475ccfd
author nkeynes
date Mon May 12 10:00:13 2008 +0000 (13 years ago)
permissions -rw-r--r--
last change Cleanup most of the -Wall warnings (getting a bit sloppy...)
Convert FP code to use fixed banks rather than indirect pointer
(3-4% faster this way now)
file annotate diff log raw
nkeynes@539
     1
/**
nkeynes@586
     2
 * $Id$
nkeynes@539
     3
 * 
nkeynes@539
     4
 * Provides the implementation for the ia32 ABI (eg prologue, epilogue, and
nkeynes@539
     5
 * calling conventions)
nkeynes@539
     6
 *
nkeynes@539
     7
 * Copyright (c) 2007 Nathan Keynes.
nkeynes@539
     8
 *
nkeynes@539
     9
 * This program is free software; you can redistribute it and/or modify
nkeynes@539
    10
 * it under the terms of the GNU General Public License as published by
nkeynes@539
    11
 * the Free Software Foundation; either version 2 of the License, or
nkeynes@539
    12
 * (at your option) any later version.
nkeynes@539
    13
 *
nkeynes@539
    14
 * This program is distributed in the hope that it will be useful,
nkeynes@539
    15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
nkeynes@539
    16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
nkeynes@539
    17
 * GNU General Public License for more details.
nkeynes@539
    18
 */
nkeynes@539
    19
nkeynes@539
    20
#ifndef __lxdream_ia32abi_H
nkeynes@539
    21
#define __lxdream_ia32abi_H 1
nkeynes@539
    22
nkeynes@539
    23
/* Load the (32-bit) address ptr into reg as an immediate. Expands to a single
 * expression with no trailing semicolon so that it behaves like an ordinary
 * call statement, including inside an unbraced if/else. */
#define load_ptr( reg, ptr ) load_imm32( (reg), (uint32_t)(ptr) )
nkeynes@539
    24
nkeynes@539
    25
/**
nkeynes@539
    26
 * Note: clobbers EAX to make the indirect call - this isn't usually
nkeynes@539
    27
 * a problem since the callee will usually clobber it anyway.
nkeynes@539
    28
 */
nkeynes@539
    29
#define CALL_FUNC0_SIZE 7
nkeynes@539
    30
static inline void call_func0( void *ptr )
nkeynes@539
    31
{
nkeynes@539
    32
    load_imm32(R_EAX, (uint32_t)ptr);
nkeynes@539
    33
    CALL_r32(R_EAX);
nkeynes@539
    34
}
nkeynes@539
    35
nkeynes@539
    36
#define CALL_FUNC1_SIZE 11
nkeynes@539
    37
static inline void call_func1( void *ptr, int arg1 )
nkeynes@539
    38
{
nkeynes@539
    39
    PUSH_r32(arg1);
nkeynes@539
    40
    call_func0(ptr);
nkeynes@539
    41
    ADD_imm8s_r32( 4, R_ESP );
nkeynes@539
    42
}
nkeynes@539
    43
nkeynes@539
    44
#define CALL_FUNC2_SIZE 12
nkeynes@539
    45
static inline void call_func2( void *ptr, int arg1, int arg2 )
nkeynes@539
    46
{
nkeynes@539
    47
    PUSH_r32(arg2);
nkeynes@539
    48
    PUSH_r32(arg1);
nkeynes@539
    49
    call_func0(ptr);
nkeynes@539
    50
    ADD_imm8s_r32( 8, R_ESP );
nkeynes@539
    51
}
nkeynes@539
    52
nkeynes@539
    53
/**
nkeynes@539
    54
 * Write a double (64-bit) value into memory, with the first word in arg2a, and
nkeynes@539
    55
 * the second in arg2b
nkeynes@539
    56
 * NB: 30 bytes
nkeynes@539
    57
 */
nkeynes@539
    58
#define MEM_WRITE_DOUBLE_SIZE 30
nkeynes@539
    59
static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
nkeynes@539
    60
{
nkeynes@539
    61
    ADD_imm8s_r32( 4, addr );
nkeynes@539
    62
    PUSH_r32(arg2b);
nkeynes@539
    63
    PUSH_r32(addr);
nkeynes@539
    64
    ADD_imm8s_r32( -4, addr );
nkeynes@539
    65
    PUSH_r32(arg2a);
nkeynes@539
    66
    PUSH_r32(addr);
nkeynes@539
    67
    call_func0(sh4_write_long);
nkeynes@539
    68
    ADD_imm8s_r32( 8, R_ESP );
nkeynes@539
    69
    call_func0(sh4_write_long);
nkeynes@539
    70
    ADD_imm8s_r32( 8, R_ESP );
nkeynes@539
    71
}
nkeynes@539
    72
nkeynes@539
    73
/**
nkeynes@539
    74
 * Read a double (64-bit) value from memory, writing the first word into arg2a
nkeynes@539
    75
 * and the second into arg2b. The addr must not be in EAX
nkeynes@539
    76
 * NB: 27 bytes
nkeynes@539
    77
 */
nkeynes@539
    78
#define MEM_READ_DOUBLE_SIZE 27
nkeynes@539
    79
static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
nkeynes@539
    80
{
nkeynes@539
    81
    PUSH_r32(addr);
nkeynes@539
    82
    call_func0(sh4_read_long);
nkeynes@586
    83
    POP_r32(R_ECX);
nkeynes@539
    84
    PUSH_r32(R_EAX);
nkeynes@586
    85
    ADD_imm8s_r32( 4, R_ECX );
nkeynes@586
    86
    PUSH_r32(R_ECX);
nkeynes@539
    87
    call_func0(sh4_read_long);
nkeynes@539
    88
    ADD_imm8s_r32( 4, R_ESP );
nkeynes@539
    89
    MOV_r32_r32( R_EAX, arg2b );
nkeynes@539
    90
    POP_r32(arg2a);
nkeynes@539
    91
}
nkeynes@539
    92
nkeynes@539
    93
/**
nkeynes@539
    94
 * Emit the 'start of block' assembly. Sets up the stack frame and save
nkeynes@539
    95
 * SI/DI as required
nkeynes@539
    96
 */
nkeynes@539
    97
void sh4_translate_begin_block( sh4addr_t pc ) 
nkeynes@539
    98
{
nkeynes@539
    99
    PUSH_r32(R_EBP);
nkeynes@539
   100
    /* mov &sh4r, ebp */
nkeynes@669
   101
    load_ptr( R_EBP, ((uint8_t *)&sh4r) + 128 );
nkeynes@539
   102
    
nkeynes@539
   103
    sh4_x86.in_delay_slot = FALSE;
nkeynes@539
   104
    sh4_x86.priv_checked = FALSE;
nkeynes@539
   105
    sh4_x86.fpuen_checked = FALSE;
nkeynes@539
   106
    sh4_x86.branch_taken = FALSE;
nkeynes@539
   107
    sh4_x86.backpatch_posn = 0;
nkeynes@539
   108
    sh4_x86.block_start_pc = pc;
nkeynes@586
   109
    sh4_x86.tlb_on = IS_MMU_ENABLED();
nkeynes@539
   110
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@539
   111
#ifdef STACK_ALIGN
nkeynes@539
   112
	sh4_x86.stack_posn = 8;
nkeynes@539
   113
#endif
nkeynes@539
   114
}
nkeynes@539
   115
nkeynes@539
   116
/**
nkeynes@539
   117
 * Exit the block with sh4r.pc already written
nkeynes@539
   118
 */
nkeynes@586
   119
void exit_block_pcset( sh4addr_t pc )
nkeynes@539
   120
{
nkeynes@539
   121
    load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
nkeynes@539
   122
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );    // 6
nkeynes@590
   123
    load_spreg( R_EAX, R_PC );
nkeynes@590
   124
    if( sh4_x86.tlb_on ) {
nkeynes@590
   125
	call_func1(xlat_get_code_by_vma,R_EAX);
nkeynes@590
   126
    } else {
nkeynes@590
   127
	call_func1(xlat_get_code,R_EAX);
nkeynes@590
   128
    } 
nkeynes@590
   129
    POP_r32(R_EBP);
nkeynes@590
   130
    RET();
nkeynes@590
   131
}
nkeynes@590
   132
nkeynes@590
   133
/**
nkeynes@590
   134
 * Exit the block with sh4r.new_pc written with the target pc
nkeynes@590
   135
 */
nkeynes@590
   136
void exit_block_newpcset( sh4addr_t pc )
nkeynes@590
   137
{
nkeynes@590
   138
    load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
nkeynes@590
   139
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );    // 6
nkeynes@590
   140
    load_spreg( R_EAX, R_NEW_PC );
nkeynes@590
   141
    store_spreg( R_EAX, R_PC );
nkeynes@586
   142
    if( sh4_x86.tlb_on ) {
nkeynes@586
   143
	call_func1(xlat_get_code_by_vma,R_EAX);
nkeynes@586
   144
    } else {
nkeynes@586
   145
	call_func1(xlat_get_code,R_EAX);
nkeynes@586
   146
    } 
nkeynes@539
   147
    POP_r32(R_EBP);
nkeynes@539
   148
    RET();
nkeynes@539
   149
}
nkeynes@539
   150
nkeynes@586
   151
#define EXIT_BLOCK_SIZE(pc)  (24 + (IS_IN_ICACHE(pc)?5:CALL_FUNC1_SIZE))
nkeynes@586
   152
nkeynes@586
   153
nkeynes@539
   154
/**
nkeynes@539
   155
 * Exit the block to an absolute PC
nkeynes@539
   156
 */
nkeynes@539
   157
void exit_block( sh4addr_t pc, sh4addr_t endpc )
nkeynes@539
   158
{
nkeynes@539
   159
    load_imm32( R_ECX, pc );                            // 5
nkeynes@539
   160
    store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
nkeynes@586
   161
    if( IS_IN_ICACHE(pc) ) {
nkeynes@586
   162
	MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
nkeynes@586
   163
    } else if( sh4_x86.tlb_on ) {
nkeynes@586
   164
	call_func1(xlat_get_code_by_vma,R_ECX);
nkeynes@586
   165
    } else {
nkeynes@586
   166
	call_func1(xlat_get_code,R_ECX);
nkeynes@586
   167
    }
nkeynes@586
   168
    AND_imm8s_r32( 0xFC, R_EAX ); // 3
nkeynes@586
   169
    load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
nkeynes@586
   170
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
nkeynes@586
   171
    POP_r32(R_EBP);
nkeynes@586
   172
    RET();
nkeynes@586
   173
}
nkeynes@586
   174
nkeynes@586
   175
#define EXIT_BLOCK_REL_SIZE(pc)  (27 + (IS_IN_ICACHE(pc)?5:CALL_FUNC1_SIZE))
nkeynes@586
   176
nkeynes@586
   177
/**
nkeynes@586
   178
 * Exit the block to a relative PC
nkeynes@586
   179
 */
nkeynes@586
   180
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
nkeynes@586
   181
{
nkeynes@586
   182
    load_imm32( R_ECX, pc - sh4_x86.block_start_pc );   // 5
nkeynes@586
   183
    ADD_sh4r_r32( R_PC, R_ECX );
nkeynes@586
   184
    store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
nkeynes@586
   185
    if( IS_IN_ICACHE(pc) ) {
nkeynes@586
   186
	MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
nkeynes@586
   187
    } else if( sh4_x86.tlb_on ) {
nkeynes@586
   188
	call_func1(xlat_get_code_by_vma,R_ECX);
nkeynes@586
   189
    } else {
nkeynes@586
   190
	call_func1(xlat_get_code,R_ECX);
nkeynes@586
   191
    }
nkeynes@539
   192
    AND_imm8s_r32( 0xFC, R_EAX ); // 3
nkeynes@539
   193
    load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
nkeynes@539
   194
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
nkeynes@539
   195
    POP_r32(R_EBP);
nkeynes@539
   196
    RET();
nkeynes@539
   197
}
nkeynes@539
   198
nkeynes@539
   199
/**
nkeynes@539
   200
 * Write the block trailer (exception handling block)
nkeynes@539
   201
 */
nkeynes@539
   202
void sh4_translate_end_block( sh4addr_t pc ) {
nkeynes@539
   203
    if( sh4_x86.branch_taken == FALSE ) {
nkeynes@539
   204
	// Didn't exit unconditionally already, so write the termination here
nkeynes@586
   205
	exit_block_rel( pc, pc );
nkeynes@539
   206
    }
nkeynes@539
   207
    if( sh4_x86.backpatch_posn != 0 ) {
nkeynes@586
   208
	unsigned int i;
nkeynes@586
   209
	// Raise exception
nkeynes@539
   210
	uint8_t *end_ptr = xlat_output;
nkeynes@586
   211
	MOV_r32_r32( R_EDX, R_ECX );
nkeynes@539
   212
	ADD_r32_r32( R_EDX, R_ECX );
nkeynes@586
   213
	ADD_r32_sh4r( R_ECX, R_PC );
nkeynes@539
   214
	MOV_moff32_EAX( &sh4_cpu_period );
nkeynes@539
   215
	MUL_r32( R_EDX );
nkeynes@539
   216
	ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
nkeynes@539
   217
nkeynes@539
   218
	call_func0( sh4_raise_exception );
nkeynes@539
   219
	ADD_imm8s_r32( 4, R_ESP );
nkeynes@586
   220
	load_spreg( R_EAX, R_PC );
nkeynes@586
   221
	if( sh4_x86.tlb_on ) {
nkeynes@586
   222
	    call_func1(xlat_get_code_by_vma,R_EAX);
nkeynes@586
   223
	} else {
nkeynes@586
   224
	    call_func1(xlat_get_code,R_EAX);
nkeynes@586
   225
	}
nkeynes@539
   226
	POP_r32(R_EBP);
nkeynes@539
   227
	RET();
nkeynes@539
   228
nkeynes@586
   229
	// Exception already raised - just cleanup
nkeynes@586
   230
	uint8_t *preexc_ptr = xlat_output;
nkeynes@586
   231
	MOV_r32_r32( R_EDX, R_ECX );
nkeynes@586
   232
	ADD_r32_r32( R_EDX, R_ECX );
nkeynes@586
   233
	ADD_r32_sh4r( R_ECX, R_SPC );
nkeynes@586
   234
	MOV_moff32_EAX( &sh4_cpu_period );
nkeynes@586
   235
	MUL_r32( R_EDX );
nkeynes@586
   236
	ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
nkeynes@586
   237
	load_spreg( R_EAX, R_PC );
nkeynes@586
   238
	if( sh4_x86.tlb_on ) {
nkeynes@586
   239
	    call_func1(xlat_get_code_by_vma,R_EAX);
nkeynes@586
   240
	} else {
nkeynes@586
   241
	    call_func1(xlat_get_code,R_EAX);
nkeynes@586
   242
	}
nkeynes@586
   243
	POP_r32(R_EBP);
nkeynes@586
   244
	RET();
nkeynes@586
   245
nkeynes@586
   246
	for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
nkeynes@604
   247
	    uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
nkeynes@604
   248
	    *fixup_addr = xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
nkeynes@596
   249
	    if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
nkeynes@586
   250
		load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );
nkeynes@596
   251
		int stack_adj = -1 - sh4_x86.backpatch_list[i].exc_code;
nkeynes@596
   252
		if( stack_adj > 0 ) { 
nkeynes@596
   253
		    ADD_imm8s_r32( stack_adj, R_ESP );
nkeynes@596
   254
		}
nkeynes@586
   255
		int rel = preexc_ptr - xlat_output;
nkeynes@586
   256
		JMP_rel(rel);
nkeynes@586
   257
	    } else {
nkeynes@586
   258
		PUSH_imm32( sh4_x86.backpatch_list[i].exc_code );
nkeynes@586
   259
		load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );
nkeynes@586
   260
		int rel = end_ptr - xlat_output;
nkeynes@586
   261
		JMP_rel(rel);
nkeynes@586
   262
	    }
nkeynes@586
   263
	}
nkeynes@539
   264
    }
nkeynes@539
   265
}
nkeynes@539
   266
nkeynes@586
   267
void *xlat_get_native_pc()
nkeynes@586
   268
{
nkeynes@586
   269
    void *result = NULL;
nkeynes@586
   270
    asm(
nkeynes@586
   271
	"mov %%ebp, %%eax\n\t"
nkeynes@586
   272
	"mov $0x8, %%ecx\n\t"
nkeynes@586
   273
	"mov %1, %%edx\n"
nkeynes@586
   274
"frame_loop: test %%eax, %%eax\n\t"
nkeynes@586
   275
	"je frame_not_found\n\t"
nkeynes@586
   276
	"cmp (%%eax), %%edx\n\t"
nkeynes@586
   277
	"je frame_found\n\t"
nkeynes@586
   278
	"sub $0x1, %%ecx\n\t"
nkeynes@586
   279
	"je frame_not_found\n\t"
nkeynes@586
   280
	"movl (%%eax), %%eax\n\t"
nkeynes@586
   281
	"jmp frame_loop\n"
nkeynes@586
   282
"frame_found: movl 0x4(%%eax), %0\n"
nkeynes@586
   283
"frame_not_found:"
nkeynes@586
   284
	: "=r" (result)
nkeynes@669
   285
	: "r" (((uint8_t *)&sh4r) + 128 )
nkeynes@586
   286
	: "eax", "ecx", "edx" );
nkeynes@586
   287
    return result;
nkeynes@586
   288
}
nkeynes@586
   289
nkeynes@539
   290
#endif
nkeynes@539
   291
nkeynes@539
   292
.