Search
lxdream.org :: lxdream :: r1006:3a169c224c12
lxdream 0.9.1
released Jun 29
Download Now
changeset 1006:3a169c224c12 xlat-refactor
parent 1005:7c6ff471751c
child 1011:fdd58619b760
author nkeynes
date Tue Apr 07 10:55:03 2009 +0000 (15 years ago)
branch xlat-refactor
Commit current work-in-progress to xlat-refactor branch
src/Makefile.am
src/Makefile.in
src/sh4/sh4x86.in
src/sh4/sh4xir.h
src/sh4/sh4xir.in
src/test/testsh4xir.c
src/test/testxir.c
src/xlat/dce.c
src/xlat/livevar.c
src/xlat/target.c
src/xlat/x86/x86gen.c
src/xlat/x86/x86op.h
src/xlat/x86/x86target.c
src/xlat/x86/x86target.h
src/xlat/xir.c
src/xlat/xir.h
src/xlat/xiropt.h
src/xlat/xirsup.c
src/xlat/xlat.h
src/xlat/xltcache.h
1.1 --- a/src/Makefile.am Tue Apr 07 10:39:02 2009 +0000
1.2 +++ b/src/Makefile.am Tue Apr 07 10:55:03 2009 +0000
1.3 @@ -13,6 +13,7 @@
1.4 check_PROGRAMS = test/testxlt
1.5
1.6
1.7 +
1.8 EXTRA_DIST=drivers/genkeymap.pl checkver.pl
1.9 AM_CFLAGS = -D__EXTENSIONS__ -D_BSD_SOURCE -D_GNU_SOURCE
1.10
1.11 @@ -81,7 +82,13 @@
1.12 sh4/sh4trans.c sh4/sh4x86.c xlat/xltcache.c \
1.13 xlat/xltcache.h mem.c util.c
1.14
1.15 -check_PROGRAMS += test/testsh4x86
1.16 +test_testsh4xir_LDADD = @GLIB_LIBS@ @GTK_LIBS@ @LIBPNG_LIBS@
1.17 +test_testsh4xir_SOURCES = test/testsh4xir.c xlat/xir.c xlat/xir.h sh4/sh4xir.c xlat/x86/x86target.c xlat/x86/x86gen.c
1.18 +
1.19 +test_testxir_LDADD = @GLIB_LIBS@ @GTK_LIBS@ @LIBPNG_LIBS@
1.20 +test_testxir_SOURCES = test/testxir.c xlat/xir.c xlat/xir.h xlat/xirsup.c xlat/xiropt.c xlat/xiropt.h
1.21 +
1.22 +check_PROGRAMS += test/testsh4x86 test/testsh4xir test/testxir
1.23 endif
1.24
1.25 if GUI_GTK
1.26 @@ -169,6 +176,9 @@
1.27 sh4/sh4x86.c: gendec sh4/sh4.def sh4/sh4x86.in
1.28 mkdir -p `dirname $@`
1.29 ./gendec $(srcdir)/sh4/sh4.def $(srcdir)/sh4/sh4x86.in -o $@
1.30 +sh4/sh4xir.c: gendec sh4/sh4.def sh4/sh4xir.in
1.31 + mkdir -p `dirname $@`
1.32 + ./gendec $(srcdir)/sh4/sh4.def $(srcdir)/sh4/sh4xir.in -o $@
1.33 sh4/sh4stat.c: gendec sh4/sh4.def sh4/sh4stat.in
1.34 mkdir -p `dirname $@`
1.35 ./gendec $(srcdir)/sh4/sh4.def $(srcdir)/sh4/sh4stat.in -o $@
2.1 --- a/src/Makefile.in Tue Apr 07 10:39:02 2009 +0000
2.2 +++ b/src/Makefile.in Tue Apr 07 10:55:03 2009 +0000
2.3 @@ -47,7 +47,7 @@
2.4 @BUILD_SH4X86_TRUE@ x86dasm/ansidecl.h x86dasm/bfd.h x86dasm/dis-asm.h \
2.5 @BUILD_SH4X86_TRUE@ x86dasm/symcat.h x86dasm/sysdep.h
2.6
2.7 -@BUILD_SH4X86_TRUE@am__append_2 = test/testsh4x86
2.8 +@BUILD_SH4X86_TRUE@am__append_2 = test/testsh4x86 test/testsh4xir test/testxir
2.9 @GUI_GTK_TRUE@am__append_3 = gtkui/gtkui.c gtkui/gtkui.h \
2.10 @GUI_GTK_TRUE@ gtkui/gtk_win.c gtkui/gtkcb.c \
2.11 @GUI_GTK_TRUE@ gtkui/gtk_mmio.c gtkui/gtk_debug.c gtkui/gtk_dump.c \
2.12 @@ -85,7 +85,9 @@
2.13 CONFIG_CLEAN_FILES =
2.14 am__installdirs = "$(DESTDIR)$(bindir)"
2.15 binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
2.16 -@BUILD_SH4X86_TRUE@am__EXEEXT_1 = test/testsh4x86$(EXEEXT)
2.17 +@BUILD_SH4X86_TRUE@am__EXEEXT_1 = test/testsh4x86$(EXEEXT) \
2.18 +@BUILD_SH4X86_TRUE@ test/testsh4xir$(EXEEXT) \
2.19 +@BUILD_SH4X86_TRUE@ test/testxir$(EXEEXT)
2.20 PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS)
2.21 am_gendec_OBJECTS = gendec.$(OBJEXT) insparse.$(OBJEXT) \
2.22 actparse.$(OBJEXT)
2.23 @@ -213,6 +215,20 @@
2.24 test_testsh4x86_OBJECTS = $(am_test_testsh4x86_OBJECTS)
2.25 test_testsh4x86_DEPENDENCIES =
2.26 am__dirstamp = $(am__leading_dot)dirstamp
2.27 +am__test_testsh4xir_SOURCES_DIST = test/testsh4xir.c xlat/xir.c \
2.28 + xlat/xir.h sh4/sh4xir.c xlat/x86/x86target.c xlat/x86/x86gen.c
2.29 +@BUILD_SH4X86_TRUE@am_test_testsh4xir_OBJECTS = testsh4xir.$(OBJEXT) \
2.30 +@BUILD_SH4X86_TRUE@ xir.$(OBJEXT) sh4xir.$(OBJEXT) \
2.31 +@BUILD_SH4X86_TRUE@ x86target.$(OBJEXT) x86gen.$(OBJEXT)
2.32 +test_testsh4xir_OBJECTS = $(am_test_testsh4xir_OBJECTS)
2.33 +test_testsh4xir_DEPENDENCIES =
2.34 +am__test_testxir_SOURCES_DIST = test/testxir.c xlat/xir.c xlat/xir.h \
2.35 + xlat/xirsup.c xlat/xiropt.c xlat/xiropt.h
2.36 +@BUILD_SH4X86_TRUE@am_test_testxir_OBJECTS = testxir.$(OBJEXT) \
2.37 +@BUILD_SH4X86_TRUE@ xir.$(OBJEXT) xirsup.$(OBJEXT) \
2.38 +@BUILD_SH4X86_TRUE@ xiropt.$(OBJEXT)
2.39 +test_testxir_OBJECTS = $(am_test_testxir_OBJECTS)
2.40 +test_testxir_DEPENDENCIES =
2.41 am_test_testxlt_OBJECTS = testxlt.$(OBJEXT) xltcache.$(OBJEXT)
2.42 test_testxlt_OBJECTS = $(am_test_testxlt_OBJECTS)
2.43 test_testxlt_LDADD = $(LDADD)
2.44 @@ -229,10 +245,13 @@
2.45 OBJCLINK = $(OBJCLD) $(AM_OBJCFLAGS) $(OBJCFLAGS) $(AM_LDFLAGS) \
2.46 $(LDFLAGS) -o $@
2.47 SOURCES = $(gendec_SOURCES) $(genglsl_SOURCES) $(lxdream_SOURCES) \
2.48 - $(test_testsh4x86_SOURCES) $(test_testxlt_SOURCES)
2.49 + $(test_testsh4x86_SOURCES) $(test_testsh4xir_SOURCES) \
2.50 + $(test_testxir_SOURCES) $(test_testxlt_SOURCES)
2.51 DIST_SOURCES = $(gendec_SOURCES) $(genglsl_SOURCES) \
2.52 $(am__lxdream_SOURCES_DIST) \
2.53 - $(am__test_testsh4x86_SOURCES_DIST) $(test_testxlt_SOURCES)
2.54 + $(am__test_testsh4x86_SOURCES_DIST) \
2.55 + $(am__test_testsh4xir_SOURCES_DIST) \
2.56 + $(am__test_testxir_SOURCES_DIST) $(test_testxlt_SOURCES)
2.57 ETAGS = etags
2.58 CTAGS = ctags
2.59 DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
2.60 @@ -471,6 +490,10 @@
2.61 @BUILD_SH4X86_TRUE@ sh4/sh4trans.c sh4/sh4x86.c xlat/xltcache.c \
2.62 @BUILD_SH4X86_TRUE@ xlat/xltcache.h mem.c util.c
2.63
2.64 +@BUILD_SH4X86_TRUE@test_testsh4xir_LDADD = @GLIB_LIBS@ @GTK_LIBS@ @LIBPNG_LIBS@
2.65 +@BUILD_SH4X86_TRUE@test_testsh4xir_SOURCES = test/testsh4xir.c xlat/xir.c xlat/xir.h sh4/sh4xir.c xlat/x86/x86target.c xlat/x86/x86gen.c
2.66 +@BUILD_SH4X86_TRUE@test_testxir_LDADD = @GLIB_LIBS@ @GTK_LIBS@ @LIBPNG_LIBS@
2.67 +@BUILD_SH4X86_TRUE@test_testxir_SOURCES = test/testxir.c xlat/xir.c xlat/xir.h xlat/xirsup.c xlat/xiropt.c xlat/xiropt.h
2.68 lxdream_LDADD = @GLIB_LIBS@ @GTK_LIBS@ @LIBPNG_LIBS@ @PULSE_LIBS@ @ESOUND_LIBS@ @ALSA_LIBS@ @SDL_LIBS@ $(INTLLIBS)
2.69 gendec_LDADD = @GLIB_LIBS@ @GTK_LIBS@ $(INTLLIBS)
2.70 genglsl_LDADD = @GLIB_LIBS@ @GTK_LIBS@ $(INTLLIBS)
2.71 @@ -553,6 +576,12 @@
2.72 test/testsh4x86$(EXEEXT): $(test_testsh4x86_OBJECTS) $(test_testsh4x86_DEPENDENCIES) test/$(am__dirstamp)
2.73 @rm -f test/testsh4x86$(EXEEXT)
2.74 $(LINK) $(test_testsh4x86_LDFLAGS) $(test_testsh4x86_OBJECTS) $(test_testsh4x86_LDADD) $(LIBS)
2.75 +test/testsh4xir$(EXEEXT): $(test_testsh4xir_OBJECTS) $(test_testsh4xir_DEPENDENCIES) test/$(am__dirstamp)
2.76 + @rm -f test/testsh4xir$(EXEEXT)
2.77 + $(LINK) $(test_testsh4xir_LDFLAGS) $(test_testsh4xir_OBJECTS) $(test_testsh4xir_LDADD) $(LIBS)
2.78 +test/testxir$(EXEEXT): $(test_testxir_OBJECTS) $(test_testxir_DEPENDENCIES) test/$(am__dirstamp)
2.79 + @rm -f test/testxir$(EXEEXT)
2.80 + $(LINK) $(test_testxir_LDFLAGS) $(test_testxir_OBJECTS) $(test_testxir_LDADD) $(LIBS)
2.81 test/testxlt$(EXEEXT): $(test_testxlt_OBJECTS) $(test_testxlt_DEPENDENCIES) test/$(am__dirstamp)
2.82 @rm -f test/testxlt$(EXEEXT)
2.83 $(LINK) $(test_testxlt_LDFLAGS) $(test_testxlt_OBJECTS) $(test_testxlt_LDADD) $(LIBS)
2.84 @@ -658,9 +687,12 @@
2.85 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sh4stat.Po@am__quote@
2.86 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sh4trans.Po@am__quote@
2.87 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sh4x86.Po@am__quote@
2.88 +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sh4xir.Po@am__quote@
2.89 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/syscall.Po@am__quote@
2.90 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tacore.Po@am__quote@
2.91 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/testsh4x86.Po@am__quote@
2.92 +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/testsh4xir.Po@am__quote@
2.93 +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/testxir.Po@am__quote@
2.94 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/testxlt.Po@am__quote@
2.95 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/texcache.Po@am__quote@
2.96 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/timer.Po@am__quote@
2.97 @@ -675,6 +707,11 @@
2.98 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/video_osx.Po@am__quote@
2.99 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/watch.Po@am__quote@
2.100 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/x86dasm.Po@am__quote@
2.101 +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/x86gen.Po@am__quote@
2.102 +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/x86target.Po@am__quote@
2.103 +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xir.Po@am__quote@
2.104 +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xiropt.Po@am__quote@
2.105 +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xirsup.Po@am__quote@
2.106 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xltcache.Po@am__quote@
2.107 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/yuv.Po@am__quote@
2.108
2.109 @@ -1826,6 +1863,118 @@
2.110 @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2.111 @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o testsh4x86.obj `if test -f 'test/testsh4x86.c'; then $(CYGPATH_W) 'test/testsh4x86.c'; else $(CYGPATH_W) '$(srcdir)/test/testsh4x86.c'; fi`
2.112
2.113 +testsh4xir.o: test/testsh4xir.c
2.114 +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT testsh4xir.o -MD -MP -MF "$(DEPDIR)/testsh4xir.Tpo" -c -o testsh4xir.o `test -f 'test/testsh4xir.c' || echo '$(srcdir)/'`test/testsh4xir.c; \
2.115 +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/testsh4xir.Tpo" "$(DEPDIR)/testsh4xir.Po"; else rm -f "$(DEPDIR)/testsh4xir.Tpo"; exit 1; fi
2.116 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test/testsh4xir.c' object='testsh4xir.o' libtool=no @AMDEPBACKSLASH@
2.117 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2.118 +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o testsh4xir.o `test -f 'test/testsh4xir.c' || echo '$(srcdir)/'`test/testsh4xir.c
2.119 +
2.120 +testsh4xir.obj: test/testsh4xir.c
2.121 +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT testsh4xir.obj -MD -MP -MF "$(DEPDIR)/testsh4xir.Tpo" -c -o testsh4xir.obj `if test -f 'test/testsh4xir.c'; then $(CYGPATH_W) 'test/testsh4xir.c'; else $(CYGPATH_W) '$(srcdir)/test/testsh4xir.c'; fi`; \
2.122 +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/testsh4xir.Tpo" "$(DEPDIR)/testsh4xir.Po"; else rm -f "$(DEPDIR)/testsh4xir.Tpo"; exit 1; fi
2.123 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test/testsh4xir.c' object='testsh4xir.obj' libtool=no @AMDEPBACKSLASH@
2.124 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2.125 +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o testsh4xir.obj `if test -f 'test/testsh4xir.c'; then $(CYGPATH_W) 'test/testsh4xir.c'; else $(CYGPATH_W) '$(srcdir)/test/testsh4xir.c'; fi`
2.126 +
2.127 +xir.o: xlat/xir.c
2.128 +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xir.o -MD -MP -MF "$(DEPDIR)/xir.Tpo" -c -o xir.o `test -f 'xlat/xir.c' || echo '$(srcdir)/'`xlat/xir.c; \
2.129 +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/xir.Tpo" "$(DEPDIR)/xir.Po"; else rm -f "$(DEPDIR)/xir.Tpo"; exit 1; fi
2.130 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='xlat/xir.c' object='xir.o' libtool=no @AMDEPBACKSLASH@
2.131 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2.132 +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xir.o `test -f 'xlat/xir.c' || echo '$(srcdir)/'`xlat/xir.c
2.133 +
2.134 +xir.obj: xlat/xir.c
2.135 +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xir.obj -MD -MP -MF "$(DEPDIR)/xir.Tpo" -c -o xir.obj `if test -f 'xlat/xir.c'; then $(CYGPATH_W) 'xlat/xir.c'; else $(CYGPATH_W) '$(srcdir)/xlat/xir.c'; fi`; \
2.136 +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/xir.Tpo" "$(DEPDIR)/xir.Po"; else rm -f "$(DEPDIR)/xir.Tpo"; exit 1; fi
2.137 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='xlat/xir.c' object='xir.obj' libtool=no @AMDEPBACKSLASH@
2.138 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2.139 +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xir.obj `if test -f 'xlat/xir.c'; then $(CYGPATH_W) 'xlat/xir.c'; else $(CYGPATH_W) '$(srcdir)/xlat/xir.c'; fi`
2.140 +
2.141 +sh4xir.o: sh4/sh4xir.c
2.142 +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT sh4xir.o -MD -MP -MF "$(DEPDIR)/sh4xir.Tpo" -c -o sh4xir.o `test -f 'sh4/sh4xir.c' || echo '$(srcdir)/'`sh4/sh4xir.c; \
2.143 +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/sh4xir.Tpo" "$(DEPDIR)/sh4xir.Po"; else rm -f "$(DEPDIR)/sh4xir.Tpo"; exit 1; fi
2.144 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='sh4/sh4xir.c' object='sh4xir.o' libtool=no @AMDEPBACKSLASH@
2.145 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2.146 +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sh4xir.o `test -f 'sh4/sh4xir.c' || echo '$(srcdir)/'`sh4/sh4xir.c
2.147 +
2.148 +sh4xir.obj: sh4/sh4xir.c
2.149 +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT sh4xir.obj -MD -MP -MF "$(DEPDIR)/sh4xir.Tpo" -c -o sh4xir.obj `if test -f 'sh4/sh4xir.c'; then $(CYGPATH_W) 'sh4/sh4xir.c'; else $(CYGPATH_W) '$(srcdir)/sh4/sh4xir.c'; fi`; \
2.150 +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/sh4xir.Tpo" "$(DEPDIR)/sh4xir.Po"; else rm -f "$(DEPDIR)/sh4xir.Tpo"; exit 1; fi
2.151 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='sh4/sh4xir.c' object='sh4xir.obj' libtool=no @AMDEPBACKSLASH@
2.152 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2.153 +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sh4xir.obj `if test -f 'sh4/sh4xir.c'; then $(CYGPATH_W) 'sh4/sh4xir.c'; else $(CYGPATH_W) '$(srcdir)/sh4/sh4xir.c'; fi`
2.154 +
2.155 +x86target.o: xlat/x86/x86target.c
2.156 +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT x86target.o -MD -MP -MF "$(DEPDIR)/x86target.Tpo" -c -o x86target.o `test -f 'xlat/x86/x86target.c' || echo '$(srcdir)/'`xlat/x86/x86target.c; \
2.157 +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/x86target.Tpo" "$(DEPDIR)/x86target.Po"; else rm -f "$(DEPDIR)/x86target.Tpo"; exit 1; fi
2.158 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='xlat/x86/x86target.c' object='x86target.o' libtool=no @AMDEPBACKSLASH@
2.159 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2.160 +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86target.o `test -f 'xlat/x86/x86target.c' || echo '$(srcdir)/'`xlat/x86/x86target.c
2.161 +
2.162 +x86target.obj: xlat/x86/x86target.c
2.163 +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT x86target.obj -MD -MP -MF "$(DEPDIR)/x86target.Tpo" -c -o x86target.obj `if test -f 'xlat/x86/x86target.c'; then $(CYGPATH_W) 'xlat/x86/x86target.c'; else $(CYGPATH_W) '$(srcdir)/xlat/x86/x86target.c'; fi`; \
2.164 +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/x86target.Tpo" "$(DEPDIR)/x86target.Po"; else rm -f "$(DEPDIR)/x86target.Tpo"; exit 1; fi
2.165 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='xlat/x86/x86target.c' object='x86target.obj' libtool=no @AMDEPBACKSLASH@
2.166 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2.167 +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86target.obj `if test -f 'xlat/x86/x86target.c'; then $(CYGPATH_W) 'xlat/x86/x86target.c'; else $(CYGPATH_W) '$(srcdir)/xlat/x86/x86target.c'; fi`
2.168 +
2.169 +x86gen.o: xlat/x86/x86gen.c
2.170 +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT x86gen.o -MD -MP -MF "$(DEPDIR)/x86gen.Tpo" -c -o x86gen.o `test -f 'xlat/x86/x86gen.c' || echo '$(srcdir)/'`xlat/x86/x86gen.c; \
2.171 +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/x86gen.Tpo" "$(DEPDIR)/x86gen.Po"; else rm -f "$(DEPDIR)/x86gen.Tpo"; exit 1; fi
2.172 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='xlat/x86/x86gen.c' object='x86gen.o' libtool=no @AMDEPBACKSLASH@
2.173 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2.174 +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86gen.o `test -f 'xlat/x86/x86gen.c' || echo '$(srcdir)/'`xlat/x86/x86gen.c
2.175 +
2.176 +x86gen.obj: xlat/x86/x86gen.c
2.177 +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT x86gen.obj -MD -MP -MF "$(DEPDIR)/x86gen.Tpo" -c -o x86gen.obj `if test -f 'xlat/x86/x86gen.c'; then $(CYGPATH_W) 'xlat/x86/x86gen.c'; else $(CYGPATH_W) '$(srcdir)/xlat/x86/x86gen.c'; fi`; \
2.178 +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/x86gen.Tpo" "$(DEPDIR)/x86gen.Po"; else rm -f "$(DEPDIR)/x86gen.Tpo"; exit 1; fi
2.179 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='xlat/x86/x86gen.c' object='x86gen.obj' libtool=no @AMDEPBACKSLASH@
2.180 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2.181 +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86gen.obj `if test -f 'xlat/x86/x86gen.c'; then $(CYGPATH_W) 'xlat/x86/x86gen.c'; else $(CYGPATH_W) '$(srcdir)/xlat/x86/x86gen.c'; fi`
2.182 +
2.183 +testxir.o: test/testxir.c
2.184 +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT testxir.o -MD -MP -MF "$(DEPDIR)/testxir.Tpo" -c -o testxir.o `test -f 'test/testxir.c' || echo '$(srcdir)/'`test/testxir.c; \
2.185 +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/testxir.Tpo" "$(DEPDIR)/testxir.Po"; else rm -f "$(DEPDIR)/testxir.Tpo"; exit 1; fi
2.186 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test/testxir.c' object='testxir.o' libtool=no @AMDEPBACKSLASH@
2.187 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2.188 +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o testxir.o `test -f 'test/testxir.c' || echo '$(srcdir)/'`test/testxir.c
2.189 +
2.190 +testxir.obj: test/testxir.c
2.191 +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT testxir.obj -MD -MP -MF "$(DEPDIR)/testxir.Tpo" -c -o testxir.obj `if test -f 'test/testxir.c'; then $(CYGPATH_W) 'test/testxir.c'; else $(CYGPATH_W) '$(srcdir)/test/testxir.c'; fi`; \
2.192 +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/testxir.Tpo" "$(DEPDIR)/testxir.Po"; else rm -f "$(DEPDIR)/testxir.Tpo"; exit 1; fi
2.193 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test/testxir.c' object='testxir.obj' libtool=no @AMDEPBACKSLASH@
2.194 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2.195 +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o testxir.obj `if test -f 'test/testxir.c'; then $(CYGPATH_W) 'test/testxir.c'; else $(CYGPATH_W) '$(srcdir)/test/testxir.c'; fi`
2.196 +
2.197 +xirsup.o: xlat/xirsup.c
2.198 +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xirsup.o -MD -MP -MF "$(DEPDIR)/xirsup.Tpo" -c -o xirsup.o `test -f 'xlat/xirsup.c' || echo '$(srcdir)/'`xlat/xirsup.c; \
2.199 +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/xirsup.Tpo" "$(DEPDIR)/xirsup.Po"; else rm -f "$(DEPDIR)/xirsup.Tpo"; exit 1; fi
2.200 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='xlat/xirsup.c' object='xirsup.o' libtool=no @AMDEPBACKSLASH@
2.201 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2.202 +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xirsup.o `test -f 'xlat/xirsup.c' || echo '$(srcdir)/'`xlat/xirsup.c
2.203 +
2.204 +xirsup.obj: xlat/xirsup.c
2.205 +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xirsup.obj -MD -MP -MF "$(DEPDIR)/xirsup.Tpo" -c -o xirsup.obj `if test -f 'xlat/xirsup.c'; then $(CYGPATH_W) 'xlat/xirsup.c'; else $(CYGPATH_W) '$(srcdir)/xlat/xirsup.c'; fi`; \
2.206 +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/xirsup.Tpo" "$(DEPDIR)/xirsup.Po"; else rm -f "$(DEPDIR)/xirsup.Tpo"; exit 1; fi
2.207 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='xlat/xirsup.c' object='xirsup.obj' libtool=no @AMDEPBACKSLASH@
2.208 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2.209 +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xirsup.obj `if test -f 'xlat/xirsup.c'; then $(CYGPATH_W) 'xlat/xirsup.c'; else $(CYGPATH_W) '$(srcdir)/xlat/xirsup.c'; fi`
2.210 +
2.211 +xiropt.o: xlat/xiropt.c
2.212 +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xiropt.o -MD -MP -MF "$(DEPDIR)/xiropt.Tpo" -c -o xiropt.o `test -f 'xlat/xiropt.c' || echo '$(srcdir)/'`xlat/xiropt.c; \
2.213 +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/xiropt.Tpo" "$(DEPDIR)/xiropt.Po"; else rm -f "$(DEPDIR)/xiropt.Tpo"; exit 1; fi
2.214 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='xlat/xiropt.c' object='xiropt.o' libtool=no @AMDEPBACKSLASH@
2.215 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2.216 +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xiropt.o `test -f 'xlat/xiropt.c' || echo '$(srcdir)/'`xlat/xiropt.c
2.217 +
2.218 +xiropt.obj: xlat/xiropt.c
2.219 +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xiropt.obj -MD -MP -MF "$(DEPDIR)/xiropt.Tpo" -c -o xiropt.obj `if test -f 'xlat/xiropt.c'; then $(CYGPATH_W) 'xlat/xiropt.c'; else $(CYGPATH_W) '$(srcdir)/xlat/xiropt.c'; fi`; \
2.220 +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/xiropt.Tpo" "$(DEPDIR)/xiropt.Po"; else rm -f "$(DEPDIR)/xiropt.Tpo"; exit 1; fi
2.221 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='xlat/xiropt.c' object='xiropt.obj' libtool=no @AMDEPBACKSLASH@
2.222 +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
2.223 +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xiropt.obj `if test -f 'xlat/xiropt.c'; then $(CYGPATH_W) 'xlat/xiropt.c'; else $(CYGPATH_W) '$(srcdir)/xlat/xiropt.c'; fi`
2.224 +
2.225 testxlt.o: test/testxlt.c
2.226 @am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT testxlt.o -MD -MP -MF "$(DEPDIR)/testxlt.Tpo" -c -o testxlt.o `test -f 'test/testxlt.c' || echo '$(srcdir)/'`test/testxlt.c; \
2.227 @am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/testxlt.Tpo" "$(DEPDIR)/testxlt.Po"; else rm -f "$(DEPDIR)/testxlt.Tpo"; exit 1; fi
2.228 @@ -2288,6 +2437,9 @@
2.229 sh4/sh4x86.c: gendec sh4/sh4.def sh4/sh4x86.in
2.230 mkdir -p `dirname $@`
2.231 ./gendec $(srcdir)/sh4/sh4.def $(srcdir)/sh4/sh4x86.in -o $@
2.232 +sh4/sh4xir.c: gendec sh4/sh4.def sh4/sh4xir.in
2.233 + mkdir -p `dirname $@`
2.234 + ./gendec $(srcdir)/sh4/sh4.def $(srcdir)/sh4/sh4xir.in -o $@
2.235 sh4/sh4stat.c: gendec sh4/sh4.def sh4/sh4stat.in
2.236 mkdir -p `dirname $@`
2.237 ./gendec $(srcdir)/sh4/sh4.def $(srcdir)/sh4/sh4stat.in -o $@
3.1 --- a/src/sh4/sh4x86.in Tue Apr 07 10:39:02 2009 +0000
3.2 +++ b/src/sh4/sh4x86.in Tue Apr 07 10:55:03 2009 +0000
3.3 @@ -160,9 +160,6 @@
3.4 #define TSTATE_A X86_COND_A
3.5 #define TSTATE_AE X86_COND_AE
3.6
3.7 -#define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
3.8 -#define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
3.9 -
3.10 /* Convenience instructions */
3.11 #define LDC_t() CMPB_imms_rbpdisp(1,R_T); CMC()
3.12 #define SETE_t() SETCCB_cc_rbpdisp(X86_COND_E,R_T)
3.13 @@ -174,25 +171,17 @@
3.14 #define SETO_t() SETCCB_cc_rbpdisp(X86_COND_O,R_T)
3.15 #define SETNE_t() SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
3.16 #define SETC_r8(r1) SETCCB_cc_r8(X86_COND_C, r1)
3.17 -#define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
3.18 -#define JE_label(label) JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
3.19 -#define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
3.20 -#define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
3.21 -#define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
3.22 -#define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
3.23 -#define JS_label(label) JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
3.24 -#define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
3.25 #define JNE_exc(exc) JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)
3.26
3.27 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
3.28 #define JT_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
3.29 CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
3.30 - JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)
3.31 + JCC_cc_rel8(sh4_x86.tstate,-1); _MARK_JMP8(label)
3.32
3.33 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
3.34 #define JF_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
3.35 CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
3.36 - JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
3.37 + JCC_cc_rel8(sh4_x86.tstate^1, -1); _MARK_JMP8(label)
3.38
3.39
3.40 #define load_reg(x86reg,sh4reg) MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
4.2 +++ b/src/sh4/sh4xir.h Tue Apr 07 10:55:03 2009 +0000
4.3 @@ -0,0 +1,35 @@
4.4 +/**
4.5 + * $Id: x86op.h 973 2009-01-13 11:56:28Z nkeynes $
4.6 + *
4.7 + * Declarations for the SH4 -> IR decoder.
4.8 + *
4.9 + * Copyright (c) 2009 Nathan Keynes.
4.10 + *
4.11 + * This program is free software; you can redistribute it and/or modify
4.12 + * it under the terms of the GNU General Public License as published by
4.13 + * the Free Software Foundation; either version 2 of the License, or
4.14 + * (at your option) any later version.
4.15 + *
4.16 + * This program is distributed in the hope that it will be useful,
4.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
4.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4.19 + * GNU General Public License for more details.
4.20 + */
4.21 +
4.22 +#ifndef lxdream_sh4xir_H
4.23 +#define lxdream_sh4xir_H 1
4.24 +
4.25 +#include "xlat/xir.h"
4.26 +#include "xlat/xlat.h"
4.27 +
4.28 +/**
4.29 + * SH4 source description
4.30 + */
4.31 +extern struct xlat_source_machine sh4_source_machine_desc;
4.32 +
4.33 +/**
4.34 + * Mapping from register number to names
4.35 + */
4.36 +extern struct xlat_source_machine sh4_source_machine_desc;
4.37 +
4.38 +#endif /* !lxdream_sh4xir_H */
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
5.2 +++ b/src/sh4/sh4xir.in Tue Apr 07 10:55:03 2009 +0000
5.3 @@ -0,0 +1,1409 @@
5.4 +/**
5.5 + * $Id: sh4xir.in 931 2008-10-31 02:57:59Z nkeynes $
5.6 + *
5.7 + * SH4 => IR conversion.
5.8 + *
5.9 + * Copyright (c) 2009 Nathan Keynes.
5.10 + *
5.11 + * This program is free software; you can redistribute it and/or modify
5.12 + * it under the terms of the GNU General Public License as published by
5.13 + * the Free Software Foundation; either version 2 of the License, or
5.14 + * (at your option) any later version.
5.15 + *
5.16 + * This program is distributed in the hope that it will be useful,
5.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
5.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5.19 + * GNU General Public License for more details.
5.20 + */
5.21 +
5.22 +#include <assert.h>
5.23 +
5.24 +#include "sh4/sh4core.h"
5.25 +#include "sh4/mmu.h"
5.26 +#include "sh4/sh4xir.h"
5.27 +#include "xlat/xlat.h"
5.28 +#include "clock.h"
5.29 +
5.30 +#define REG_OFFSET(reg) (offsetof( struct sh4_registers, reg))
5.31 +
5.32 +#define R_R(rn) REG_OFFSET(r[rn])
5.33 +#define R_R0 R_R(0)
5.34 +#define R_SR REG_OFFSET(sr)
5.35 +#define R_PR REG_OFFSET(pr)
5.36 +#define R_PC REG_OFFSET(pc)
5.37 +#define R_FPUL REG_OFFSET(fpul)
5.38 +#define R_T REG_OFFSET(t)
5.39 +#define R_M REG_OFFSET(m)
5.40 +#define R_Q REG_OFFSET(q)
5.41 +#define R_S REG_OFFSET(s)
5.42 +#define R_FR(frn) REG_OFFSET(fr[0][(frn)^1])
5.43 +#define R_DR(frn) REG_OFFSET(fr[0][frn])
5.44 +#define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
5.45 +#define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
5.46 +#define R_XF(frn) REG_OFFSET(fr[1][(frn)^1])
5.47 +#define R_XD(frn) REG_OFFSET(fr[1][frn^1])
5.48 +#define R_FV(fvn) REG_OFFSET(fr[0][fvn<<2])
5.49 +#define R_XMTRX R_XD(0)
5.50 +#define R_FPSCR REG_OFFSET(fpscr)
5.51 +#define R_MAC REG_OFFSET(mac)
5.52 +#define R_MACL REG_OFFSET(mac)
5.53 +#define R_MACH REG_OFFSET(mac)+4
5.54 +#define R_GBR REG_OFFSET(gbr)
5.55 +#define R_SSR REG_OFFSET(ssr)
5.56 +#define R_SPC REG_OFFSET(spc)
5.57 +#define R_SGR REG_OFFSET(sgr)
5.58 +#define R_DBR REG_OFFSET(dbr)
5.59 +#define R_VBR REG_OFFSET(vbr)
5.60 +#define R_BANK(rn) REG_OFFSET(r_bank[rn])
5.61 +#define R_NEW_PC REG_OFFSET(new_pc)
5.62 +#define R_DELAY_SLOT REG_OFFSET(in_delay_slot)
5.63 +#define R_SLICE_CYCLE REG_OFFSET(slice_cycle)
5.64 +#define R_SH4_MODE REG_OFFSET(xlat_sh4_mode)
5.65 +
5.66 +uint32_t sh4_decode_basic_block(xir_basic_block_t xbb);
5.67 +
5.68 +static const char *sh4_register_names[] =
5.69 + {"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
5.70 + "sr", "pr", "pc", "fpul", "T", "M", "Q", "S",
5.71 + "fr1", "fr0", "fr3", "fr2", "fr5", "fr4", "fr7", "fr6", "fr9", "fr8", "fr11", "fr10", "fr13", "fr11", "fr15", "fr14",
5.72 + "xf1", "xf0", "xf3", "xf2", "xf5", "xf4", "xf7", "xf6", "xf9", "xf8", "xf11", "xf10", "xf13", "xf11", "xf15", "xf14",
5.73 + "fpscr", 0, "macl", "mach", "gbr", "ssr", "spc", "sgr", "dbr", "vbr",
5.74 + "r_bank0", "r_bank1", "r_bank2", "r_bank3", "r_bank4", "r_bank5", "r_bank6", "r_bank7",
5.75 + "store_queue", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5.76 + "new_pc", "event_pending", "event_type", "delay_slot", "slice_cycle", "bus_cycle", "state", "xlat_mode" };
5.77 +
5.78 +struct xlat_source_machine sh4_source_machine = { "sH4", &sh4r,
5.79 + sh4_register_names, R_PC, R_NEW_PC, R_T, R_M, R_Q, R_S,
5.80 + sh4_decode_basic_block };
5.81 +
5.82 + /**
5.83 + * Struct to manage internal translation state. This state is not saved -
5.84 + * it is only valid between calls to sh4_translate_begin_block() and
5.85 + * sh4_translate_end_block()
5.86 + */
5.87 +struct sh4_xir_state {
5.88 + gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
5.89 + gboolean double_prec; /* true if FPU is in double-precision mode */
5.90 + gboolean double_size; /* true if FPU is in double-size mode */
5.91 +};
5.92 +
5.93 +static struct sh4_xir_state sh4_xir;
5.94 +
/* Append <op arg0> with an exception edge: ins->exc points at a standard
 * post-exception stub, and normal flow resumes at the next appended op.
 * Fixed: XOP1E now links the stub back via exc->prev, matching XOP2E/ALIGN. */
#define XOP1E( op, arg0 ) do{ \
    xir_op_t ins = xir_append_op2(xbb, op, SOURCE_REGISTER_OPERAND, arg0, NO_OPERAND, 0); \
    ins->exc = write_postexc(xbb, (in_delay_slot ? pc-2 : pc) ); \
    ins->exc->prev = ins; \
    ins->next = xbb->ir_ptr; \
    xbb->ir_ptr->prev = ins; \
    } while(0)
/* As XOP1E, but with two source-register operands. */
#define XOP2E( op, arg0, arg1 ) do{ \
    xir_op_t ins = xir_append_op2(xbb, op, SOURCE_REGISTER_OPERAND, arg0, SOURCE_REGISTER_OPERAND, arg1); \
    ins->exc = write_postexc(xbb, (in_delay_slot ? pc-2 : pc) ); \
    ins->exc->prev = ins; \
    ins->next = xbb->ir_ptr; \
    xbb->ir_ptr->prev = ins; \
    } while(0)
/* Alignment check: OP_RAISEMNE with mask m on register r, taking the given
 * exception code on failure (presumably raises when (r & m) != 0 — per the
 * RAISEMNE mnemonic; confirm against the XIR op definitions). */
#define ALIGN(m,r,code) do { \
    xir_op_t ins = xir_append_op2(xbb, OP_RAISEMNE, INT_IMM_OPERAND, m, SOURCE_REGISTER_OPERAND, r); \
    ins->exc = write_exc(xbb, (in_delay_slot ? pc-2 : pc), code); \
    ins->exc->prev = ins; \
    ins->next = xbb->ir_ptr; \
    xbb->ir_ptr->prev = ins; \
    } while(0)
5.115 +
/* Emit an illegal-instruction exception stub at the current pc. */
#define SLOTILLEGAL() write_exc(xbb, pc, EXC_SLOT_ILLEGAL)
#define ILLEGAL() write_exc(xbb, pc, EXC_ILLEGAL)

/* Undefined instruction: raise the (slot-)illegal exception and stop decoding. */
#define UNDEF(ir) if( in_delay_slot ) { SLOTILLEGAL(); return 2; } else { ILLEGAL(); return 2; }
/* Emit the FPU-enabled check at most once per translated block: tests SR.FD
 * via OP_RAISEMNE and routes to the FPU-disabled exception stub on failure. */
#define CHECKFPUEN() if( !sh4_xir.fpuen_checked ) { \
    xir_op_t ins = XOP2I( OP_RAISEMNE, SR_FD, R_SR ); \
    if( in_delay_slot ) { \
        ins->exc = write_exc(xbb, pc-2, EXC_SLOT_FPU_DISABLED); \
    } else { \
        ins->exc = write_exc(xbb, pc, EXC_FPU_DISABLED); \
    } \
    ins->exc->prev = ins; \
    ins->next = xbb->ir_ptr; \
    xbb->ir_ptr->prev = ins; \
    sh4_xir.fpuen_checked = TRUE; \
    }
/* Privilege check done at translation time against the block's SH4 mode. */
#define CHECKPRIV() if( (sh4r.xlat_sh4_mode & SR_MD) == 0 ) { UNDEF(ir); }

/* Read/write alignment checks: mask selects 16/32/64-bit alignment, raising
 * the appropriate data address error exception on misaligned access. */
#define RALIGN16(r) ALIGN(0x01,r,EXC_DATA_ADDR_READ)
#define RALIGN32(r) ALIGN(0x03,r,EXC_DATA_ADDR_READ)
#define RALIGN64(r) ALIGN(0x07,r,EXC_DATA_ADDR_READ)
#define WALIGN16(r) ALIGN(0x01,r,EXC_DATA_ADDR_WRITE)
#define WALIGN32(r) ALIGN(0x03,r,EXC_DATA_ADDR_WRITE)
#define WALIGN64(r) ALIGN(0x07,r,EXC_DATA_ADDR_WRITE)
5.140 +
/* An address is untranslatable when it's outside the readable ICACHE region. */
#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)

/* Punt a delay-slot instruction to the interpreter: advance PC, set the
 * delay-slot flag, call sh4_execute_instruction once, then branch to PC. */
#define EMU_DELAY_SLOT() do { \
    XOP2I( OP_ADD, (pc+2 - xbb->pc_begin), R_PC ); \
    XOP2I( OP_MOV, 1, R_DELAY_SLOT ); \
    XOP0( OP_BARRIER ); \
    XOPCALL0( sh4_execute_instruction ); \
    XOP1( OP_BR, R_PC ); \
    } while(0)
5.150 +
5.151 +
/**
 * Create a standard post-exception stub sub-block (ie sh4_raise_exception or similar has
 * already been called - update SPC + slice_cycle and exit).
 * @param xbb  basic block being built; the stub is appended at its ir_ptr.
 * @param pc   address of the faulting instruction.
 * @return the first xir_op_t of the exception block.
 */
static inline xir_op_t write_postexc( xir_basic_block_t xbb, sh4addr_t pc )
{
    xir_op_t start = xbb->ir_ptr;
    if( pc != xbb->pc_begin ) {
        XOP2I( OP_ADD, pc - xbb->pc_begin, R_SPC ); /* adjust SPC to the faulting pc */
    }
    /* Charge the cycles for all instructions up to and including this one */
    XOP2I( OP_ADD, (pc+2 - xbb->pc_begin) * sh4_cpu_period, R_SLICE_CYCLE );
    XOP1( OP_BR, R_SPC )->next = NULL; /* exit block; terminate the stub chain */
    start->prev = NULL; /* detach the stub from the main instruction chain */
    return start;
}
5.168 +
/**
 * Create a standard exception-taking stub sub-block - updates SPC, slice_cycle, and exits
 * @param xbb       basic block being built; the stub is appended at its ir_ptr.
 * @param pc        address of the faulting instruction.
 * @param exc_code  SH4 exception code passed to sh4_raise_exception.
 * @return the first xir_op_t of the exception block.
 */
static inline xir_op_t write_exc( xir_basic_block_t xbb, sh4addr_t pc, int exc_code )
{
    xir_op_t start = xbb->ir_ptr;
    XOPCALL1I( sh4_raise_exception, exc_code ); /* raise first, then fix up state */
    if( pc != xbb->pc_begin ) {
        XOP2I( OP_ADD, pc - xbb->pc_begin, R_SPC ); /* adjust SPC to the faulting pc */
    }
    /* Charge the cycles for all instructions up to and including this one */
    XOP2I( OP_ADD, (pc+2 - xbb->pc_begin) * sh4_cpu_period, R_SLICE_CYCLE );
    XOP1( OP_BR, R_SPC )->next = NULL; /* exit block; terminate the stub chain */
    start->prev = NULL; /* detach the stub from the main instruction chain */
    return start;
}
5.185 +
5.186 +static sh4addr_t sh4_decode_instruction( xir_basic_block_t xbb, sh4addr_t pc, gboolean in_delay_slot )
5.187 +{
5.188 + assert( IS_IN_ICACHE(pc) );
5.189 + uint16_t ir = *(uint16_t *)GET_ICACHE_PTR(pc);
5.190 +
5.191 +%%
5.192 +ADD Rm, Rn {: XOP2( OP_ADD, R_R(Rm), R_R(Rn) ); :}
5.193 +ADD #imm, Rn {: XOP2I( OP_ADD, imm, R_R(Rn) ); :}
5.194 +ADDC Rm, Rn {:
5.195 + XOP1CC( OP_LD, CC_C, R_T );
5.196 + XOP2( OP_ADDCS, R_R(Rm), R_R(Rn) );
5.197 + XOP1CC( OP_ST, CC_C, R_T );
5.198 +:}
5.199 +ADDV Rm, Rn {: XOP2( OP_ADDS, R_R(Rm), R_R(Rn) ); XOP1CC( OP_ST, CC_OV, R_T ); :}
5.200 +AND Rm, Rn {: XOP2( OP_AND, R_R(Rm), R_R(Rn) ); :}
5.201 +AND #imm, R0 {: XOP2I( OP_AND, imm, R_R0 ); :}
5.202 +CMP/EQ Rm, Rn {: XOP2( OP_CMP, Rm, R_R(Rn) ); XOP1CC( OP_ST, CC_EQ, R_T ); :}
5.203 +CMP/EQ #imm, R0 {: XOP2I( OP_CMP, imm, R_R0 ); XOP1CC( OP_ST, CC_EQ, R_T ); :}
5.204 +CMP/GE Rm, Rn {: XOP2( OP_CMP, R_R(Rm), R_R(Rn) ); XOP1CC( OP_ST, CC_SGE, R_T ); :}
5.205 +CMP/GT Rm, Rn {: XOP2( OP_CMP, R_R(Rm), R_R(Rn) ); XOP1CC( OP_ST, CC_SGT, R_T ); :}
5.206 +CMP/HI Rm, Rn {: XOP2( OP_CMP, R_R(Rm), R_R(Rn) ); XOP1CC( OP_ST, CC_UGT, R_T ); :}
5.207 +CMP/HS Rm, Rn {: XOP2( OP_CMP, R_R(Rm), R_R(Rn) ); XOP1CC( OP_ST, CC_UGE, R_T ); :}
5.208 +CMP/PL Rn {: XOP2I( OP_CMP, 0, R_R(Rn) ); XOP1CC( OP_ST, CC_SGT, R_T ); :}
5.209 +CMP/PZ Rn {: XOP2I( OP_CMP, 0, R_R(Rn) ); XOP1CC( OP_ST, CC_SGE, R_T ); :}
5.210 +CMP/STR Rm, Rn {: XOP2( OP_CMPSTR, R_R(Rm), R_R(Rn) ); :}
5.211 +DIV0S Rm, Rn {:
5.212 + XOP2( OP_MOV, R_R(Rm), R_M );
5.213 + XOP2I( OP_SLR, 31, R_M );
5.214 + XOP2( OP_MOV, R_R(Rn), R_Q );
5.215 + XOP2I( OP_SLR, 31, R_Q );
5.216 + XOP2( OP_CMP, R_M, R_Q );
5.217 + XOP1CC( OP_ST, CC_NE, R_T );
5.218 +:}
5.219 +DIV0U {:
5.220 + XOP2I( OP_MOV, 0, R_M );
5.221 + XOP2I( OP_MOV, 0, R_Q );
5.222 + XOP2I( OP_MOV, 0, R_T );
5.223 +:}
5.224 +DIV1 Rm, Rn {: XOP2( OP_DIV1, R_R(Rm), R_R(Rn) ); :}
5.225 +DMULS.L Rm, Rn {:
5.226 + XOP2( OP_MOVSX32, R_R(Rm), R_MAC );
5.227 + XOP2( OP_MOVSX32, R_R(Rn), REG_TMP0 );
5.228 + XOP2( OP_MULQ, REG_TMPQ0, R_MAC );
5.229 +:}
5.230 +DMULU.L Rm, Rn {:
5.231 + XOP2( OP_MOVZX32, R_R(Rm), R_MAC );
5.232 + XOP2( OP_MOVZX32, R_R(Rn), REG_TMP0 ) ;
5.233 + XOP2( OP_MULQ, REG_TMP0, R_MAC );
5.234 +:}
5.235 +DT Rn {: XOP1( OP_DEC, R_R(Rn) ); :}
5.236 +EXTS.B Rm, Rn {: XOP2( OP_MOVSX8, R_R(Rm), R_R(Rn)); :}
5.237 +EXTS.W Rm, Rn {: XOP2( OP_MOVSX16, R_R(Rm), R_R(Rn)); :}
5.238 +EXTU.B Rm, Rn {: XOP2( OP_MOVZX8, R_R(Rm), R_R(Rn)); :}
5.239 +EXTU.W Rm, Rn {: XOP2( OP_MOVZX16, R_R(Rm), R_R(Rn)); :}
5.240 +MAC.L @Rm+, @Rn+ {:
5.241 + RALIGN32(R_R(Rm));
5.242 + if( Rm == Rn ) {
5.243 + XOP2E( OP_LOADL, R_R(Rm), REG_TMP0 );
5.244 + XOP2( OP_MOV, R_R(Rm), REG_TMP1 );
5.245 + XOP2I( OP_ADD, 4, REG_TMP1 );
5.246 + XOP2E( OP_LOADL, REG_TMP1, REG_TMP1 );
5.247 + XOP2I( OP_ADD, 8, R_R(Rm) );
5.248 + } else {
5.249 + RALIGN32(R_R(Rn));
5.250 + XOP2E( OP_LOADL, R_R(Rm), REG_TMP0 );
5.251 + XOP2E( OP_LOADL, R_R(Rn), REG_TMP1 );
5.252 + XOP2I( OP_ADD, 4, R_R(Rm) );
5.253 + XOP2I( OP_ADD, 4, R_R(Rn) );
5.254 + }
5.255 + XOP2( OP_MOVSX32, REG_TMP0, REG_TMPQ0 );
5.256 + XOP2( OP_MOVSX32, REG_TMP1, REG_TMPQ1 );
5.257 + XOP2( OP_MULQ, REG_TMPQ0, REG_TMPQ1 );
5.258 + XOP2( OP_ADDQSAT48, REG_TMPQ1, R_MAC );
5.259 +:}
5.260 +MAC.W @Rm+, @Rn+ {:
5.261 + RALIGN32(R_R(Rm));
5.262 + if( Rm == Rn ) {
5.263 + XOP2E( OP_LOADW, R_R(Rm), REG_TMP0 );
5.264 + XOP2( OP_MOV, R_R(Rm), REG_TMP1 );
5.265 + XOP2I( OP_ADD, 2, REG_TMP1 );
5.266 + XOP2E( OP_LOADW, REG_TMP1, REG_TMP1 );
5.267 + XOP2I( OP_ADD, 4, R_R(Rm) );
5.268 + } else {
5.269 + RALIGN32(Rn);
5.270 + XOP2E( OP_LOADW, R_R(Rm), REG_TMP0 );
5.271 + XOP2E( OP_LOADW, R_R(Rn), REG_TMP1 );
5.272 + XOP2I( OP_ADD, 2, R_R(Rm) );
5.273 + XOP2I( OP_ADD, 2, R_R(Rn) );
5.274 + }
5.275 + XOP2( OP_MOVSX32, REG_TMP0, REG_TMPQ0 );
5.276 + XOP2( OP_MOVSX32, REG_TMP1, REG_TMPQ1 );
5.277 + XOP2( OP_MULQ, REG_TMPQ0, REG_TMPQ1 );
5.278 + XOP2( OP_ADDQSAT32, REG_TMPQ1, R_MAC );
5.279 +:}
5.280 +MOVT Rn {: XOP2( OP_MOV, R_R(Rn), R_T ); :}
5.281 +MUL.L Rm, Rn {:
5.282 + XOP2( OP_MOV, R_R(Rm), R_MACL );
5.283 + XOP2( OP_MUL, R_R(Rn), R_MACL );
5.284 +:}
5.285 +MULS.W Rm, Rn {:
5.286 + XOP2( OP_MOVSX16, R_R(Rm), REG_TMP0 );
5.287 + XOP2( OP_MOVSX16, R_R(Rn), R_MACL );
5.288 + XOP2( OP_MUL, REG_TMP0, R_MACL );
5.289 +:}
5.290 +MULU.W Rm, Rn {:
5.291 + XOP2( OP_MOVZX16, R_R(Rm), REG_TMP0 );
5.292 + XOP2( OP_MOVZX16, R_R(Rn), R_MACL );
5.293 + XOP2( OP_MUL, REG_TMP0, R_MACL );
5.294 +:}
5.295 +NEG Rm, Rn {:
5.296 + XOP2( OP_NEG, R_R(Rm), R_R(Rn) );
5.297 +:}
5.298 +NEGC Rm, Rn {:
5.299 + XOP1CC( OP_LD, CC_C, R_T );
5.300 + if( Rm == Rn ) {
5.301 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.302 + XOP2I(OP_MOV, 0, R_R(Rn) );
5.303 + XOP2( OP_SUBBS, REG_TMP0, R_R(Rn) );
5.304 + } else {
5.305 + XOP2I(OP_MOV, 0, R_R(Rn) );
5.306 + XOP2( OP_SUBBS, R_R(Rm), R_R(Rn) );
5.307 + }
5.308 + XOP1CC( OP_ST, CC_C, R_T );
5.309 +:}
5.310 +NOT Rm, Rn {:
5.311 + XOP2( OP_NOT, R_R(Rm), R_R(Rn) );
5.312 +:}
5.313 +OR Rm, Rn {: XOP2( OP_OR, R_R(Rm), R_R(Rn) ); :}
5.314 +OR #imm, R0 {: XOP2I( OP_OR, imm, R_R0 ); :}
5.315 +ROTCL Rn {: XOP1CC( OP_LD, CC_C, R_T ); XOP2I( OP_RCL, 1, R_R(Rn) ); XOP1CC( OP_ST, CC_C, R_T); :}
5.316 +ROTCR Rn {: XOP1CC( OP_LD, CC_C, R_T ); XOP2I( OP_RCR, 1, R_R(Rn) ); XOP1CC( OP_ST, CC_C, R_T); :}
5.317 +ROTL Rn {: XOP2I( OP_ROL, 1, R_R(Rn) ); :}
5.318 +ROTR Rn {: XOP2I( OP_ROR, 1, R_R(Rn) ); :}
5.319 +SHAD Rm, Rn {: XOP2( OP_SHAD, R_R(Rm), R_R(Rn) ); :}
5.320 +SHLD Rm, Rn {: XOP2( OP_SHLD, R_R(Rm), R_R(Rn) ); :}
5.321 +SHAL Rn {: XOP2I( OP_SLLS, 1, R_R(Rn) ); XOP1CC( OP_ST, CC_C, R_T); :}
5.322 +SHAR Rn {: XOP2I( OP_SARS, 1, R_R(Rn) ); XOP1CC( OP_ST, CC_C, R_T); :}
5.323 +SHLL Rn {: XOP2I( OP_SLLS, 1, R_R(Rn) ); XOP1CC( OP_ST, CC_C, R_T); :}
5.324 +SHLL2 Rn {: XOP2I( OP_SLL, 2, R_R(Rn) ); :}
5.325 +SHLL8 Rn {: XOP2I( OP_SLL, 8, R_R(Rn) ); :}
5.326 +SHLL16 Rn {: XOP2I( OP_SLL, 16, R_R(Rn) ); :}
5.327 +SHLR Rn {: XOP2I( OP_SLRS, 1, R_R(Rn) ); XOP1CC( OP_ST, CC_C, R_T); :}
5.328 +SHLR2 Rn {: XOP2I( OP_SLR, 2, R_R(Rn) ); :}
5.329 +SHLR8 Rn {: XOP2I( OP_SLR, 8, R_R(Rn) ); :}
5.330 +SHLR16 Rn {: XOP2I( OP_SLR, 16, R_R(Rn) ); :}
5.331 +SUB Rm, Rn {:
5.332 + if( Rm == Rn ) {
5.333 + /* Break false dependence */
5.334 + XOP2I( OP_MOV, 0, R_R(Rn) );
5.335 + } else {
5.336 + XOP2( OP_SUB, R_R(Rm), R_R(Rn) );
5.337 + }
5.338 +:}
5.339 +SUBC Rm, Rn {: XOP1CC( OP_LD, CC_C, R_T ); XOP2( OP_SUBBS, R_R(Rm), R_R(Rn) ); XOP1CC( OP_ST, CC_C, R_T ); :}
5.340 +SUBV Rm, Rn {: XOP2( OP_SUB, R_R(Rm), R_R(Rn) ); XOP1CC( OP_ST, CC_OV, R_T ); :}
5.341 +SWAP.B Rm, Rn {:
5.342 + if( Rm != Rn ) {
5.343 + XOP2( OP_MOV, R_R(Rm), R_R(Rn) );
5.344 + }
5.345 + XOP2I( OP_SHUFFLE, 0x1243, R_R(Rn) );
5.346 +:}
5.347 +SWAP.W Rm, Rn {:
5.348 + if( Rm != Rn ) {
5.349 + XOP2( OP_MOV, R_R(Rm), R_R(Rn) );
5.350 + }
5.351 + XOP2I( OP_SHUFFLE, 0x3412, R_R(Rn) );
5.352 +:}
5.353 +TST Rm, Rn {: XOP2( OP_TST, R_R(Rm), R_R(Rn) ); XOP1CC( OP_ST, CC_EQ, R_T ); :}
5.354 +TST #imm, R0 {: XOP2I( OP_TST, imm, R_R0 ); XOP1CC( OP_ST, CC_EQ, R_T ); :}
5.355 +XOR Rm, Rn {:
5.356 + if( Rm == Rn ) {
5.357 + /* Break false dependence */
5.358 + XOP2I( OP_MOV, 0, R_R(Rn) );
5.359 + } else {
5.360 + XOP2( OP_XOR, R_R(Rm), R_R(Rn) );
5.361 + }
5.362 +:}
5.363 +XOR #imm, R0 {: XOP2I( OP_XOR, imm, R_R0 ); :}
5.364 +XTRCT Rm, Rn {:
5.365 + XOP2( OP_MOV, R_R(Rm), REG_TMP0 );
5.366 + XOP2I( OP_SLL, 16, REG_TMP0 );
5.367 + XOP2I( OP_SLR, 16, R_R(Rn) );
5.368 + XOP2( OP_OR, REG_TMP0, R_R(Rn) );
5.369 +:}
5.370 +MOV Rm, Rn {: XOP2( OP_MOV, R_R(Rm), R_R(Rn) ); :}
5.371 +MOV #imm, Rn {: XOP2I( OP_MOV, imm, R_R(Rn) ); :}
5.372 +
5.373 +AND.B #imm, @(R0, GBR) {:
5.374 + XOP2( OP_MOV, R_R0, REG_TMP0 );
5.375 + XOP2( OP_ADD, R_GBR, REG_TMP0 );
5.376 + XOP2E( OP_LOADBFW, REG_TMP0, REG_TMP1 );
5.377 + XOP2I( OP_AND, imm, REG_TMP1 );
5.378 + XOP2E( OP_STOREB, REG_TMP0, REG_TMP1 );
5.379 +:}
5.380 +OR.B #imm, @(R0, GBR) {:
5.381 + XOP2( OP_MOV, R_R0, REG_TMP0 );
5.382 + XOP2( OP_ADD, R_GBR, REG_TMP0 );
5.383 + XOP2E( OP_LOADBFW, REG_TMP0, REG_TMP1 );
5.384 + XOP2I( OP_OR, imm, REG_TMP1 );
5.385 + XOP2E( OP_STOREB, REG_TMP0, REG_TMP1 );
5.386 +:}
5.387 +TAS.B @Rn {:
5.388 + XOP1( OP_OCBP, R_R(Rn) );
5.389 + XOP2E( OP_LOADBFW, R_R(Rn), REG_TMP0 );
5.390 + XOP2I( OP_CMP, 0, REG_TMP0 );
5.391 + XOP1CC(OP_ST, CC_EQ, R_T );
5.392 + XOP2I( OP_OR, 0x80, REG_TMP0 );
5.393 + XOP2E( OP_STOREB, R_R(Rn), REG_TMP0 );
5.394 +:}
5.395 +TST.B #imm, @(R0, GBR) {:
5.396 + XOP2( OP_MOV, R_R0, REG_TMP0 );
5.397 + XOP2( OP_ADD, R_GBR, REG_TMP0 );
5.398 + XOP2E( OP_LOADB, REG_TMP0, REG_TMP0 );
5.399 + XOP2I( OP_TST, imm, REG_TMP0 );
5.400 +:}
5.401 +XOR.B #imm, @(R0, GBR) {:
5.402 + XOP2( OP_MOV, R_R0, REG_TMP0 );
5.403 + XOP2( OP_ADD, R_GBR, REG_TMP0 );
5.404 + XOP2E( OP_LOADBFW, REG_TMP0, REG_TMP1 );
5.405 + XOP2I( OP_XOR, imm, REG_TMP1 );
5.406 + XOP2E( OP_STOREB, REG_TMP0, REG_TMP1 );
5.407 +:}
5.408 +
5.409 +MOV.B Rm, @Rn {:
5.410 + XOP2E( OP_STOREB, R_R(Rn), R_R(Rm) );
5.411 +:}
5.412 +MOV.B Rm, @-Rn {:
5.413 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.414 + XOP2I( OP_ADD, -1, REG_TMP0 );
5.415 + XOP2E( OP_STOREB, REG_TMP0, R_R(Rm) );
5.416 + XOP2I( OP_ADD, -1, R_R(Rn) );
5.417 +:}
5.418 +MOV.B Rm, @(R0, Rn) {:
5.419 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.420 + XOP2( OP_ADD, R_R0, REG_TMP0 );
5.421 + XOP2E( OP_STOREB, REG_TMP0, R_R(Rm) );
5.422 +:}
5.423 +MOV.B R0, @(disp, GBR) {:
5.424 + XOP2( OP_MOV, R_GBR, REG_TMP0 );
5.425 + XOP2I( OP_ADD, disp, REG_TMP0 );
5.426 + XOP2E( OP_STOREB, REG_TMP0, R_R0 );
5.427 +:}
5.428 +MOV.B R0, @(disp, Rn) {:
5.429 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.430 + XOP2I( OP_ADD, disp, REG_TMP0 );
5.431 + XOP2E( OP_STOREB, REG_TMP0, R_R0 );
5.432 +:}
5.433 +MOV.B @Rm, Rn {:
5.434 + XOP2E( OP_LOADB, R_R(Rm), R_R(Rn) );
5.435 +:}
5.436 +MOV.B @Rm+, Rn {:
5.437 + XOP2E( OP_LOADB, R_R(Rm), R_R(Rn) );
5.438 + if( Rm != Rn ) {
5.439 + XOP2I( OP_ADD, 1, R_R(Rm) );
5.440 + }
5.441 +:}
5.442 +MOV.B @(R0, Rm), Rn {:
5.443 + XOP2( OP_MOV, R_R(Rm), REG_TMP0 );
5.444 + XOP2( OP_ADD, R_R0, REG_TMP0 );
5.445 + XOP2E(OP_LOADB, REG_TMP0, R_R(Rn) );
5.446 +:}
5.447 +MOV.B @(disp, GBR), R0 {:
5.448 + XOP2( OP_MOV, R_GBR, REG_TMP0 );
5.449 + XOP2I(OP_ADD, disp, REG_TMP0 );
5.450 + XOP2E(OP_LOADB, REG_TMP0, R_R0 );
5.451 +:}
5.452 +MOV.B @(disp, Rm), R0 {:
5.453 + XOP2( OP_MOV, R_R(Rm), REG_TMP0 );
5.454 + XOP2I(OP_ADD, disp, REG_TMP0 );
5.455 + XOP2E(OP_LOADB, REG_TMP0, R_R0 );
5.456 +:}
5.457 +MOV.L Rm, @Rn {:
5.458 + WALIGN32( R_R(Rn) );
5.459 + XOP2E( OP_STOREL, R_R(Rn), R_R(Rm) );
5.460 +:}
5.461 +MOV.L Rm, @-Rn {:
5.462 + WALIGN32( R_R(Rn) );
5.463 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.464 + XOP2I(OP_ADD, -4, REG_TMP0 );
5.465 + XOP2( OP_STOREL, REG_TMP0, R_R(Rm) );
5.466 + XOP2I(OP_ADD, -4, R_R(Rn) );
5.467 +:}
5.468 +MOV.L Rm, @(R0, Rn) {:
5.469 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.470 + XOP2( OP_ADD, R_R0, REG_TMP0 );
5.471 + WALIGN32( REG_TMP0 );
5.472 + XOP2E(OP_STOREL, REG_TMP0, R_R(Rm) );
5.473 +:}
5.474 +MOV.L R0, @(disp, GBR) {:
5.475 + XOP2( OP_MOV, R_GBR, REG_TMP0 );
5.476 + XOP2I(OP_ADD, disp, REG_TMP0 );
5.477 + WALIGN32( REG_TMP0 );
5.478 + XOP2E(OP_STOREL, REG_TMP0, R_R0 );
5.479 +:}
5.480 +MOV.L Rm, @(disp, Rn) {:
5.481 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.482 + XOP2I(OP_ADD, disp, REG_TMP0 );
5.483 + WALIGN32( REG_TMP0 );
5.484 + XOP2E(OP_STOREL, REG_TMP0, R_R(Rm) );
5.485 +:}
5.486 +MOV.L @Rm, Rn {:
5.487 + RALIGN32( R_R(Rm) );
5.488 + XOP2E(OP_LOADL, R_R(Rm), R_R(Rn) );
5.489 +:}
5.490 +MOV.L @Rm+, Rn {:
5.491 + RALIGN32( R_R(Rm) );
5.492 + XOP2E( OP_LOADL, R_R(Rm), R_R(Rn) );
5.493 + if( R_R(Rm) != R_R(Rn) ) {
5.494 + XOP2I( OP_ADD, 4, R_R(Rm) );
5.495 + }
5.496 +:}
5.497 +MOV.L @(R0, Rm), Rn {:
5.498 + XOP2( OP_MOV, R_R0, REG_TMP0 );
5.499 + XOP2( OP_ADD, R_R(Rm), REG_TMP0 );
5.500 + RALIGN32( REG_TMP0 );
5.501 + XOP2E(OP_LOADL, REG_TMP0, R_R(Rn) );
5.502 +:}
5.503 +MOV.L @(disp, GBR), R0 {:
5.504 + XOP2( OP_MOV, R_GBR, REG_TMP0 );
5.505 + XOP2I(OP_ADD, disp, REG_TMP0 );
5.506 + RALIGN32( REG_TMP0 );
5.507 + XOP2E(OP_LOADL, REG_TMP0, R_R0 );
5.508 +:}
5.509 +MOV.L @(disp, PC), Rn {:
5.510 + if( in_delay_slot ) {
5.511 + SLOTILLEGAL();
5.512 + return 2;
5.513 + } else {
5.514 + uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
5.515 + if( IS_IN_ICACHE(target) ) {
5.516 + // If the target address is in the same page as the code, it's
5.517 + // pretty safe to just ref it directly and circumvent the whole
5.518 + // memory subsystem. (this is a big performance win)
5.519 +
5.520 + // FIXME: There's a corner-case that's not handled here when
5.521 + // the current code-page is in the ITLB but not in the UTLB.
5.522 + // (should generate a TLB miss although need to test SH4
5.523 + // behaviour to confirm) Unlikely to be anyone depending on this
5.524 + // behaviour though.
5.525 + sh4ptr_t ptr = GET_ICACHE_PTR(target);
5.526 + XOP2P( OP_MOV, ptr, R_R(Rn) );
5.527 + } else {
5.528 + // Note: we use sh4r.pc for the calc as we could be running at a
5.529 + // different virtual address than the translation was done with,
5.530 + // but we can safely assume that the low bits are the same.
5.531 + XOP2( OP_MOV, R_PC, REG_TMP0 );
5.532 + XOP2( OP_ADD, (pc-xbb->pc_begin) + disp + 4 - (pc&0x03), REG_TMP0 );
5.533 + XOP2E(OP_LOADL, REG_TMP0, R_R(Rn) );
5.534 + }
5.535 + }
5.536 +:}
5.537 +MOV.L @(disp, Rm), Rn {:
5.538 + XOP2( OP_MOV, R_R(Rm), REG_TMP0 );
5.539 + XOP2I(OP_ADD, disp, REG_TMP0 );
5.540 + RALIGN32( REG_TMP0 );
5.541 + XOP2E(OP_LOADL, REG_TMP0, R_R(Rn) );
5.542 +:}
5.543 +MOV.W Rm, @Rn {:
5.544 + WALIGN16( R_R(Rn) );
5.545 + XOP2E(OP_STOREW, R_R(Rn), R_R(Rm) );
5.546 +:}
5.547 +MOV.W Rm, @-Rn {:
5.548 + WALIGN16( R_R(Rn) );
5.549 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.550 + XOP2I(OP_ADD, -2, REG_TMP0 );
5.551 + XOP2E(OP_STOREW, REG_TMP0, R_R(Rm) );
5.552 + XOP2I(OP_ADD, -2, R_R(Rn) );
5.553 +:}
5.554 +MOV.W Rm, @(R0, Rn) {:
5.555 + XOP2( OP_MOV, R_R0, REG_TMP0 );
5.556 + XOP2( OP_ADD, R_R(Rn), REG_TMP0 );
5.557 + WALIGN16( REG_TMP0 );
5.558 + XOP2E(OP_STOREW, REG_TMP0, R_R(Rm) );
5.559 +:}
5.560 +MOV.W R0, @(disp, GBR) {:
5.561 + XOP2( OP_MOV, R_GBR, REG_TMP0 );
5.562 + XOP2I(OP_ADD, disp, REG_TMP0 );
5.563 + WALIGN16( REG_TMP0 );
5.564 + XOP2( OP_STOREW, REG_TMP0, R_R0 );
5.565 +:}
5.566 +MOV.W R0, @(disp, Rn) {:
5.567 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.568 + XOP2I(OP_ADD, disp, REG_TMP0 );
5.569 + WALIGN16( REG_TMP0 );
5.570 + XOP2E(OP_STOREW, REG_TMP0, R_R0 );
5.571 +:}
5.572 +MOV.W @Rm, Rn {:
5.573 + RALIGN16( R_R(Rm) );
5.574 + XOP2E(OP_LOADW, R_R(Rm), R_R(Rn) );
5.575 +:}
5.576 +MOV.W @Rm+, Rn {:
5.577 + RALIGN16( R_R(Rm) );
5.578 + XOP2E(OP_LOADW, R_R(Rm), R_R(Rn) );
5.579 + if( Rm != Rn ) {
5.580 + XOP2I( OP_ADD, 2, R_R(Rm) );
5.581 + }
5.582 +:}
5.583 +MOV.W @(R0, Rm), Rn {:
5.584 + XOP2( OP_MOV, R_R0, REG_TMP0 );
5.585 + XOP2( OP_ADD, R_R(Rm), REG_TMP0 );
5.586 + RALIGN16( REG_TMP0 );
5.587 + XOP2E(OP_LOADW, REG_TMP0, R_R(Rn) );
5.588 +:}
5.589 +MOV.W @(disp, GBR), R0 {:
5.590 + XOP2( OP_MOV, R_GBR, REG_TMP0 );
5.591 + XOP2I(OP_ADD, disp, REG_TMP0 );
5.592 + RALIGN16( REG_TMP0 );
5.593 + XOP2E(OP_LOADW, REG_TMP0, R_R0 );
5.594 +:}
5.595 +MOV.W @(disp, PC), Rn {:
5.596 + if( in_delay_slot ) {
5.597 + SLOTILLEGAL();
5.598 + return 2;
5.599 + } else {
5.600 + uint32_t target = pc + disp + 4;
5.601 + if( IS_IN_ICACHE(target) ) {
5.602 + // If the target address is in the same page as the code, it's
5.603 + // pretty safe to just ref it directly and circumvent the whole
5.604 + // memory subsystem. (this is a big performance win)
5.605 +
5.606 + // FIXME: There's a corner-case that's not handled here when
5.607 + // the current code-page is in the ITLB but not in the UTLB.
5.608 + // (should generate a TLB miss although need to test SH4
5.609 + // behaviour to confirm) Unlikely to be anyone depending on this
5.610 + // behaviour though.
5.611 + sh4ptr_t ptr = GET_ICACHE_PTR(target);
5.612 + XOP2P( OP_MOV, ptr, REG_TMP0 );
5.613 + XOP2( OP_MOVSX16, REG_TMP0, R_R(Rn) );
5.614 + } else {
5.615 + // Note: we use sh4r.pc for the calc as we could be running at a
5.616 + // different virtual address than the translation was done with,
5.617 + // but we can safely assume that the low bits are the same.
5.618 + XOP2( OP_MOV, R_PC, REG_TMP0 );
5.619 + XOP2( OP_ADD, (pc - xbb->pc_begin) + disp + 4, REG_TMP0 );
5.620 + XOP2E(OP_LOADW, REG_TMP0, R_R(Rn) );
5.621 + }
5.622 + }
5.623 +:}
5.624 +MOV.W @(disp, Rm), R0 {:
5.625 + XOP2( OP_MOV, R_R(Rm), REG_TMP0 );
5.626 + XOP2I(OP_ADD, disp, REG_TMP0 );
5.627 + RALIGN16( REG_TMP0 );
5.628 + XOP2E(OP_LOADW, REG_TMP0, R_R0 );
5.629 +:}
5.630 +MOVA @(disp, PC), R0 {:
5.631 + if( in_delay_slot ) {
5.632 + SLOTILLEGAL();
5.633 + return 2;
5.634 + } else {
5.635 + XOP2( OP_MOV, R_PC, R_R0 );
5.636 + XOP2I( OP_ADD, (pc - xbb->pc_begin) + disp + 4 - (pc&0x03), R_R0 );
5.637 + }
5.638 +:}
5.639 +MOVCA.L R0, @Rn {:
5.640 + XOP2E(OP_STORELCA, R_R(Rn), R_R0 );
5.641 +:}
5.642 +LDTLB {:
5.643 + CHECKPRIV();
5.644 + XOPCALL0( MMU_ldtlb );
5.645 +:}
5.646 +OCBI @Rn {: XOP1E( OP_OCBI, R_R(Rn) ); :}
5.647 +OCBP @Rn {: XOP1E( OP_OCBP, R_R(Rn) ); :}
5.648 +OCBWB @Rn {: XOP1E( OP_OCBWB, R_R(Rn) ); :}
5.649 +PREF @Rn {: XOP1E( OP_PREF, R_R(Rn) ); :}
5.650 +
5.651 +CLRMAC {:
5.652 + XOP2I( OP_MOV, 0, R_MACL );
5.653 + XOP2I( OP_MOV, 0, R_MACH );
5.654 +:}
5.655 +CLRS {: XOP2I( OP_MOV, 0, R_S ); :}
5.656 +CLRT {: XOP2I( OP_MOV, 0, R_T ); :}
5.657 +SETS {: XOP2I( OP_MOV, 1, R_S ); :}
5.658 +SETT {: XOP2I( OP_MOV, 1, R_T ); :}
5.659 +FMOV FRm, FRn {:
5.660 + CHECKFPUEN();
5.661 + if( sh4_xir.double_size ) {
5.662 + XOP2( OP_MOVQ, (FRm&1) ? R_XD(FRm) : R_DR(FRm), (FRn&1) ? R_XD(FRn) : R_DR(FRn) );
5.663 + } else {
5.664 + XOP2( OP_MOV, R_FR(FRm), R_FR(FRn) );
5.665 + }
5.666 +:}
5.667 +FMOV FRm, @Rn {:
5.668 + CHECKFPUEN();
5.669 + if( sh4_xir.double_size ) {
5.670 + WALIGN64( R_R(Rn) );
5.671 + XOP2E( OP_STOREQ, R_R(Rn), (FRm&1) ? R_XD(FRm) : R_DR(FRm) );
5.672 + } else {
5.673 + WALIGN32( R_R(Rn) );
5.674 + XOP2E( OP_STOREL, R_R(Rn), R_FR(FRm) );
5.675 + }
5.676 +:}
5.677 +FMOV @Rm, FRn {:
5.678 + CHECKFPUEN();
5.679 + if( sh4_xir.double_size ) {
5.680 + RALIGN64( R_R(Rm) );
5.681 + XOP2E( OP_LOADQ, R_R(Rm), (FRn&1) ? R_XD(FRn) : R_DR(FRn) );
5.682 + } else {
5.683 + RALIGN32( R_R(Rm) );
5.684 + XOP2E( OP_LOADL, R_R(Rm), R_FR(FRn) );
5.685 + }
5.686 +:}
5.687 +FMOV FRm, @-Rn {:
5.688 + CHECKFPUEN();
5.689 + if( sh4_xir.double_size ) {
5.690 + WALIGN64( R_R(Rn) );
5.691 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.692 + XOP2I(OP_ADD, -8, REG_TMP0 );
5.693 + XOP2E(OP_STOREQ, REG_TMP0, (FRm&1) ? R_XD(FRm) : R_DR(FRm) );
5.694 + XOP2I(OP_ADD, -8, R_R(Rn) );
5.695 + } else {
5.696 + WALIGN32( R_R(Rn) );
5.697 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.698 + XOP2I(OP_ADD, -4, REG_TMP0 );
5.699 + XOP2E(OP_STOREL, REG_TMP0, R_FR(FRm) );
5.700 + XOP2I(OP_ADD, -4, R_R(Rn) );
5.701 + }
5.702 +:}
5.703 +FMOV @Rm+, FRn {:
5.704 + CHECKFPUEN();
5.705 + if( sh4_xir.double_size ) {
5.706 + RALIGN64( R_R(Rm) );
5.707 + XOP2( OP_LOADQ, R_R(Rm), (FRn&1) ? R_XD(FRn) : R_DR(FRn) );
5.708 + XOP2I( OP_ADD, 8, R_R(Rm) );
5.709 + } else {
5.710 + RALIGN32( R_R(Rm) );
5.711 + XOP2( OP_LOADL, R_R(Rm), R_FR(FRn) );
5.712 + XOP2I( OP_ADD, 4, R_R(Rm) );
5.713 + }
5.714 +:}
5.715 +FMOV FRm, @(R0, Rn) {:
5.716 + CHECKFPUEN();
5.717 + XOP2( OP_MOV, R_R0, REG_TMP0 );
5.718 + XOP2( OP_ADD, R_R(Rn), REG_TMP0 );
5.719 + if( sh4_xir.double_size ) {
5.720 + WALIGN64( REG_TMP0 );
5.721 + XOP2E( OP_STOREQ, REG_TMP0, (FRm&1) ? R_XD(FRm) : R_DR(FRm) );
5.722 + } else {
5.723 + WALIGN32( REG_TMP0 );
5.724 + XOP2E( OP_STOREL, REG_TMP0, R_FR(FRm) );
5.725 + }
5.726 +:}
5.727 +FMOV @(R0, Rm), FRn {:
5.728 + CHECKFPUEN();
5.729 + XOP2( OP_MOV, R_R0, REG_TMP0 );
5.730 + XOP2( OP_ADD, R_R(Rm), REG_TMP0 );
5.731 + if( sh4_xir.double_size ) {
5.732 + RALIGN64( REG_TMP0 );
5.733 + XOP2E( OP_LOADQ, REG_TMP0, (FRn&1) ? R_XD(FRn) : R_DR(FRn) );
5.734 + } else {
5.735 + RALIGN32( REG_TMP0 );
5.736 + XOP2E( OP_LOADL, REG_TMP0, R_FR(FRn) );
5.737 + }
5.738 +:}
5.739 +FLDI0 FRn {: /* IFF PR=0 */
5.740 + CHECKFPUEN();
5.741 + if( sh4_xir.double_prec == 0 ) {
5.742 + XOP2F( OP_MOV, 0.0, R_FR(FRn) );
5.743 + }
5.744 +:}
5.745 +FLDI1 FRn {: /* IFF PR=0 */
5.746 + CHECKFPUEN();
5.747 + if( sh4_xir.double_prec == 0 ) {
5.748 + XOP2F( OP_MOV, 1.0, R_FR(FRn) );
5.749 + }
5.750 +:}
5.751 +FLOAT FPUL, FRn {:
5.752 + CHECKFPUEN();
5.753 + if( sh4_xir.double_prec ) {
5.754 + XOP2( OP_ITOD, R_FPUL, R_DR(FRn) );
5.755 + } else {
5.756 + XOP2( OP_ITOF, R_FPUL, R_FR(FRn) );
5.757 + }
5.758 +:}
5.759 +FTRC FRm, FPUL {:
5.760 + CHECKFPUEN();
5.761 + if( sh4_xir.double_prec ) {
5.762 + XOP2( OP_DTOI, R_DR(FRm), R_FPUL );
5.763 + } else {
5.764 + XOP2( OP_FTOI, R_FR(FRm), R_FPUL );
5.765 + }
5.766 +:}
5.767 +FLDS FRm, FPUL {:
5.768 + CHECKFPUEN();
5.769 + XOP2( OP_MOV, R_FR(FRm), R_FPUL );
5.770 +:}
5.771 +FSTS FPUL, FRn {:
5.772 + CHECKFPUEN();
5.773 + XOP2( OP_MOV, R_FPUL, R_FR(FRn) );
5.774 +:}
5.775 +FCNVDS FRm, FPUL {:
5.776 + CHECKFPUEN();
5.777 + if( sh4_xir.double_prec && !sh4_xir.double_size ) {
5.778 + XOP2( OP_DTOF, R_DR(FRm), R_FPUL );
5.779 + }
5.780 +:}
5.781 +FCNVSD FPUL, FRn {:
5.782 + CHECKFPUEN();
5.783 + if( sh4_xir.double_prec && !sh4_xir.double_size ) {
5.784 + XOP2( OP_FTOD, R_FPUL, R_DR(FRn) );
5.785 + }
5.786 +:}
5.787 +FABS FRn {:
5.788 + CHECKFPUEN();
5.789 + if( sh4_xir.double_prec ) {
5.790 + XOP1( OP_ABSD, R_DR(FRn) );
5.791 + } else {
5.792 + XOP1( OP_ABSF, R_FR(FRn) );
5.793 + }
5.794 +:}
5.795 +FADD FRm, FRn {:
5.796 + CHECKFPUEN();
5.797 + if( sh4_xir.double_prec ) {
5.798 + XOP2( OP_ADDD, R_DR(FRm), R_DR(FRn) );
5.799 + } else {
5.800 + XOP2( OP_ADDF, R_FR(FRm), R_FR(FRn) );
5.801 + }
5.802 +:}
5.803 +FDIV FRm, FRn {:
5.804 + CHECKFPUEN();
5.805 + if( sh4_xir.double_prec ) {
5.806 + XOP2( OP_DIVD, R_DR(FRm), R_DR(FRn) );
5.807 + } else {
5.808 + XOP2( OP_DIVF, R_FR(FRm), R_FR(FRn) );
5.809 + }
5.810 +:}
5.811 +FMAC FR0, FRm, FRn {:
5.812 + CHECKFPUEN();
5.813 + if( sh4_xir.double_prec == 0 ) {
5.814 + XOP2( OP_MOV, R_FR(0), REG_TMP0 );
5.815 + XOP2( OP_MULF, R_FR(FRm), REG_TMP0 );
5.816 + XOP2( OP_ADDF, REG_TMP0, R_FR(FRn) );
5.817 + }
5.818 +:}
5.819 +FMUL FRm, FRn {:
5.820 + CHECKFPUEN();
5.821 + if( sh4_xir.double_prec ) {
5.822 + XOP2( OP_MULD, R_DR(FRm), R_DR(FRn) );
5.823 + } else {
5.824 + XOP2( OP_MULF, R_FR(FRm), R_FR(FRn) );
5.825 + }
5.826 +:}
5.827 +FNEG FRn {:
5.828 + CHECKFPUEN();
5.829 + if( sh4_xir.double_prec ) {
5.830 + XOP1( OP_NEGD, R_DR(FRn) );
5.831 + } else {
5.832 + XOP1( OP_NEGF, R_FR(FRn) );
5.833 + }
5.834 +:}
5.835 +FSRRA FRn {:
5.836 + CHECKFPUEN();
5.837 + if( sh4_xir.double_prec == 0 ) {
5.838 + XOP1( OP_RSQRTF, R_FR(FRn) );
5.839 + }
5.840 +:}
5.841 +FSQRT FRn {:
5.842 + CHECKFPUEN();
5.843 + if( sh4_xir.double_prec ) {
5.844 + XOP1( OP_SQRTD, R_DR(FRn) );
5.845 + } else {
5.846 + XOP1( OP_SQRTF, R_FR(FRn) );
5.847 + }
5.848 +:}
5.849 +FSUB FRm, FRn {:
5.850 + CHECKFPUEN();
5.851 + if( sh4_xir.double_prec ) {
5.852 + XOP2( OP_SUBD, R_DR(FRm), R_DR(FRn) );
5.853 + } else {
5.854 + XOP2( OP_SUBF, R_FR(FRm), R_FR(FRn) );
5.855 + }
5.856 +:}
5.857 +FCMP/EQ FRm, FRn {:
5.858 + CHECKFPUEN();
5.859 + if( sh4_xir.double_prec ) {
5.860 + XOP2( OP_CMPD, R_DR(FRm), R_DR(FRn) );
5.861 + } else {
5.862 + XOP2( OP_CMPF, R_FR(FRm), R_FR(FRn) );
5.863 + }
5.864 + XOP1CC( OP_ST, CC_EQ, R_T );
5.865 +:}
5.866 +FCMP/GT FRm, FRn {:
5.867 + CHECKFPUEN();
5.868 + if( sh4_xir.double_prec ) {
5.869 + XOP2( OP_CMPD, R_DR(FRm), R_DR(FRn) );
5.870 + } else {
5.871 + XOP2( OP_CMPF, R_FR(FRm), R_FR(FRn) );
5.872 + }
5.873 + XOP1CC( OP_ST, CC_SGT, R_T );
5.874 +:}
5.875 +FSCA FPUL, FRn {:
5.876 + CHECKFPUEN();
5.877 + if( sh4_xir.double_prec == 0 ) {
5.878 + XOP2( OP_SINCOSF, R_FPUL, R_DR(FRn) );
5.879 + }
5.880 +:}
5.881 +FIPR FVm, FVn {:
5.882 + CHECKFPUEN();
5.883 + if( sh4_xir.double_prec == 0 ) {
5.884 + XOP2( OP_DOTPRODV, R_FV(FVm), R_FV(FVn) );
5.885 + }
5.886 +:}
5.887 +FTRV XMTRX, FVn {:
5.888 + CHECKFPUEN();
5.889 + if( sh4_xir.double_prec == 0 ) {
5.890 + XOP2( OP_MATMULV, R_XMTRX, R_FV(FVn) );
5.891 + }
5.892 +:}
5.893 +FRCHG {:
5.894 + CHECKFPUEN();
5.895 + XOP2I( OP_XOR, FPSCR_FR, R_FPSCR );
5.896 + XOPCALL0( sh4_switch_fr_banks );
5.897 +:}
5.898 +FSCHG {:
5.899 + CHECKFPUEN();
5.900 + XOP2I( OP_XOR, FPSCR_SZ, R_FPSCR );
5.901 + XOP2I( OP_XOR, FPSCR_SZ, R_SH4_MODE );
5.902 + sh4_xir.double_size = !sh4_xir.double_size;
5.903 +:}
5.904 +LDC Rm, SR {:
5.905 + if( in_delay_slot ) {
5.906 + SLOTILLEGAL();
5.907 + } else {
5.908 + CHECKPRIV();
5.909 + XOPCALL1( sh4_write_sr, R_R(Rm) );
5.910 + }
5.911 + return 2;
5.912 +:}
5.913 +LDC Rm, GBR {: XOP2( OP_MOV, R_R(Rm), R_GBR ); :}
5.914 +LDC Rm, VBR {: CHECKPRIV(); XOP2( OP_MOV, R_R(Rm), R_VBR ); :}
5.915 +LDC Rm, SSR {: CHECKPRIV(); XOP2( OP_MOV, R_R(Rm), R_SSR ); :}
5.916 +LDC Rm, SGR {: CHECKPRIV(); XOP2( OP_MOV, R_R(Rm), R_SGR ); :}
5.917 +LDC Rm, SPC {: CHECKPRIV(); XOP2( OP_MOV, R_R(Rm), R_SPC ); :}
5.918 +LDC Rm, DBR {: CHECKPRIV(); XOP2( OP_MOV, R_R(Rm), R_DBR ); :}
5.919 +LDC Rm, Rn_BANK {: CHECKPRIV(); XOP2( OP_MOV, R_R(Rm), R_BANK(Rn_BANK) ); :}
5.920 +LDC.L @Rm+, GBR {:
5.921 + XOP2E( OP_LOADL, R_R(Rm), R_GBR );
5.922 + XOP2I( OP_ADD, 4, R_R(Rm) );
5.923 +:}
5.924 +LDC.L @Rm+, SR {:
5.925 + if( in_delay_slot ) {
5.926 + SLOTILLEGAL();
5.927 + } else {
5.928 + CHECKPRIV();
5.929 + RALIGN32( R_R(Rm) );
5.930 + XOP2E( OP_LOADL, R_R(Rm), REG_TMP0 );
5.931 + XOP2I( OP_ADD, 4, R_R(Rm) );
5.932 + XOPCALL1( sh4_write_sr, REG_TMP0 );
5.933 + }
5.934 + return 2;
5.935 +:}
5.936 +LDC.L @Rm+, VBR {:
5.937 + CHECKPRIV();
5.938 + RALIGN32( R_R(Rm) );
5.939 + XOP2E( OP_LOADL, R_R(Rm), R_VBR );
5.940 + XOP2I( OP_ADD, 4, R_R(Rm) );
5.941 +:}
5.942 +LDC.L @Rm+, SSR {:
5.943 + CHECKPRIV();
5.944 + RALIGN32( R_R(Rm) );
5.945 + XOP2E( OP_LOADL, R_R(Rm), R_SSR );
5.946 + XOP2I( OP_ADD, 4, R_R(Rm) );
5.947 +:}
5.948 +LDC.L @Rm+, SGR {:
5.949 + CHECKPRIV();
5.950 + RALIGN32( R_R(Rm) );
5.951 + XOP2E( OP_LOADL, R_R(Rm), R_SGR );
5.952 + XOP2I( OP_ADD, 4, R_R(Rm) );
5.953 +:}
5.954 +LDC.L @Rm+, SPC {:
5.955 + CHECKPRIV();
5.956 + RALIGN32( R_R(Rm) );
5.957 + XOP2E( OP_LOADL, R_R(Rm), R_SPC );
5.958 + XOP2I( OP_ADD, 4, R_R(Rm) );
5.959 +:}
5.960 +LDC.L @Rm+, DBR {:
5.961 + CHECKPRIV();
5.962 + RALIGN32( R_R(Rm) );
5.963 + XOP2E( OP_LOADL, R_R(Rm), R_DBR );
5.964 + XOP2I( OP_ADD, 4, R_R(Rm) );
5.965 +:}
5.966 +LDC.L @Rm+, Rn_BANK {:
5.967 + CHECKPRIV();
5.968 + RALIGN32( R_R(Rm) );
5.969 + XOP2E( OP_LOADL, R_R(Rm), R_BANK(Rn_BANK) );
5.970 + XOP2I( OP_ADD, 4, R_R(Rm) );
5.971 +:}
5.972 +LDS Rm, FPSCR {:
5.973 + CHECKFPUEN();
5.974 + XOPCALL1( sh4_write_fpscr, R_R(Rm) );
5.975 + return 2;
5.976 +:}
5.977 +LDS Rm, FPUL {:
5.978 + CHECKFPUEN();
5.979 + XOP2( OP_MOV, R_R(Rm), R_FPUL );
5.980 +:}
5.981 +LDS Rm, MACH {: XOP2( OP_MOV, R_R(Rm), R_MACH ); :}
5.982 +LDS Rm, MACL {: XOP2( OP_MOV, R_R(Rm), R_MACL ); :}
5.983 +LDS Rm, PR {: XOP2( OP_MOV, R_R(Rm), R_PR ); :}
5.984 +LDS.L @Rm+, FPSCR {:
5.985 + CHECKFPUEN();
5.986 + RALIGN32( R_R(Rm) );
5.987 + XOP2E( OP_LOADL, R_R(Rm), REG_TMP0 );
5.988 + XOP2I( OP_ADD, 4, R_R(Rm) );
5.989 + XOPCALL1( sh4_write_fpscr, REG_TMP0 );
5.990 + return 2;
5.991 +:}
5.992 +LDS.L @Rm+, FPUL {:
5.993 + CHECKFPUEN();
5.994 + RALIGN32( R_R(Rm) );
5.995 + XOP2E( OP_LOADL, R_R(Rm), R_FPUL );
5.996 + XOP2I( OP_ADD, 4, R_R(Rm) );
5.997 +:}
5.998 +LDS.L @Rm+, MACH {:
5.999 + RALIGN32( R_R(Rm) );
5.1000 + XOP2E( OP_LOADL, R_R(Rm), R_MACH );
5.1001 + XOP2I( OP_ADD, 4, R_R(Rm) );
5.1002 +:}
5.1003 +LDS.L @Rm+, MACL {:
5.1004 + RALIGN32( R_R(Rm) );
5.1005 + XOP2E( OP_LOADL, R_R(Rm), R_MACL );
5.1006 + XOP2I( OP_ADD, 4, R_R(Rm) );
5.1007 +:}
5.1008 +LDS.L @Rm+, PR {:
5.1009 + RALIGN32( R_R(Rm) );
5.1010 + XOP2E( OP_LOADL, R_R(Rm), R_PR );
5.1011 + XOP2I( OP_ADD, 4, R_R(Rm) );
5.1012 +:}
5.1013 +STC SR, Rn {: /* SR is assembled on demand by a helper rather than held in one IR register; privileged */
5.1014 + CHECKPRIV();
5.1015 + XOPCALLR( sh4_read_sr, R_R(Rn) );
5.1016 +:}
5.1017 +STC GBR, Rn {: XOP2( OP_MOV, R_GBR, R_R(Rn) ); :}
5.1018 +STC VBR, Rn {: CHECKPRIV(); XOP2( OP_MOV, R_VBR, R_R(Rn) ); :}
5.1019 +STC SSR, Rn {: CHECKPRIV(); XOP2( OP_MOV, R_SSR, R_R(Rn) ); :}
5.1020 +STC SPC, Rn {: CHECKPRIV(); XOP2( OP_MOV, R_SPC, R_R(Rn) ); :}
5.1021 +STC SGR, Rn {: CHECKPRIV(); XOP2( OP_MOV, R_SGR, R_R(Rn) ); :}
5.1022 +STC DBR, Rn {: CHECKPRIV(); XOP2( OP_MOV, R_DBR, R_R(Rn) ); :}
5.1023 +STC Rm_BANK, Rn {: CHECKPRIV(); XOP2( OP_MOV, R_BANK(Rm_BANK), R_R(Rn) ); :}
5.1024 +STC.L SR, @-Rn {: /* pre-decrement store idiom: store via TMP0 = Rn-4, commit Rn -= 4 only after the store */
5.1025 + CHECKPRIV();
5.1026 + XOPCALLR( sh4_read_sr, REG_TMP1 );
5.1027 + WALIGN32( R_R(Rn) );
5.1028 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.1029 + XOP2I(OP_ADD, -4, REG_TMP0 );
5.1030 + XOP2E(OP_STOREL, REG_TMP0, REG_TMP1 );
5.1031 + XOP2I(OP_ADD, -4, R_R(Rn) ); /* Rn stays unmodified if the store raises an exception */
5.1032 +:}
5.1033 +STC.L VBR, @-Rn {:
5.1034 + CHECKPRIV();
5.1035 + WALIGN32( R_R(Rn) );
5.1036 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.1037 + XOP2I(OP_ADD, -4, REG_TMP0 );
5.1038 + XOP2E(OP_STOREL, REG_TMP0, R_VBR );
5.1039 + XOP2I(OP_ADD, -4, R_R(Rn) );
5.1040 +:}
5.1041 +STC.L SSR, @-Rn {:
5.1042 + CHECKPRIV();
5.1043 + WALIGN32( R_R(Rn) );
5.1044 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.1045 + XOP2I(OP_ADD, -4, REG_TMP0 );
5.1046 + XOP2E(OP_STOREL, REG_TMP0, R_SSR );
5.1047 + XOP2I(OP_ADD, -4, R_R(Rn) );
5.1048 +:}
5.1049 +STC.L SPC, @-Rn {:
5.1050 + CHECKPRIV();
5.1051 + WALIGN32( R_R(Rn) );
5.1052 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.1053 + XOP2I(OP_ADD, -4, REG_TMP0 );
5.1054 + XOP2E(OP_STOREL, REG_TMP0, R_SPC );
5.1055 + XOP2I(OP_ADD, -4, R_R(Rn) );
5.1056 +:}
5.1057 +STC.L SGR, @-Rn {:
5.1058 + CHECKPRIV();
5.1059 + WALIGN32( R_R(Rn) );
5.1060 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.1061 + XOP2I(OP_ADD, -4, REG_TMP0 );
5.1062 + XOP2E(OP_STOREL, REG_TMP0, R_SGR );
5.1063 + XOP2I(OP_ADD, -4, R_R(Rn) );
5.1064 +:}
5.1065 +STC.L DBR, @-Rn {:
5.1066 + CHECKPRIV();
5.1067 + WALIGN32( R_R(Rn) );
5.1068 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.1069 + XOP2I(OP_ADD, -4, REG_TMP0 );
5.1070 + XOP2E(OP_STOREL, REG_TMP0, R_DBR );
5.1071 + XOP2I(OP_ADD, -4, R_R(Rn) );
5.1072 +:}
5.1073 +STC.L Rm_BANK, @-Rn {:
5.1074 + CHECKPRIV();
5.1075 + WALIGN32( R_R(Rn) );
5.1076 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.1077 + XOP2I(OP_ADD, -4, REG_TMP0 );
5.1078 + XOP2E(OP_STOREL, REG_TMP0, R_BANK(Rm_BANK) );
5.1079 + XOP2I(OP_ADD, -4, R_R(Rn) );
5.1080 +:}
5.1081 +STC.L GBR, @-Rn {: /* GBR is the only STC.L source usable in user mode: no CHECKPRIV here */
5.1082 + WALIGN32( R_R(Rn) );
5.1083 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.1084 + XOP2I(OP_ADD, -4, REG_TMP0 );
5.1085 + XOP2E(OP_STOREL, REG_TMP0, R_GBR );
5.1086 + XOP2I(OP_ADD, -4, R_R(Rn) );
5.1087 +:}
5.1088 +STS FPSCR, Rn {:
5.1089 + CHECKFPUEN();
5.1090 + XOP2( OP_MOV, R_FPSCR, R_R(Rn) );
5.1091 +:}
5.1092 +STS FPUL, Rn {:
5.1093 + CHECKFPUEN();
5.1094 + XOP2( OP_MOV, R_FPUL, R_R(Rn) );
5.1095 +:}
5.1096 +STS MACH, Rn {:
5.1097 + XOP2( OP_MOV, R_MACH, R_R(Rn) );
5.1098 +:}
5.1099 +STS MACL, Rn {:
5.1100 + XOP2( OP_MOV, R_MACL, R_R(Rn) );
5.1101 +:}
5.1102 +STS PR, Rn {:
5.1103 + XOP2( OP_MOV, R_PR, R_R(Rn) );
5.1104 +:}
5.1105 +STS.L FPSCR, @-Rn {:
5.1106 + CHECKFPUEN();
5.1107 + WALIGN32( R_R(Rn) );
5.1108 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.1109 + XOP2I(OP_ADD, -4, REG_TMP0 );
5.1110 + XOP2E(OP_STOREL, REG_TMP0, R_FPSCR );
5.1111 + XOP2I(OP_ADD, -4, R_R(Rn) );
5.1112 +:}
5.1113 +STS.L FPUL, @-Rn {:
5.1114 + CHECKFPUEN();
5.1115 + WALIGN32( R_R(Rn) );
5.1116 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.1117 + XOP2I(OP_ADD, -4, REG_TMP0 );
5.1118 + XOP2E(OP_STOREL, REG_TMP0, R_FPUL );
5.1119 + XOP2I(OP_ADD, -4, R_R(Rn) );
5.1120 +:}
5.1121 +STS.L MACH, @-Rn {:
5.1122 + WALIGN32( R_R(Rn) );
5.1123 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.1124 + XOP2I(OP_ADD, -4, REG_TMP0 );
5.1125 + XOP2E(OP_STOREL, REG_TMP0, R_MACH );
5.1126 + XOP2I(OP_ADD, -4, R_R(Rn) );
5.1127 +:}
5.1128 +STS.L MACL, @-Rn {:
5.1129 + WALIGN32( R_R(Rn) );
5.1130 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.1131 + XOP2I(OP_ADD, -4, REG_TMP0 );
5.1132 + XOP2E(OP_STOREL, REG_TMP0, R_MACL );
5.1133 + XOP2I(OP_ADD, -4, R_R(Rn) );
5.1134 +:}
5.1135 +STS.L PR, @-Rn {:
5.1136 + WALIGN32( R_R(Rn) );
5.1137 + XOP2( OP_MOV, R_R(Rn), REG_TMP0 );
5.1138 + XOP2I(OP_ADD, -4, REG_TMP0 );
5.1139 + XOP2E(OP_STOREL, REG_TMP0, R_PR );
5.1140 + XOP2I(OP_ADD, -4, R_R(Rn) );
5.1141 +:}
5.1142 +
5.1143 +BF disp {: /* branch (pc-relative) when T == 0; no delay slot */
5.1144 + if( in_delay_slot ) {
5.1145 + SLOTILLEGAL();
5.1146 + } else {
5.1147 + XOP2I( OP_ADD, (pc+2 - xbb->pc_begin) * sh4_cpu_period, R_SLICE_CYCLE ); /* account cycles up to this point */
5.1148 + XOP2I( OP_CMP, 0, R_T );
5.1149 + XOP2IICC( OP_BRCOND, CC_EQ, disp+pc+4-xbb->pc_begin, pc+2-xbb->pc_begin ); /* operands: taken offset, fallthrough offset (block-relative) */
5.1150 + }
5.1151 + return 2;
5.1152 +:}
5.1153 +BF/S disp {: /* branch when T == 0, with delay slot */
5.1154 + if( in_delay_slot ) {
5.1155 + SLOTILLEGAL();
5.1156 + return 2;
5.1157 + } else {
5.1158 + if( UNTRANSLATABLE(pc+2 ) ) {
5.1159 + XOP2I( OP_CMP, 0, R_T );
5.1160 + XOP2IICC( OP_BRCONDDEL, CC_EQ, disp+pc+4-xbb->pc_begin, pc+4-xbb->pc_begin ); /* NOTE(review): fallthrough is pc+4 here but pc+2 in the parallel BT/S case below -- one of the two looks wrong, confirm against BRCONDDEL semantics */
5.1161 + EMU_DELAY_SLOT();
5.1162 + return 2;
5.1163 + } else {
5.1164 + XOP2( OP_MOV, R_T, REG_TMP2 ); /* snapshot T before the delay-slot instruction can change it */
5.1165 + sh4_decode_instruction( xbb, pc+2, TRUE );
5.1166 + if( !XOP_IS_TERMINATOR( xbb->ir_ptr->prev ) ) {
5.1167 + XOP2I( OP_ADD, (pc+4 - xbb->pc_begin) * sh4_cpu_period, R_SLICE_CYCLE );
5.1168 + XOP2I( OP_CMP, 0, REG_TMP2 );
5.1169 + XOP2IICC( OP_BRCOND, CC_EQ, disp+pc+4-xbb->pc_begin, pc+4-xbb->pc_begin );
5.1170 + }
5.1171 + return 4; /* branch + inlined delay slot consumed */
5.1172 + }
5.1173 + }
5.1174 +:}
5.1175 +BT disp {: /* branch (pc-relative) when T == 1; no delay slot */
5.1176 + if( in_delay_slot ) {
5.1177 + SLOTILLEGAL();
5.1178 + } else {
5.1179 + XOP2I( OP_ADD, (pc+2 - xbb->pc_begin) * sh4_cpu_period, R_SLICE_CYCLE );
5.1180 + XOP2I( OP_CMP, 1, R_T );
5.1181 + XOP2IICC( OP_BRCOND, CC_EQ, disp+pc+4-xbb->pc_begin, pc+2-xbb->pc_begin );
5.1182 + }
5.1183 + return 2;
5.1184 +:}
5.1185 +BT/S disp {: /* branch when T == 1, with delay slot */
5.1186 + if( in_delay_slot ) {
5.1187 + SLOTILLEGAL();
5.1188 + return 2;
5.1189 + } else {
5.1190 + if( UNTRANSLATABLE(pc+2 ) ) {
5.1191 + XOP2I( OP_CMP, 1, R_T );
5.1192 + XOP2IICC( OP_BRCONDDEL, CC_EQ, disp+pc+4-xbb->pc_begin, pc+2-xbb->pc_begin );
5.1193 + EMU_DELAY_SLOT();
5.1194 + return 2;
5.1195 + } else {
5.1196 + XOP2( OP_MOV, R_T, REG_TMP2 );
5.1197 + sh4_decode_instruction( xbb, pc+2, TRUE );
5.1198 + if( !XOP_IS_TERMINATOR( xbb->ir_ptr->prev ) ) {
5.1199 + XOP2I( OP_ADD, (pc+4 - xbb->pc_begin) * sh4_cpu_period, R_SLICE_CYCLE );
5.1200 + XOP2I( OP_CMP, 1, REG_TMP2 );
5.1201 + XOP2IICC( OP_BRCOND, CC_EQ, disp+pc+4-xbb->pc_begin, pc+4-xbb->pc_begin );
5.1202 + }
5.1203 + return 4;
5.1204 + }
5.1205 + }
5.1206 +:}
5.1207 +BRA disp {: /* unconditional pc-relative branch, delay slot */
5.1208 + if( in_delay_slot ) {
5.1209 + SLOTILLEGAL();
5.1210 + return 2;
5.1211 + } else {
5.1212 + if( UNTRANSLATABLE(pc+2) ) {
5.1213 + XOP2( OP_MOV, R_PC, R_NEW_PC ); /* delay slot can't be inlined: set NEW_PC and emulate it */
5.1214 + XOP2I( OP_ADD, pc+disp+4-xbb->pc_begin, R_NEW_PC );
5.1215 + EMU_DELAY_SLOT();
5.1216 + return 2;
5.1217 + } else {
5.1218 + sh4_decode_instruction( xbb, pc+2, TRUE );
5.1219 + if( xbb->ir_ptr->prev == NULL || !XOP_IS_TERMINATOR( xbb->ir_ptr->prev ) ) { /* NOTE(review): only BRA guards prev against NULL; the sibling branch templates dereference it unconditionally -- verify and make consistent */
5.1220 + XOP2I( OP_ADD, (pc+4 - xbb->pc_begin) * sh4_cpu_period, R_SLICE_CYCLE );
5.1221 + XOP1I( OP_BRREL, pc+disp+4-xbb->pc_begin );
5.1222 + }
5.1223 + return 4;
5.1224 + }
5.1225 + }
5.1226 +:}
5.1227 +BRAF Rn {: /* branch to PC+4+Rn, delay slot */
5.1228 + if( in_delay_slot ) {
5.1229 + SLOTILLEGAL();
5.1230 + return 2;
5.1231 + } else {
5.1232 + XOP2( OP_MOV, R_R(Rn), REG_TMP2 ); /* compute target up-front so the delay slot may freely modify Rn */
5.1233 + XOP2( OP_ADD, R_PC, REG_TMP2 );
5.1234 + XOP2I( OP_ADD, pc - xbb->pc_begin + 4, REG_TMP2 );
5.1235 + if( UNTRANSLATABLE(pc+2) ) {
5.1236 + XOP2( OP_MOV, REG_TMP2, R_NEW_PC );
5.1237 + EMU_DELAY_SLOT();
5.1238 + return 2;
5.1239 + } else {
5.1240 + sh4_decode_instruction( xbb, pc + 2, TRUE );
5.1241 + if( !XOP_IS_TERMINATOR( xbb->ir_ptr->prev ) ) {
5.1242 + XOP2I( OP_ADD, (pc+4 - xbb->pc_begin) * sh4_cpu_period, R_SLICE_CYCLE );
5.1243 + XOP1( OP_BR, REG_TMP2 );
5.1244 + }
5.1245 + return 4;
5.1246 + }
5.1247 + }
5.1248 +:}
5.1249 +BSR disp {: /* subroutine call: PR = return address, then pc-relative branch with delay slot */
5.1250 + if( in_delay_slot ) {
5.1251 + SLOTILLEGAL();
5.1252 + return 2;
5.1253 + } else {
5.1254 + XOP2( OP_MOV, R_PC, R_PR );
5.1255 + XOP2I( OP_ADD, pc - xbb->pc_begin + 4, R_PR ); /* return address = address after the delay slot */
5.1256 + if( UNTRANSLATABLE(pc+2) ) {
5.1257 + XOP2( OP_MOV, R_PC, R_NEW_PC );
5.1258 + XOP2I( OP_ADD, pc+disp+4-xbb->pc_begin, R_NEW_PC );
5.1259 + EMU_DELAY_SLOT();
5.1260 + return 2;
5.1261 + } else {
5.1262 + sh4_decode_instruction( xbb, pc+2, TRUE );
5.1263 + if( !XOP_IS_TERMINATOR( xbb->ir_ptr->prev ) ) {
5.1264 + XOP2I( OP_ADD, (pc+4 - xbb->pc_begin) * sh4_cpu_period, R_SLICE_CYCLE );
5.1265 + XOP1I( OP_BRREL, pc+disp+4-xbb->pc_begin );
5.1266 + }
5.1267 + return 4;
5.1268 + }
5.1269 + }
5.1270 +:}
5.1271 +BSRF Rn {: /* subroutine call to PC+4+Rn, delay slot */
5.1272 + if( in_delay_slot ) {
5.1273 + SLOTILLEGAL();
5.1274 + return 2;
5.1275 + } else {
5.1276 + XOP2( OP_MOV, R_PC, R_PR );
5.1277 + XOP2I( OP_ADD, pc - xbb->pc_begin + 4, R_PR );
5.1278 + XOP2( OP_MOV, R_R(Rn), REG_TMP2 );
5.1279 + XOP2( OP_ADD, R_PC, REG_TMP2 );
5.1280 + XOP2I( OP_ADD, pc - xbb->pc_begin + 4, REG_TMP2 );
5.1281 + if( UNTRANSLATABLE(pc+2) ) {
5.1282 + XOP2( OP_MOV, REG_TMP2, R_NEW_PC );
5.1283 + EMU_DELAY_SLOT();
5.1284 + return 2;
5.1285 + } else {
5.1286 + sh4_decode_instruction( xbb, pc+2, TRUE );
5.1287 + if( !XOP_IS_TERMINATOR( xbb->ir_ptr->prev ) ) {
5.1288 + XOP2I( OP_ADD, (pc+4 - xbb->pc_begin) * sh4_cpu_period, R_SLICE_CYCLE );
5.1289 + XOP1( OP_BR, REG_TMP2 );
5.1290 + }
5.1291 + return 4;
5.1292 + }
5.1293 + }
5.1294 +:}
5.1295 +JMP @Rn {: /* absolute indirect jump, delay slot */
5.1296 + if( in_delay_slot ) {
5.1297 + SLOTILLEGAL();
5.1298 + return 2;
5.1299 + } else {
5.1300 + if( UNTRANSLATABLE(pc+2) ) {
5.1301 + XOP2( OP_MOV, R_R(Rn), R_NEW_PC );
5.1302 + EMU_DELAY_SLOT();
5.1303 + return 2;
5.1304 + } else {
5.1305 + XOP2( OP_MOV, R_R(Rn), REG_TMP2 ); /* capture target before the delay slot may modify Rn */
5.1306 + sh4_decode_instruction( xbb, pc+2, TRUE );
5.1307 + if( !XOP_IS_TERMINATOR( xbb->ir_ptr->prev ) ) {
5.1308 + XOP2I( OP_ADD, (pc+4 - xbb->pc_begin) * sh4_cpu_period, R_SLICE_CYCLE );
5.1309 + XOP1( OP_BR, REG_TMP2 );
5.1310 + }
5.1311 + return 4;
5.1312 + }
5.1313 + }
5.1314 +:}
5.1315 +JSR @Rn {: /* indirect subroutine call: PR = return address, delay slot */
5.1316 + if( in_delay_slot ) {
5.1317 + SLOTILLEGAL();
5.1318 + return 2;
5.1319 + } else {
5.1320 + XOP2( OP_MOV, R_PC, R_PR );
5.1321 + XOP2I( OP_ADD, pc - xbb->pc_begin + 4, R_PR );
5.1322 + if( UNTRANSLATABLE(pc+2) ) {
5.1323 + XOP2( OP_MOV, R_R(Rn), R_NEW_PC );
5.1324 + EMU_DELAY_SLOT();
5.1325 + return 2;
5.1326 + } else {
5.1327 + XOP2( OP_MOV, R_R(Rn), REG_TMP2 );
5.1328 + sh4_decode_instruction( xbb, pc+2, TRUE );
5.1329 + if( !XOP_IS_TERMINATOR( xbb->ir_ptr->prev ) ) {
5.1330 + XOP2I( OP_ADD, (pc+4 - xbb->pc_begin) * sh4_cpu_period, R_SLICE_CYCLE );
5.1331 + XOP1( OP_BR, REG_TMP2 );
5.1332 + }
5.1333 + return 4;
5.1334 + }
5.1335 + }
5.1336 +:}
5.1337 +RTE {: /* return from exception: restore SR from SSR, jump to SPC; privileged */
5.1338 + CHECKPRIV();
5.1339 + if( in_delay_slot ) {
5.1340 + SLOTILLEGAL();
5.1341 + return 2;
5.1342 + } else {
5.1343 + if( UNTRANSLATABLE(pc+2) ) {
5.1344 + XOP2( OP_MOV, R_SPC, R_NEW_PC );
5.1345 + EMU_DELAY_SLOT();
5.1346 + return 2;
5.1347 + } else {
5.1348 + XOP2( OP_MOV, R_SPC, REG_TMP2 ); /* capture SPC before the SR write / delay slot can disturb state */
5.1349 + XOPCALL1( sh4_write_sr, R_SSR );
5.1350 + sh4_decode_instruction( xbb, pc+2, TRUE );
5.1351 + if( !XOP_IS_TERMINATOR( xbb->ir_ptr->prev ) ) {
5.1352 + XOP2I( OP_ADD, (pc+4 - xbb->pc_begin) * sh4_cpu_period, R_SLICE_CYCLE );
5.1353 + XOP1( OP_BR, REG_TMP2 );
5.1354 + }
5.1355 + return 4;
5.1356 + }
5.1357 + }
5.1358 +:}
5.1359 +RTS {: /* return from subroutine: jump to PR, delay slot */
5.1360 + if( in_delay_slot ) {
5.1361 + SLOTILLEGAL();
5.1362 + return 2;
5.1363 + } else {
5.1364 + if( UNTRANSLATABLE(pc+2) ) {
5.1365 + XOP2( OP_MOV, R_PR, R_NEW_PC );
5.1366 + EMU_DELAY_SLOT();
5.1367 + return 2;
5.1368 + } else {
5.1369 + XOP2( OP_MOV, R_PR, REG_TMP2 );
5.1370 + sh4_decode_instruction( xbb, pc+2, TRUE );
5.1371 + if( !XOP_IS_TERMINATOR( xbb->ir_ptr->prev ) ) {
5.1372 + XOP2I( OP_ADD, (pc+4 - xbb->pc_begin) * sh4_cpu_period, R_SLICE_CYCLE );
5.1373 + XOP1( OP_BR, REG_TMP2 );
5.1374 + }
5.1375 + return 4;
5.1376 + }
5.1377 + }
5.1378 +:}
5.1379 +TRAPA #imm {: XOPCALL1I( sh4_raise_trap, imm ); return 2; /* was 'return pc+2': the decode loop does pc += <return>, so terminators must return a byte count (cf. BF/BRA returning 2/4) */ :}
5.1380 +SLEEP {: XOPCALL0( sh4_sleep ); return 2; /* was 'return pc+2' -- same byte-count convention */ :}
5.1381 +UNDEF {: UNDEF(ir); :}
5.1382 +NOP {: /* Do nothing */ :}
5.1383 +
5.1384 +%%
5.1385 + return 0;
5.1386 +}
5.1387 +
5.1388 +
5.1389 +sh4addr_t sh4_decode_basic_block( xir_basic_block_t xbb )
5.1390 +{
5.1391 + sh4addr_t pc;
5.1392 +
5.1393 + sh4_xir.fpuen_checked = FALSE; /* force a fresh FPU-enable check at block entry */
5.1394 + sh4_xir.double_prec = sh4r.fpscr & FPSCR_PR; /* latch FP mode for the whole block */
5.1395 + sh4_xir.double_size = sh4r.fpscr & FPSCR_SZ;
5.1396 + xbb->address_space = (sh4r.xlat_sh4_mode&SR_MD) ? sh4_address_space : sh4_user_address_space;
5.1397 +
5.1398 + xbb->ir_alloc_begin->prev = NULL;
5.1399 + XOP1I( OP_ENTER, 0 ); /* guarantees at least one op exists, so ir_ptr-1 below is valid */
5.1400 + for( pc = xbb->pc_begin; pc < xbb->pc_end; pc += 2 ) {
5.1401 + int done = sh4_decode_instruction( xbb, pc, FALSE ); /* nonzero = block terminator; value is the bytes it consumed */
5.1402 + if( done ) {
5.1403 + pc += done;
5.1404 + break;
5.1405 + }
5.1406 + }
5.1407 + xbb->ir_end = xbb->ir_ptr-1;
5.1408 + xbb->ir_end->next = NULL;
5.1409 + xbb->pc_end = pc;
5.1410 + return pc; /* address of the first instruction NOT in the block */
5.1411 +}
5.1412 +
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
6.2 +++ b/src/test/testsh4xir.c Tue Apr 07 10:55:03 2009 +0000
6.3 @@ -0,0 +1,150 @@
6.4 +/**
6.5 + * $Id: testsh4x86.c 988 2009-01-15 11:23:20Z nkeynes $
6.6 + *
6.7 + * Test cases for the SH4 => XIR decoder. Takes as
6.8 + * input a binary SH4 object (and VMA), generates the
6.9 + * corresponding IR, and dumps it to stdout.
6.10 + *
6.11 + * Copyright (c) 2009 Nathan Keynes.
6.12 + *
6.13 + * This program is free software; you can redistribute it and/or modify
6.14 + * it under the terms of the GNU General Public License as published by
6.15 + * the Free Software Foundation; either version 2 of the License, or
6.16 + * (at your option) any later version.
6.17 + *
6.18 + * This program is distributed in the hope that it will be useful,
6.19 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
6.20 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6.21 + * GNU General Public License for more details.
6.22 + */
6.23 +#include <getopt.h>
6.24 +#include <stdlib.h>
6.25 +#include <sys/stat.h>
6.26 +#include <string.h>
6.27 +
6.28 +#include "lxdream.h"
6.29 +#include "sh4/sh4core.h"
6.30 +#include "sh4/sh4mmio.h"
6.31 +#include "sh4/sh4xir.h"
6.32 +
6.33 +
6.34 +struct mmio_region mmio_region_MMU; /* link-time stand-ins so the decoder runs without the full emulator */
6.35 +struct mmio_region mmio_region_PMM;
6.36 +struct breakpoint_struct sh4_breakpoints[MAX_BREAKPOINTS];
6.37 +struct sh4_registers sh4r;
6.38 +int sh4_breakpoint_count = 0;
6.39 +uint32_t sh4_cpu_period = 5; /* cycles per instruction used in R_SLICE_CYCLE accounting */
6.40 +struct sh4_icache_struct sh4_icache;
6.41 +
6.42 +void MMU_ldtlb() { } /* no-op core stubs: only their addresses matter (see debug_symbols) */
6.43 +void FASTCALL sh4_sleep() { }
6.44 +void FASTCALL sh4_write_fpscr( uint32_t val ) { }
6.45 +void sh4_switch_fr_banks() { }
6.46 +void FASTCALL sh4_write_sr( uint32_t val ) { }
6.47 +uint32_t FASTCALL sh4_read_sr( void ) { return 0; }
6.48 +void FASTCALL sh4_raise_trap( int exc ) { }
6.49 +void FASTCALL sh4_raise_exception( int exc ) { }
6.50 +void log_message( void *ptr, int level, const gchar *source, const char *msg, ... ) { }
6.51 +gboolean sh4_execute_instruction( ) { return TRUE; }
6.52 +void **sh4_address_space;
6.53 +void **sh4_user_address_space;
6.54 +unsigned char *xlat_output;
6.55 +
6.56 +#define MAX_INS_SIZE 32
6.57 +#define MAX_XIR_OPS 16384 /* size of the statically-allocated IR buffer in main() */
6.58 +
6.59 +char *option_list = "s:o:d:h";
6.60 +struct option longopts[1] = { { NULL, 0, 0, 0 } };
6.61 +
6.62 +struct xir_symbol_entry debug_symbols[] = { /* symbolic names for the IR dumper, in place of raw addresses */
6.63 + { "sh4_cpu_period", &sh4_cpu_period },
6.64 + { "sh4_write_fpscr", sh4_write_fpscr },
6.65 + { "sh4_write_sr", sh4_write_sr },
6.66 + { "sh4_read_sr", sh4_read_sr },
6.67 + { "sh4_sleep", sh4_sleep },
6.68 + { "sh4_switch_fr_banks", sh4_switch_fr_banks },
6.69 + { "sh4_raise_exception", sh4_raise_exception },
6.70 + { "sh4_raise_trap", sh4_raise_trap },
6.71 + { "sh4_execute_instruction", sh4_execute_instruction },
6.72 +};
6.73 +
6.74 +extern struct xlat_source_machine sh4_source_machine;
6.75 +extern struct xlat_target_machine x86_target_machine;
6.76 +void usage()
6.77 +{
6.78 + fprintf( stderr, "Usage: testsh4xir [options] <input bin file>\n");
6.79 + fprintf( stderr, "Options:\n");
6.80 + fprintf( stderr, " -d <filename> Diff results against contents of file\n" );
6.81 + fprintf( stderr, " -h Display this help message\n" );
6.82 + fprintf( stderr, " -o <filename> Output disassembly to file [stdout]\n" );
6.83 + fprintf( stderr, " -s <addr> Specify start address of binary [8C010000]\n" );
6.84 +}
6.85 +
6.86 +int main( int argc, char *argv[] )
6.87 +{
6.88 + char *input_file;
6.89 + char *output_file = NULL; /* was uninitialized; set by -o (redirection not implemented below yet) */
6.90 + char *diff_file = NULL; /* was uninitialized; set by -d (diff mode not implemented below yet) */
6.91 + char *inbuf;
6.92 + uint32_t start_addr = 0x8c010000;
6.93 + struct stat st;
6.94 + int opt;
6.95 + struct xir_op xir[MAX_XIR_OPS];
6.96 + xir_op_t xir_ptr = &xir[0];
6.97 +
6.98 + while( (opt = getopt_long( argc, argv, option_list, longopts, NULL )) != -1 ) {
6.99 + switch( opt ) {
6.100 + case 'd':
6.101 + diff_file = optarg;
6.102 + break;
6.103 + case 'o':
6.104 + output_file = optarg;
6.105 + break;
6.106 + case 's':
6.107 + start_addr = strtoul(optarg, NULL, 0);
6.108 + break;
6.109 + case 'h':
6.110 + usage();
6.111 + exit(0);
6.112 + }
6.113 + }
6.114 + if( optind < argc ) {
6.115 + input_file = argv[optind++];
6.116 + } else {
6.117 + usage();
6.118 + exit(1);
6.119 + }
6.120 +
6.121 + mmio_region_MMU.mem = malloc(4096);
6.122 + memset( mmio_region_MMU.mem, 0, 4096 );
6.123 +
6.124 + ((uint32_t *)mmio_region_MMU.mem)[4] = 1; /* NOTE(review): presumably flags the MMU register block as enabled for decode -- confirm register index */
6.125 +
6.126 + FILE *in = fopen( input_file, "rb" ); /* was "ro": not a standard fopen mode; "rb" reads binary data portably */
6.127 + if( in == NULL ) {
6.128 + perror( "Unable to open input file" );
6.129 + exit(2);
6.130 + }
6.131 + fstat( fileno(in), &st );
6.132 + inbuf = malloc( st.st_size ); if( inbuf == NULL ) { perror( "Out of memory" ); exit(2); } /* was unchecked */
6.133 + if( st.st_size > 0 && fread( inbuf, st.st_size, 1, in ) != 1 ) { perror( "Unable to read input file" ); exit(2); } fclose( in ); /* fread result was ignored; stream was never closed */
6.134 + sh4_icache.mask = 0xFFFFF000;
6.135 + sh4_icache.page_vma = start_addr & 0xFFFFF000;
6.136 + sh4_icache.page = (unsigned char *)(inbuf - (start_addr&0xFFF)); /* bias so page[vma&0xFFF] indexes inbuf */
6.137 + sh4_icache.page_ppa = start_addr & 0xFFFFF000;
6.138 +
6.139 + struct xir_basic_block xbb;
6.140 + xbb.source = &sh4_source_machine;
6.141 + xbb.ir_alloc_begin = &xir[0];
6.142 + xbb.ir_alloc_end = &xir[MAX_XIR_OPS];
6.143 + xbb.ir_begin = xbb.ir_ptr = xbb.ir_end = xbb.ir_alloc_begin;
6.144 + xbb.pc_begin = start_addr;
6.145 + xbb.pc_end = start_addr+4096;
6.146 + xbb.source->decode_basic_block( &xbb );
6.147 +
6.148 + x86_target_machine.lower( &xbb, xbb.ir_begin, xbb.ir_end );
6.149 + xir_set_register_names( sh4_source_machine.reg_names, x86_target_machine.reg_names );
6.150 + xir_set_symbol_table( debug_symbols );
6.151 + xir_dump_block( &xir[0], NULL );
6.152 + return 0;
6.153 +}
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
7.2 +++ b/src/test/testxir.c Tue Apr 07 10:55:03 2009 +0000
7.3 @@ -0,0 +1,74 @@
7.4 +/**
7.5 + * $Id: testsh4x86.c 988 2009-01-15 11:23:20Z nkeynes $
7.6 + *
7.7 + * Test XIR internals
7.8 + *
7.9 + * Copyright (c) 2009 Nathan Keynes.
7.10 + *
7.11 + * This program is free software; you can redistribute it and/or modify
7.12 + * it under the terms of the GNU General Public License as published by
7.13 + * the Free Software Foundation; either version 2 of the License, or
7.14 + * (at your option) any later version.
7.15 + *
7.16 + * This program is distributed in the hope that it will be useful,
7.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
7.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7.19 + * GNU General Public License for more details.
7.20 + */
7.21 +
7.22 +#include <assert.h>
7.23 +#include <stdlib.h>
7.24 +#include "xlat/xir.h"
7.25 +
7.26 +void test_shuffle()
7.27 +{
7.28 + struct xir_op op[64];
7.29 + struct xir_basic_block bb;
7.30 + xir_basic_block_t xbb = &bb;
7.31 + bb.ir_alloc_begin = bb.ir_begin = bb.ir_end = bb.ir_ptr = &op[0];
7.32 + bb.ir_alloc_end = &op[64];
7.33 +
7.34 + XOP2I( OP_SHUFFLE, 0x1243, REG_TMP1 ); /* immediate's hex digits select the source byte for each dest byte (see imm32 asserts below) */
7.35 + XOP2I( OP_SHUFFLE, 0x3412, REG_TMP1 );
7.36 + XOP2I( OP_SHUFFLE, 0x1243, REG_TMP1 );
7.37 + XOP2I( OP_SHUFFLE, 0x3412, REG_TMP1 );
7.38 + XOP2I( OP_SHUFFLE, 0x1234, REG_TMP1 ); /* identity shuffle */
7.39 + XOP2I( OP_SHUFFLE, 0x1111, REG_TMP2 ); /* broadcast byte 1 */
7.40 + XOP2I( OP_SHUFFLE, 0x0123, REG_TMP1 );
7.41 + XOP1I( OP_BR, 0x8C001000 );
7.42 + (bb.ir_ptr-1)->next = NULL;
7.43 + bb.ir_end = bb.ir_ptr-1;
7.44 +
7.45 + assert( xir_shuffle_imm32( 0x2134, 0x12345678) == 0x34125678 ); /* constant-fold of a shuffle */
7.46 + assert( xir_shuffle_imm32( 0x1243, 0x12345678) == 0x12347856 );
7.47 + assert( xir_shuffle_imm32( 0x3412, 0x12345678) == 0x56781234 );
7.48 +
7.49 + xir_shuffle_op( op[0].operand[0].value.i, &op[1] ); /* composing two shuffles rewrites the second in place */
7.50 + assert( op[1].operand[0].value.i == 0x4312 );
7.51 + xir_shuffle_op( op[1].operand[0].value.i, &op[2] );
7.52 + assert( op[2].operand[0].value.i == 0x4321 );
7.53 +
7.54 + assert( xir_shuffle_lower_size( &op[0] ) == 9); /* expected x86 code size per pattern -- TODO confirm units are bytes */
7.55 + assert( xir_shuffle_lower_size( &op[1] ) == 8);
7.56 + assert( xir_shuffle_lower_size( &op[3] ) == 4);
7.57 + assert( xir_shuffle_lower_size( &op[4] ) == 0); /* identity lowers to nothing */
7.58 + assert( xir_shuffle_lower_size( &op[5] ) == 12);
7.59 + assert( xir_shuffle_lower_size( &op[6] ) == 1);
7.60 + xir_shuffle_lower( xbb, &op[0], REG_TMP3, REG_TMP4 ); /* args: scratch registers for the lowering */
7.61 + xir_shuffle_lower( xbb, &op[1], REG_TMP3, REG_TMP4 );
7.62 + xir_shuffle_lower( xbb, &op[3], REG_TMP3, REG_TMP4 );
7.63 + xir_shuffle_lower( xbb, &op[4], REG_TMP3, REG_TMP4 );
7.64 + xir_shuffle_lower( xbb, &op[5], REG_TMP3, REG_TMP4 );
7.65 + xir_shuffle_lower( xbb, &op[6], REG_TMP3, REG_TMP4 );
7.66 + xir_dump_block( &op[0], NULL ); /* results inspected manually / by diffing stdout */
7.67 +}
7.68 +
7.69 +
7.70 +
7.71 +
7.72 +int main( int argc, char *argv[] )
7.73 +{
7.74 + test_shuffle();
7.75 +
7.76 + return 0;
7.77 +}
7.78 \ No newline at end of file
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
8.2 +++ b/src/xlat/dce.c Tue Apr 07 10:55:03 2009 +0000
8.3 @@ -0,0 +1,98 @@
8.4 +/**
8.5 + * $Id: dce.c 931 2008-10-31 02:57:59Z nkeynes $
8.6 + *
8.7 + * Implementation of simple dead code elimination based on a reverse pass
8.8 + * through the code.
8.9 + *
8.10 + * Copyright (c) 2009 Nathan Keynes.
8.11 + *
8.12 + * This program is free software; you can redistribute it and/or modify
8.13 + * it under the terms of the GNU General Public License as published by
8.14 + * the Free Software Foundation; either version 2 of the License, or
8.15 + * (at your option) any later version.
8.16 + *
8.17 + * This program is distributed in the hope that it will be useful,
8.18 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
8.19 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8.20 + * GNU General Public License for more details.
8.21 + */
8.22 +
8.23 +/**
8.24 + * Traverse the block in reverse, killing any dead instructions as we
8.25 + * go. Instructions are dead iff no values they write are read, and all
8.26 + * source registers written are overwritten before the end of the block.
8.27 + *
8.28 + * Dead instructions that may be exposed by an exception are moved to
8.29 + * the exception block rather than being unconditionally removed.
8.30 + */
8.31 +int xir_dead_code_elimination( xir_basic_block_t xbb, xir_op_t begin, xir_op_t end )
8.32 +{
8.33 + char source_regs[MAX_TEMP_REGISTER+1];
8.34 + char target_regs[MAX_TARGET_REGISTER];
8.35 + char flags_live = 1; /* was uninitialized (UB); conservatively treat flags as live at block exit */
8.36 + int removed = 0; /* function is declared int but never returned a value; report #ops eliminated */
8.37 + /* Initially all source regs are live */
8.38 + memset( source_regs, 1, sizeof(source_regs) ); /* NOTE(review): file has no includes; needs <string.h> for memset */
8.39 + memset( target_regs, 0, sizeof(target_regs) );
8.40 + for( xir_op_t it = end; it != NULL; it = it->prev ) {
8.41 + /* Assume the instruction is dead, then check if any of the
8.42 + * output args are live */
8.43 + char is_live = 0;
8.44 +
8.45 + if( XOP_WRITES_REG1(it) ) {
8.46 + if( XOP_IS_SRCREG(it,0) ) {
8.47 + is_live |= source_regs[XOP_REG1(it)];
8.48 + source_regs[XOP_REG1(it)] = 0;
8.49 + } else if( XOP_IS_TGTREG(it,0) ) {
8.50 + is_live |= target_regs[XOP_REG1(it)];
8.51 + target_regs[XOP_REG1(it)] = 0;
8.52 + }
8.53 + }
8.54 + if( XOP_WRITES_REG2(it) ) {
8.55 + if( XOP_IS_SRCREG(it,1) ) {
8.56 + is_live |= source_regs[XOP_REG2(it)]; /* was '=', which clobbered liveness computed from REG1 above */
8.57 + source_regs[XOP_REG2(it)] = 0;
8.58 + } else if( XOP_IS_TGTREG(it,1) ) {
8.59 + is_live |= target_regs[XOP_REG2(it)];
8.60 + target_regs[XOP_REG2(it)] = 0;
8.61 + }
8.62 + }
8.63 +
8.64 + if( XOP_WRITES_FLAGS(it) ) {
8.65 + is_live |= flags_live; /* was 'is_live ||= flags_live': '||=' is not a C operator */
8.66 + flags_live = 0;
8.67 + }
8.68 +
8.69 + /* Exception-raising instructions can't be DCEd */
8.70 + if( XOP_HAS_EXCEPTIONS(it) || XOP_IS_TERMINATOR(it) ||
8.71 + it->opcode == OP_ENTER || it->opcode == OP_BARRIER ) {
8.72 + is_live = 1;
8.73 + }
8.74 +
8.75 + if( !is_live ) {
8.76 + /* Kill it with fire */
8.77 + xir_remove_op(it); removed++;
8.78 + } else {
8.79 + /* Propagate live reads */
8.80 + if( XOP_READS_REG1(it) ) {
8.81 + if( XOP_IS_SRCREG(it,0) ) {
8.82 + source_regs[XOP_REG1(it)] = 1;
8.83 + } else if( XOP_IS_TGTREG(it,0) ) {
8.84 + target_regs[XOP_REG1(it)] = 1;
8.85 + }
8.86 + }
8.87 + if( XOP_READS_REG2(it) ) {
8.88 + if( XOP_IS_SRCREG(it,1) ) {
8.89 + source_regs[XOP_REG2(it)] = 1;
8.90 + } else if( XOP_IS_TGTREG(it,1) ) {
8.91 + target_regs[XOP_REG2(it)] = 1;
8.92 + }
8.93 + }
8.94 +
8.95 + if( XOP_READS_FLAGS(it) ) flags_live = 1; /* was 'flags_live ||= ...': invalid C */
8.96 + }
8.97 +
8.98 + if( it == begin )
8.99 + break;
8.100 + }
8.101 + /* NOTE(review): header comment promises moving exception-exposed dead ops to the exc block; not implemented here */
8.101 + return removed;
8.101 +}
8.102 \ No newline at end of file
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
9.2 +++ b/src/xlat/livevar.c Tue Apr 07 10:55:03 2009 +0000
9.3 @@ -0,0 +1,96 @@
9.4 +/**
9.5 + * $Id: livevar.h 931 2008-10-31 02:57:59Z nkeynes $
9.6 + *
9.7 + * Live variable analysis
9.8 + *
9.9 + * Copyright (c) 2009 Nathan Keynes.
9.10 + *
9.11 + * This program is free software; you can redistribute it and/or modify
9.12 + * it under the terms of the GNU General Public License as published by
9.13 + * the Free Software Foundation; either version 2 of the License, or
9.14 + * (at your option) any later version.
9.15 + *
9.16 + * This program is distributed in the hope that it will be useful,
9.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
9.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9.19 + * GNU General Public License for more details.
9.20 + */
9.21 +
9.22 +#include "xlat/xir.h"
9.23 +#include "xlat/xiropt.h"
9.24 +
9.25 +
9.26 +gboolean live_range_calculate( xir_op_t start, xir_op_t end,
9.27 + struct live_range *live_ranges, unsigned int live_ranges_size )
9.28 +{
9.29 + struct live_range *current[MAX_REGISTERS];
9.30 + struct live_range *range_next = live_ranges;
9.31 + struct live_range *range_end = live_ranges + live_ranges_size;
9.32 + xir_offset_t position = 0;
9.33 + xir_offset_t last_exc = 0;
9.34 + xir_op_t it = start; /* was uninitialized: 'while( it != end )' read an indeterminate pointer (UB) */
9.35 +
9.36 + memset( current, 0, sizeof(current) ); /* NOTE(review): needs <string.h> for memset */
9.37 +
9.38 + while( it != end ) {
9.39 + int reg0 = -1;
9.40 + int reg1 = -1;
9.41 +
9.42 + if( it->exc != NULL ) {
9.43 + // Track when the last possible exception was
9.44 + last_exc = position;
9.45 + }
9.46 +
9.47 + if( XOP_READS_REG1(it) ) { // Update live-range for op0
9.48 + reg0 = XOP_REG1(it);
9.49 + if( current[reg0] == NULL ) {
9.50 + current[reg0] = range_next++;
9.51 + if( current[reg0] == range_end )
9.52 + return FALSE;
9.53 + current[reg0]->start = it;
9.54 + current[reg0]->offset = position;
9.55 + current[reg0]->writeback = FALSE; // register is already coherent
9.56 + }
9.57 + current[reg0]->end = it;
9.58 + current[reg0]->length = position - current[reg0]->offset;
9.59 + }
9.60 +
9.61 + if( XOP_READS_REG2(it) ) {
9.62 + reg1 = XOP_REG2(it);
9.63 + if( current[reg1] == NULL ) {
9.64 + current[reg1] = range_next++;
9.65 + if( current[reg1] == range_end )
9.66 + return FALSE;
9.67 + current[reg1]->start = it;
9.68 + current[reg1]->offset = position; current[reg1]->writeback = FALSE; /* was missing: match the reg0 path, a read starts coherent */
9.69 + }
9.70 + current[reg1]->end = it;
9.71 + current[reg1]->length = position - current[reg1]->offset;
9.72 + } // op1 is Use-only
9.73 +
9.74 + if( XOP_WRITES_REG1(it) ) { /* NOTE(review): unimplemented -- a REG1 write should start a new range like the REG2 case below */
9.75 + }
9.76 +
9.77 + if( XOP_WRITES_REG2(it) ) {
9.78 + int reg = XOP_REG2(it);
9.79 + if( current[reg] != NULL && last_exc < current[reg]->offset + current[reg]->length ) { /* was 'current[reg].end': '.' on a pointer, compared an offset to an op pointer, and could deref NULL -- TODO confirm intended end-position test */
9.80 + // Value is dead and doesn't need to be spilled.
9.81 + current[reg]->writeback = FALSE; /* was 'current[reg].writeback' */
9.82 + }
9.83 + // Kill last range for op1 if we're not using it. Otherwise
9.84 + // this is just a continuation.
9.85 + current[reg] = range_next++;
9.86 + if( current[reg] == range_end )
9.87 + return FALSE;
9.88 + current[reg]->start = it;
9.89 + current[reg]->offset = position;
9.90 + current[reg]->end = it;
9.91 + current[reg]->length = 0;
9.92 + current[reg]->writeback = TRUE;
9.93 + }
9.94 +
9.95 + it = it->next;
9.96 + position++;
9.97 + }
9.98 + return TRUE;
9.99 +}
9.100 \ No newline at end of file
10.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
10.2 +++ b/src/xlat/target.c Tue Apr 07 10:55:03 2009 +0000
10.3 @@ -0,0 +1,243 @@
10.4 +/**
10.5 + * $Id: target.c 931 2008-10-31 02:57:59Z nkeynes $
10.6 + *
10.7 + * Target code-generation support - provides a generic harness around the raw
10.8 + * (machine-specific) code emitter.
10.9 + *
10.10 + * Copyright (c) 2009 Nathan Keynes.
10.11 + *
10.12 + * This program is free software; you can redistribute it and/or modify
10.13 + * it under the terms of the GNU General Public License as published by
10.14 + * the Free Software Foundation; either version 2 of the License, or
10.15 + * (at your option) any later version.
10.16 + *
10.17 + * This program is distributed in the hope that it will be useful,
10.18 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
10.19 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10.20 + * GNU General Public License for more details.
10.21 + */
10.22 +
10.23 +#include <stdlib.h>
10.24 +
10.25 +#include "lxdream.h"
10.26 +#include "xlat/xir.h"
10.27 +#include "xlat/machine.h"
10.28 +
10.29 +#define DEFAULT_FIXUP_TABLE_SIZE 4096
10.30 +#define ALIGN32(p) p += ((-(uintptr_t)p)&0x03)
10.31 +#define ALIGN64(p) p += ((-(uintptr_t)p)&0x07)
10.32 +
10.33 +/**
10.34 + * Currently we use a single static target_data so that we can reuse the
10.35 + * allocated memory (and we only do one codegen at a time anyway). However
10.36 + * we keep this private so that other modules can't assume there's only one TD.
10.37 + */
10.38 +static struct target_data TD;
10.39 +
10.40 +/**
10.41 + * Add a new fixup without setting a target value
10.42 + */
10.43 +static target_fixup_t target_add_fixup( target_data_t td, int type, void *location )
10.44 +{
10.45 + if( td->fixup_table_posn == td->fixup_table_size ) {
10.46 + td->fixup_table_size <<= 1;
10.47 + td->fixup_table = realloc(td->fixup_table, td->fixup_table_size * sizeof(struct target_fixup_struct)); /* NOTE(review): realloc result overwrites the pointer; with NDEBUG a failure loses the table */
10.48 + assert( td->fixup_table != NULL );
10.49 + }
10.50 + target_fixup_t fixup = &td->fixup_table[td->fixup_table_posn++];
10.51 + fixup->fixup_type = type; /* was 'type | TARGET_FIXUP_CONST32': callers already OR in their own fixup kind, so unconditionally forcing CONST32 corrupted every non-const32 fixup */
10.52 + fixup->fixup_offset = ((uint8_t *)location) - (uint8_t *)&td->block->code[0];
10.53 + return fixup;
10.54 +}
10.55 +
10.56 +void target_add_const32_fixup( target_data_t td, int mode, void *location, uint32_t i )
10.57 +{
10.58 + target_add_fixup(td, mode|TARGET_FIXUP_CONST32, location)->value.i = i;
10.59 +}
10.60 +
10.61 +void target_add_const64_fixup( target_data_t td, int mode, void *location, uint64_t q )
10.62 +{
10.63 + target_add_fixup(td, mode|TARGET_FIXUP_CONST64, location)->value.q = q;
10.64 +}
10.65 +
10.66 +void target_add_raise_fixup( target_data_t td, int type, void *location, xir_op_t *exc )
10.67 +{
10.68 + target_add_fixup(td, type|TARGET_FIXUP_RAISE, location)->value.exc = exc; /* was 'mode|...': no 'mode' in scope -- the parameter here is named 'type' */
10.69 +}
10.70 +
10.71 +void target_add_raiseext_fixup( target_data_t td, int type, void *location, xir_op_t *exc )
10.72 +{
10.73 + target_add_fixup(td, type|TARGET_FIXUP_RAISEEXT, location)->value.exc = exc; /* was 'mode|...' */
10.74 +}
10.75 +
10.76 +void target_add_offset_fixup( target_data_t td, int type, void *location, uint32_t off )
10.77 +{
10.78 + target_add_fixup(td, type|TARGET_FIXUP_OFFSET, location)->target_offset = off; /* was 'mode|...' */
10.79 +}
10.80 +
10.81 +void target_add_pointer_fixup( target_data_t td, int type, void *location, void *p )
10.82 +{
10.83 + target_add_fixup(td, type|TARGET_FIXUP_POINTER, location)->value.p = p; /* was 'mode|...' */
10.84 +}
10.85 +
10.86 +
10.87 +
10.88 +void target_ensure_space( target_data_t td, int space_required )
10.89 +{
10.90 + uint8_t *oldstart = td->block->code;
10.91 + uint32_t new_size = (td->xlat_output - oldstart) + space_required;
10.92 + if( new_size > td->block->size ) { /* was '<': the inverted test only "grew" the block when space was already sufficient */
10.93 + /* (dropped two stale lines that referenced undeclared xlat_current_block/eob globals;
10.94 + * td->block / td->xlat_output carry that state here) */
10.95 + td->block = xlat_extend_block( new_size ); /* block may move; rebase the output cursor from the old base */
10.96 + td->xlat_output = td->block->code + (td->xlat_output - oldstart); /* was bare 'xlat_output': update the member, not a global */
10.97 + }
10.98 +}
10.99 +
10.100 +/**
10.101 + * Generate the exception table and exception bodies from the fixup data
10.102 + * Note that this may add additional constants to the fixup table.
10.103 + */
10.104 +static void target_gen_exception_table( )
10.105 +{
10.106 + int exc_size = 0, num_raiseext = 0;
10.107 +
10.108 + for( target_fixup_t fixup = &TD.fixup_table[0]; fixup != &TD.fixup_table[TD.fixup_table_posn]; fixup++ ) {
10.109 + switch(TARGET_FIXUP_TARGET(fixup->type)) { /* removed dangling 'int type =' that made this switch a syntax error */
10.110 + case TARGET_FIXUP_RAISEEXT:
10.111 + num_raiseext++;
10.112 + /* fallthrough */
10.113 + case TARGET_FIXUP_RAISE:
10.114 + exc_size += TD.get_code_size(fixup->value.exc, NULL);
10.115 + break;
10.116 + default: break; /* const/offset/pointer fixups carry no exception code */
10.117 + }
10.118 + }
10.119 +
10.120 + ALIGN64(TD.xlat_output);
10.121 + target_ensure_space( &TD, exc_size + num_raiseext*sizeof(struct xlat_exception_record) ); /* was 'td': no such variable in this static function */
10.122 + uint8_t *blockstart = &TD.block->code[0]; /* taken after ensure_space, which may move the block */
10.123 + struct xlat_exception_record *exc_record = (struct xlat_exception_record *)TD.xlat_output;
10.124 + TD.block->exc_table_offset = TD.xlat_output - blockstart;
10.125 + TD.block->exc_table_size = num_raiseext;
10.126 + TD.xlat_output += (num_raiseext*sizeof(struct xlat_exception_record));
10.127 +
10.128 + for( target_fixup_t fixup = &TD.fixup_table[0]; fixup != &TD.fixup_table[TD.fixup_table_posn]; fixup++ ) { /* was '&td_fixup_table[...]': undeclared identifier */
10.129 + switch( TARGET_FIXUP_TARGET(fixup->type) ) {
10.130 + case TARGET_FIXUP_RAISEEXT:
10.131 + exc_record->xlat_pc_offset = fixup->fixup_offset + 4;
10.132 + exc_record->xlat_exc_offset = TD.xlat_output - blockstart;
10.133 + exc_record++; /* was missing: every RAISEEXT fixup overwrote the same table slot */
10.134 + /* fallthrough */
10.135 + case TARGET_FIXUP_RAISE:
10.136 + fixup->target_offset = TD.xlat_output - blockstart;
10.137 + TD.codegen( &TD, fixup->value.exc, NULL ); /* was 'td' */
10.138 + break;
10.139 + default: break;
10.140 + }
10.141 + }
10.142 +}
10.139 +
10.140 +/**
10.141 + * Generate constant table from the fixup data.
10.142 + */
10.143 +static void target_gen_constant_table( )
10.144 +{
10.145 +    int numconst32=0, numconst64=0;
10.146 +
10.147 +    /* Determine table size */
10.148 +    for( target_fixup_t fixup = &TD.fixup_table[0]; fixup != &TD.fixup_table[TD.fixup_table_posn]; fixup++ ) { /* was td_fixup_table, undeclared */
10.149 +        int type = TARGET_FIXUP_TARGET(fixup->type);
10.150 +        if( type == TARGET_FIXUP_CONST32 ) {
10.151 +            numconst32++;
10.152 +        } else if( type == TARGET_FIXUP_CONST64 ) {
10.153 +            numconst64++;
10.154 +        }
10.155 +    }
10.156 +
10.157 +    /* 64-bit constants need 8-byte alignment; 32-bit only need 4 */
10.158 +    if( numconst64 != 0 ) {
10.159 +        ALIGN64(TD.xlat_output);
10.160 +    } else if( numconst32 != 0 ) {
10.161 +        ALIGN32(TD.xlat_output);
10.162 +    } else {
10.163 +        return; /* no constants */
10.164 +    }
10.165 +    target_ensure_space( &TD, numconst64*8 + numconst32*4 ); /* was bare 'td', undeclared in this scope */
10.166 +    uint8_t *blockstart = &TD.block->code[0];
10.167 +
10.168 +    /* TODO: Merge reused constant values */
10.169 +    uint64_t *const64p = (uint64_t *)TD.xlat_output;
10.170 +    uint32_t *const32p = (uint32_t *)(TD.xlat_output + numconst64*8);
10.171 +    TD.xlat_output += (numconst64*8 + numconst32*4);
10.172 +
10.173 +    for( target_fixup_t fixup = &TD.fixup_table[0]; fixup != &TD.fixup_table[TD.fixup_table_posn]; fixup++ ) { /* was td_fixup_table, undeclared */
10.174 +        switch(TARGET_FIXUP_TARGET(fixup->type)) {
10.175 +        case TARGET_FIXUP_CONST32:
10.176 +            fixup->target_offset = ((uint8_t *)const32p) - blockstart;
10.177 +            *const32p++ = fixup->value.i;
10.178 +            break;
10.179 +        case TARGET_FIXUP_CONST64:
10.180 +            fixup->target_offset = ((uint8_t *)const64p) - blockstart;
10.181 +            *const64p++ = fixup->value.q;
10.182 +            break;
10.183 +        }
10.184 +    }
10.185 +}
10.185 +
10.186 +/**
10.187 + * Apply all target fixups - assumes exceptions + constants have already been
10.188 + * generated.
10.189 + */
10.190 +static void target_apply_fixups( )
10.191 +{
10.192 +    for( target_fixup_t fixup = &TD.fixup_table[0]; fixup != &TD.fixup_table[TD.fixup_table_posn]; fixup++ ) {
10.193 +        void *target;
10.194 +        if( TARGET_FIXUP_TARGET(fixup->type) == TARGET_FIXUP_POINTER ) {
10.195 +            target = fixup->value.p;
10.196 +        } else {
10.197 +            target = &TD.block->code[fixup->target_offset];
10.198 +        }
10.199 +
10.200 +        uint32_t *loc = (uint32_t *)&TD.block->code[fixup->fixup_offset]; /* was missing '&' - cast a single byte value to a pointer */
10.201 +        uint64_t *loc64 = (uint64_t *)&TD.block->code[fixup->fixup_offset];
10.202 +        switch(TARGET_FIXUP_MODE(fixup->type)) { /* was fixup->fixup_type - no such field; every other use reads fixup->type */
10.203 +        case TARGET_FIXUP_REL32:
10.204 +            *loc += (uint8_t *)target - (uint8_t *)(loc+1); /* relative to end of the 32-bit slot */
10.205 +            break;
10.206 +        case TARGET_FIXUP_REL64:
10.207 +            *loc64 += (uint8_t *)target - (uint8_t *)(loc64+1);
10.208 +            break;
10.209 +        case TARGET_FIXUP_ABS32:
10.210 +            *loc += (uint32_t)(uintptr_t)target; /* via uintptr_t - direct pointer->uint32_t cast is ill-formed/truncating on 64-bit */
10.211 +            break;
10.212 +        case TARGET_FIXUP_ABS64:
10.213 +            *loc64 += (uint64_t)(uintptr_t)target;
10.214 +            break;
10.215 +        }
10.216 +    }
10.217 +}
10.218 +
10.219 +void *target_codegen( xlat_target_machine_t machine, source_data_t sd )
10.220 +{
10.221 +    /* Translate the IR in sd to native code and commit the block.
10.222 +     * Returns the entry point of the generated code (the body returned a value,
10.223 +     * so the original 'void' return type could not have been intended). */
10.224 +    TD.mach = machine;
10.225 +    TD.src = sd->machine;
10.226 +    TD.block = xlat_start_block( sd->pc_start );
10.227 +    TD.xlat_output = &TD.block->code[0];
10.228 +    if( TD.fixup_table == NULL ) { /* duplicated NULL test collapsed to one */
10.229 +        TD.fixup_table_size = DEFAULT_FIXUP_TABLE_SIZE;
10.230 +        TD.fixup_table = malloc( TD.fixup_table_size * sizeof(struct target_fixup_struct) ); /* was td->fixup_table_size - 'td' undeclared here */
10.231 +        assert( TD.fixup_table != NULL );
10.232 +    }
10.233 +    TD.fixup_table_posn = 0;
10.234 +
10.235 +    uint32_t code_size = machine->get_code_size(sd->ir_begin,sd->ir_end);
10.236 +    target_ensure_space(&TD, code_size);
10.237 +
10.238 +    machine->codegen(&TD, sd->ir_begin, sd->ir_end); /* was sd->begin/sd->end - inconsistent with get_code_size above; TODO confirm field names */
10.239 +
10.240 +    target_gen_exception_table();
10.241 +    target_gen_constant_table();
10.242 +    target_apply_fixups();
10.243 +
10.244 +    xlat_commit_block( TD.xlat_output - &TD.block->code[0], sd->pc_end-sd->pc_start );
10.245 +    return &TD.block->code[0];
10.246 +}
11.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
11.2 +++ b/src/xlat/x86/x86gen.c Tue Apr 07 10:55:03 2009 +0000
11.3 @@ -0,0 +1,557 @@
11.4 +/**
11.5 + * $Id: xir.h 931 2008-10-31 02:57:59Z nkeynes $
11.6 + *
11.7 + * x86/x86-64 final code generation
11.8 + *
11.9 + * Copyright (c) 2009 Nathan Keynes.
11.10 + *
11.11 + * This program is free software; you can redistribute it and/or modify
11.12 + * it under the terms of the GNU General Public License as published by
11.13 + * the Free Software Foundation; either version 2 of the License, or
11.14 + * (at your option) any later version.
11.15 + *
11.16 + * This program is distributed in the hope that it will be useful,
11.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
11.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11.19 + * GNU General Public License for more details.
11.20 + */
11.21 +#include <assert.h>
11.22 +
11.23 +#include "xlat/xir.h"
11.24 +#include "xlat/xlat.h"
11.25 +#include "xlat/x86/x86op.h"
11.26 +
11.27 +typedef enum {
11.28 +    SSE_NONE = 0, /* no SSE support detected */
11.29 +    SSE_1,
11.30 +    SSE_2,
11.31 +    SSE_3,
11.32 +    SSE_3_1, /* AKA SSSE3 */
11.33 +    SSE_4_1,
11.34 +    SSE_4_2 /* highest level probed by x86_target_init() */
11.35 +} sse_version_t;
11.36 +
11.37 +/* 32-bit register groups:
11.38 + * General regs 0..7
11.39 + * - EAX, EDX - arguments, volatile
11.40 + * - ECX - volatile
11.41 + * - EBX, ESI, EDI - non-volatile
11.42 + * - ESP, EBP - blocked out for system use.
11.43 + * XMM regs 16..23
11.44 + * - Floating or integer, all volatile
11.45 + * MMX regs 32..39
11.46 + * - integer, all volatile
11.47 + * OR (if SSE is unsupported)
11.48 + * x87 regs 32..39
11.49 + * - floating point, all volatile, stack allocator
11.50 + */
11.51 +
11.52 +/*
11.53 + * 64-bit register groups:
11.54 + * General regs 0..15
11.55 + * - EDI, ESI - arguments, volatile
11.56 + * - EAX, ECX, EDX, ... - volatile
11.57 + * - EBX, ... non-volatile
11.58 + * - ESP, EBP - blocked for system use (r13?)
11.59 + * XMM regs 16..31
11.60 + * - Floating or integer, all volatile
11.61 + * MMX regs 32..39
11.62 + * - integer, all volatile
11.63 + * OR
11.64 + * x87 regs 32..39
11.65 + * - floating point, all volatile, stack allocator
11.66 + */
11.67 +
11.68 +
11.69 +
11.70 +
11.71 +struct x86_target_info_struct {
11.72 +    sse_version_t sse_version; /* highest SSE level supported by the host CPU; written by x86_target_init() */
11.73 +} x86_target_info;
11.74 +
11.75 +
11.76 +/**
11.77 + * Initialize x86_target_info - detect supported features from cpuid
11.78 + */
11.79 +void x86_target_init()
11.80 +{
11.81 +    uint32_t feature1, feature2; /* feature1 = CPUID.01H:ECX, feature2 = CPUID.01H:EDX */
11.82 +
11.83 +    __asm__ __volatile__(
11.84 +        "mov $0x01, %%eax\n\t"
11.85 +        "cpuid\n\t" : "=c" (feature1), "=d" (feature2) : : "eax", "ebx");
11.86 +
11.87 +    /* Walk through from oldest to newest - while it's normally the case
11.88 +     * that all older extensions are supported, you're not supposed to
11.89 +     * depend on that assumption. So instead we stop as soon as we find
11.90 +     * a missing feature bit. */
11.91 +    if( (feature2 & 0x02000000) == 0 ) {        /* EDX bit 25: SSE */
11.92 +        x86_target_info.sse_version = SSE_NONE;
11.93 +    } else if( (feature2 & 0x04000000) == 0 ) { /* EDX bit 26: SSE2 */
11.94 +        x86_target_info.sse_version = SSE_1;
11.95 +    } else if( (feature1 & 0x00000001) == 0 ) { /* ECX bit 0: SSE3 */
11.96 +        x86_target_info.sse_version = SSE_2;
11.97 +    } else if( (feature1 & 0x00000200) == 0 ) { /* ECX bit 9: SSSE3 - was 0x00000100 (bit 8, TM2), the wrong bit */
11.98 +        x86_target_info.sse_version = SSE_3;
11.99 +    } else if( (feature1 & 0x00080000) == 0 ) { /* ECX bit 19: SSE4.1 */
11.100 +        x86_target_info.sse_version = SSE_3_1;
11.101 +    } else if( (feature1 & 0x00100000) == 0 ) { /* ECX bit 20: SSE4.2 */
11.102 +        x86_target_info.sse_version = SSE_4_1;
11.103 +    } else {
11.104 +        x86_target_info.sse_version = SSE_4_2;
11.105 +    }
11.106 +}
11.107 +
11.108 +#define IS_X86_64() (sizeof(void *)==8)
11.109 +#define IS_XMM_REG(op,n) (XOP_REG(op,n) >= MIN_XMM_REGISTER && XOP_REG(op,n) <= MAX_AMD64_XMM_REGISTER)
11.110 +
11.111 +#define RBP_OFFSET (-128)
11.112 +
11.113 +#define NONE NO_OPERAND
11.114 +#define SRC SOURCE_REGISTER_OPERAND
11.115 +#define TGT TARGET_REGISTER_OPERAND
11.116 +#define IMM INT_IMM_OPERAND
11.117 +#define FLT FLOAT_IMM_OPERAND
11.118 +#define DBL DOUBLE_IMM_OPERAND
11.119 +
11.120 +#define MAX_X86_GENERAL_REGISTER (MIN_TARGET_REGISTER+7)
11.121 +#define MAX_AMD64_GENERAL_REGISTER (MIN_TARGET_REGISTER+15)
11.122 +#define MIN_XMM_REGISTER (MIN_TARGET_REGISTER+16)
11.123 +#define MAX_X86_XMM_REGISTER (MIN_TARGET_REGISTER+23)
11.124 +#define MAX_AMD64_XMM_REGISTER (MIN_TARGET_REGISTER+31)
11.125 +
11.126 +#define ILLOP(op) FATAL("Illegal x86 opcode %s %d %d\n", XIR_OPCODE_TABLE[op->opcode], op->operand[0].type, op->operand[1].type)
11.127 +
11.128 +// Convenience macros
11.129 +#define X86L_IMMS_REG(opname, op) \
11.130 + if( XOP_IS_FORM(op,IMM,TGT) ) { opname##_imms_r32(XOP_INT(op,0),XOP_REG(op,1)); } \
11.131 + else if( XOP_IS_FORM(op,IMM,SRC) ) { opname##_imms_rbpdisp(XOP_INT(op,0),XOP_REG(op,1)+RBP_OFFSET); } \
11.132 + else { ILLOP(op); }
11.133 +
11.134 +#define X86L_REG_TGT(opname,op) \
11.135 + if( XOP_IS_FORM(op,TGT,TGT) ) { opname##_r32_r32(XOP_REG(op,0),XOP_REG(op,1)); } \
11.136 + else if( XOP_IS_FORM(op,SRC,TGT) ) { opname##_rbpdisp_r32(XOP_REG(op,0)+RBP_OFFSET,XOP_REG(op,1)); } \
11.137 + else { ILLOP(op); }
11.138 +
11.139 +#define X86F_REG_TGT(opname,op ) \
11.140 + if( XOP_IS_FORM(op,TGT,TGT) ) { opname##_xmm_xmm(XOP_REG(op,0),XOP_REG(op,1)); } \
11.141 + else if( XOP_IS_FORM(op,SRC,TGT) ) { opname##_rbpdisp_xmm(XOP_REG(op,0)+RBP_OFFSET,XOP_REG(op,1)); } \
11.142 + else { ILLOP(op); }
11.143 +
11.144 +#define X86L_REG_REG(opname,op) \
11.145 + if( XOP_IS_FORM(op,TGT,TGT) ) { opname##_r32_r32(XOP_REG(op,0),XOP_REG(op,1)); } \
11.146 + else if( XOP_IS_FORM(op,SRC,TGT) ) { opname##_rbpdisp_r32(XOP_REG(op,0)+RBP_OFFSET,XOP_REG(op,1)); } \
11.147 + else if( XOP_IS_FORM(op,TGT,SRC) ) { opname##_r32_rbpdisp(XOP_REG(op,0),XOP_REG(op,1)+RBP_OFFSET); } \
11.148 + else { ILLOP(op); }
11.149 +
11.150 +#define X86L_REG(opname,op) \
11.151 + if( XOP_IS_TGTREG(op,0) ) { opname##_r32(XOP_REG(op,0)); } \
11.152 + else if( XOP_IS_SRCREG(op,0) ) { opname##_rbpdisp(XOP_REG(op,0)+RBP_OFFSET); } \
11.153 + else { ILLOP(op); }
11.154 +
11.155 +#define X86L_CL_REG(opname,op) \
11.156 + if( XOP_IS_FORM(op,TGT,TGT) && XOP_REG(op,0) == REG_CL ) { opname##_cl_r32(XOP_REG(op,1)); } \
11.157 + else if( XOP_IS_FORM(op,TGT,SRC) && XOP_REG(op,0) == REG_CL ) { opname##_cl_rbpdisp(XOP_REG(op,1)+RBP_OFFSET); } \
11.158 + else { ILLOP(op); }
11.159 +
11.160 +#define X86L_IMMCL_REG(opname,op) \
11.161 + if( XOP_IS_FORM(op,IMM,TGT) ) { opname##_imm_r32(XOP_INT(op,0),XOP_REG(op,1)); } \
11.162 + else if( XOP_IS_FORM(op,IMM,SRC) ) { opname##_imm_rbpdisp(XOP_INT(op,0),XOP_REG(op,1)+RBP_OFFSET); } \
11.163 + else if( XOP_IS_FORM(op,TGT,TGT) && XOP_REG(op,0) == REG_CL ) { opname##_cl_r32(XOP_REG(op,1)); } \
11.164 + else if( XOP_IS_FORM(op,TGT,SRC) && XOP_REG(op,0) == REG_CL ) { opname##_cl_rbpdisp(XOP_REG(op,1)+RBP_OFFSET); } \
11.165 + else { ILLOP(op); }
11.166 +
11.167 +// Standard ALU forms - imms,reg or reg,reg
11.168 +#define X86L_ALU_REG(opname,op) \
11.169 + if( XOP_IS_FORM(op,IMM,TGT) ) { opname##_imms_r32(XOP_INT(op,0),XOP_REG(op,1)); } \
11.170 + else if( XOP_IS_FORM(op,IMM,SRC) ) { opname##_imms_rbpdisp(XOP_INT(op,0),XOP_REG(op,1)+RBP_OFFSET); } \
11.171 + else if( XOP_IS_FORM(op,TGT,TGT) ) { opname##_r32_r32(XOP_REG(op,0),XOP_REG(op,1)); } \
11.172 + else if( XOP_IS_FORM(op,SRC,TGT) ) { opname##_rbpdisp_r32(XOP_REG(op,0)+RBP_OFFSET,XOP_REG(op,1)); } \
11.173 + else if( XOP_IS_FORM(op,TGT,SRC) ) { opname##_r32_rbpdisp(XOP_REG(op,0),XOP_REG(op,1)+RBP_OFFSET); } \
11.174 + else { ILLOP(op); }
11.175 +
11.176 +uint32_t x86_target_get_code_size( xir_op_t begin, xir_op_t end )
11.177 +{
11.178 +    return -1; /* TODO: stub - wraps to UINT32_MAX; real size estimation for [begin,end] not yet implemented */
11.179 +}
11.180 +
11.181 +
11.182 +/**
11.183 + * Note: Assumes that the IR is x86-legal (ie doesn't contain any unencodeable instructions).
11.184 + */
11.185 +uint32_t x86_target_codegen( target_data_t td, xir_op_t begin, xir_op_t end ) /* NOTE(review): declared uint32_t but no value is ever returned - confirm intended contract */
11.186 +{
11.187 +    int ss; /* SIB scale (2 on x86, 3 on x86-64) for the pointer-table load in OP_XLAT */
11.188 +    xir_op_t it;
11.189 +
11.190 +    /* Prologue */
11.191 +
11.192 +    for( it=begin; it != NULL; it = it->next ) { /* loop body breaks out when it == end (checked after emitting) */
11.193 +        switch( it->opcode ) {
11.194 +        case OP_ENTER:
11.195 +        case OP_BARRIER:
11.196 +        case OP_NOP:
11.197 +            /* No code to generate */
11.198 +            break;
11.199 +        case OP_MOV: /* 32-bit move; dispatches on operand form and register class (general vs XMM) */
11.200 +            if( XOP_IS_FORM(it, IMM, SRC) ) {
11.201 +                MOVL_imm32_rbpdisp( XOP_INT(it,0), XOP_REG2(it)+RBP_OFFSET );
11.202 +            } else if( XOP_IS_FORM(it, IMM, TGT) ) {
11.203 +                MOVL_imm32_r32( XOP_INT(it,0), XOP_REG2(it) );
11.204 +            } else if( XOP_IS_FORM(it, TGT, SRC) ) {
11.205 +                if( IS_XMM_REG(it,0) ) {
11.206 +                    MOVSS_xmm_rbpdisp( XOP_REG1(it), XOP_REG2(it)+RBP_OFFSET );
11.207 +                } else {
11.208 +                    MOVL_r32_rbpdisp( XOP_REG1(it), XOP_REG2(it)+RBP_OFFSET );
11.209 +                }
11.210 +            } else if( XOP_IS_FORM(it, TGT, TGT) ) {
11.211 +                if( IS_XMM_REG(it,0) ) {
11.212 +                    if( IS_XMM_REG(it,1) ) {
11.213 +                        MOVSS_xmm_xmm( XOP_REG1(it), XOP_REG2(it) );
11.214 +                    } else {
11.215 +                        MOVL_xmm_r32( XOP_REG1(it), XOP_REG2(it) );
11.216 +                    }
11.217 +                } else if( IS_XMM_REG(it,1) ) {
11.218 +                    MOVL_r32_xmm( XOP_REG1(it), XOP_REG2(it) );
11.219 +                } else {
11.220 +                    MOVL_r32_r32( XOP_REG1(it), XOP_REG2(it) );
11.221 +                }
11.222 +            } else if( XOP_IS_FORM(it, SRC, TGT) ) {
11.223 +                if( IS_XMM_REG(it,1) ) {
11.224 +                    MOVSS_rbpdisp_xmm( XOP_REG1(it)+RBP_OFFSET, XOP_REG2(it) );
11.225 +                } else {
11.226 +                    MOVL_rbpdisp_r32( XOP_REG1(it)+RBP_OFFSET, XOP_REG2(it) );
11.227 +                }
11.228 +            } else {
11.229 +                ILLOP(it);
11.230 +            }
11.231 +            break;
11.232 +        case OP_MOVQ: /* 64-bit move, same form dispatch as OP_MOV */
11.233 +            if( XOP_IS_FORM(it, IMM, SRC) ) {
11.234 +                ILLOP(it);
11.235 +            } else if( XOP_IS_FORM(it, IMM, TGT) ) {
11.236 +                if( IS_XMM_REG(it,0) ) {
11.237 +                    if( XOP_INT(it,0) == 0 ) { /* NOTE(review): only the zero constant is handled; a non-zero imm64->xmm emits nothing - confirm */
11.238 +                        XORPD_xmm_xmm( XOP_REG2(it), XOP_REG2(it) );
11.239 +                    }
11.240 +                } else {
11.241 +                    MOVQ_imm64_r64( XOP_INT(it,0), XOP_REG2(it) );
11.242 +                }
11.243 +            } else if( XOP_IS_FORM(it, TGT, SRC) ) {
11.244 +                if( IS_XMM_REG(it,0) ) {
11.245 +                    MOVSD_xmm_rbpdisp( XOP_REG1(it), XOP_REG2(it)+RBP_OFFSET );
11.246 +                } else {
11.247 +                    MOVQ_r64_rbpdisp( XOP_REG1(it), XOP_REG2(it)+RBP_OFFSET );
11.248 +                }
11.249 +            } else if( XOP_IS_FORM(it, TGT, TGT) ) {
11.250 +                if( IS_XMM_REG(it,0) ) {
11.251 +                    if( IS_XMM_REG(it,1) ) {
11.252 +                        MOVSD_xmm_xmm( XOP_REG1(it), XOP_REG2(it) );
11.253 +                    } else {
11.254 +                        MOVQ_xmm_r64( XOP_REG1(it), XOP_REG2(it) );
11.255 +                    }
11.256 +                } else if( IS_XMM_REG(it,1) ) {
11.257 +                    MOVQ_r64_xmm( XOP_REG1(it), XOP_REG2(it) );
11.258 +                } else {
11.259 +                    MOVQ_r64_r64( XOP_REG1(it), XOP_REG2(it) );
11.260 +                }
11.261 +            } else if( XOP_IS_FORM(it, SRC, TGT) ) {
11.262 +                if( IS_XMM_REG(it,1) ) {
11.263 +                    MOVSD_rbpdisp_xmm( XOP_REG1(it)+RBP_OFFSET, XOP_REG2(it) );
11.264 +                } else {
11.265 +                    MOVQ_rbpdisp_r64( XOP_REG1(it)+RBP_OFFSET, XOP_REG2(it) );
11.266 +                }
11.267 +            } else {
11.268 +                ILLOP(it);
11.269 +            }
11.270 +            break;
11.271 +        case OP_MOVSX8: /* sign-extend byte -> 32-bit */
11.272 +            if( XOP_IS_FORM(it, TGT, TGT) ) {
11.273 +                MOVSXL_r8_r32( XOP_REG1(it), XOP_REG2(it) );
11.274 +            } else if( XOP_IS_FORM(it, SRC, TGT) ) {
11.275 +                MOVSXL_rbpdisp8_r32( XOP_REG1(it)+RBP_OFFSET, XOP_REG2(it) );
11.276 +            } else {
11.277 +                ILLOP(it);
11.278 +            }
11.279 +            break;
11.280 +        case OP_MOVSX16: /* sign-extend word -> 32-bit */
11.281 +            if( XOP_IS_FORM(it, TGT, TGT) ) {
11.282 +                MOVSXL_r16_r32( XOP_REG1(it), XOP_REG2(it) );
11.283 +            } else if( XOP_IS_FORM(it, SRC, TGT) ) {
11.284 +                MOVSXL_rbpdisp16_r32( XOP_REG1(it)+RBP_OFFSET, XOP_REG2(it) );
11.285 +            } else {
11.286 +                ILLOP(it);
11.287 +            }
11.288 +            break;
11.289 +        case OP_MOVZX8: /* zero-extend byte -> 32-bit */
11.290 +            if( XOP_IS_FORM(it, TGT, TGT) ) {
11.291 +                MOVZXL_r8_r32( XOP_REG1(it), XOP_REG2(it) );
11.292 +            } else if( XOP_IS_FORM(it, SRC, TGT) ) {
11.293 +                MOVZXL_rbpdisp8_r32( XOP_REG1(it)+RBP_OFFSET, XOP_REG2(it) );
11.294 +            } else {
11.295 +                ILLOP(it);
11.296 +            }
11.297 +            break;
11.298 +        case OP_MOVZX16: /* zero-extend word -> 32-bit */
11.299 +            if( XOP_IS_FORM(it, TGT, TGT) ) {
11.300 +                MOVZXL_r16_r32( XOP_REG1(it), XOP_REG2(it) );
11.301 +            } else if( XOP_IS_FORM(it, SRC, TGT) ) {
11.302 +                MOVZXL_rbpdisp16_r32( XOP_REG1(it)+RBP_OFFSET, XOP_REG2(it) );
11.303 +            } else {
11.304 +                ILLOP(it);
11.305 +            }
11.306 +            break;
11.307 +        case OP_ADD: /* intentional fallthrough - flag/non-flag variants share the same encoding */
11.308 +        case OP_ADDS: X86L_ALU_REG(ADDL,it); break;
11.309 +        case OP_ADDCS: X86L_ALU_REG(ADCL,it); break;
11.310 +        case OP_AND: X86L_ALU_REG(ANDL,it); break;
11.311 +        case OP_CMP:
11.312 +            X86L_ALU_REG(CMPL,it); break;
11.313 +        case OP_DEC:
11.314 +            if( XOP_IS_FORM(it,TGT,NONE) ) {
11.315 +                DECL_r32(XOP_REG(it,0));
11.316 +            } else if( XOP_IS_FORM(it,SRC,NONE) ) {
11.317 +                DECL_rbpdisp(XOP_REG(it,0)+RBP_OFFSET);
11.318 +            } else {
11.319 +                ILLOP(it);
11.320 +            }
11.321 +            break;
11.322 +        case OP_MUL:
11.323 +            X86L_REG_TGT(IMULL,it);
11.324 +            break;
11.325 +        case OP_NEG: X86L_REG(NEGL,it); break;
11.326 +        case OP_NOT: X86L_REG(NOTL,it); break;
11.327 +        case OP_OR: X86L_ALU_REG(ORL,it); break;
11.328 +        case OP_RCL: X86L_IMMCL_REG(RCLL,it); break;
11.329 +        case OP_RCR: X86L_IMMCL_REG(RCRL,it); break;
11.330 +        case OP_ROL: X86L_IMMCL_REG(ROLL,it); break;
11.331 +        case OP_ROR: X86L_IMMCL_REG(RORL,it); break;
11.332 +        case OP_SAR:
11.333 +        case OP_SARS: X86L_IMMCL_REG(SARL,it); break;
11.334 +        case OP_SUBBS: X86L_ALU_REG(SBBL,it); break;
11.335 +        case OP_SLL:
11.336 +        case OP_SLLS: X86L_IMMCL_REG(SHLL,it); break;
11.337 +        case OP_SLR:
11.338 +        case OP_SLRS: X86L_IMMCL_REG(SHRL,it); break;
11.339 +        case OP_SUB:
11.340 +        case OP_SUBS: X86L_ALU_REG(SUBL,it); break;
11.341 +        case OP_SHUFFLE: /* NOTE(review): only patterns 0x4321 (bswap) and 0x1243 are handled; other patterns emit nothing - confirm */
11.342 +            if( XOP_IS_FORM(it,IMM,TGT) ) {
11.343 +                if( XOP_INT(it,0) == 0x4321 ) {
11.344 +                    BSWAPL_r32( XOP_REG(it,1) );
11.345 +                } else if( it->operand[1].value.i == 0x1243 ) { /* NOTE(review): reads operand[1] while the pattern imm is operand 0 - confirm */
11.346 +                    XCHGB_r8_r8( REG_AL, REG_AH ); /* NOTE(review): hard-codes AL/AH regardless of the operand register - confirm */
11.347 +                    /* XCHG al, ah */
11.348 +                }
11.349 +            }
11.350 +            break;
11.351 +        case OP_TST: X86L_ALU_REG(TESTL,it); break;
11.352 +        case OP_XOR: X86L_ALU_REG(XORL,it); break;
11.353 +
11.354 +        // Float
11.355 +        case OP_ABSF: /* TODO: unimplemented - emits nothing */
11.356 +        case OP_ABSD:
11.357 +            // Why is there no SSE FP ABS instruction?
11.358 +            break;
11.359 +        case OP_ADDF: X86F_REG_TGT(ADDSS,it); break;
11.360 +        case OP_ADDD: X86F_REG_TGT(ADDSD,it); break;
11.361 +        case OP_CMPF: /* TODO: unimplemented */
11.362 +            break;
11.363 +        case OP_CMPD: // UCOMISD
11.364 +            break;
11.365 +        case OP_DIVF: X86F_REG_TGT(DIVSS,it); break;
11.366 +        case OP_DIVD: X86F_REG_TGT(DIVSD,it); break;
11.367 +        case OP_MULF: X86F_REG_TGT(MULSS,it); break;
11.368 +        case OP_MULD: X86F_REG_TGT(MULSD,it); break;
11.369 +        case OP_RSQRTF:X86F_REG_TGT(RSQRTSS,it); break;
11.370 +        case OP_SQRTF: X86F_REG_TGT(SQRTSS,it); break;
11.371 +        case OP_SQRTD: X86F_REG_TGT(SQRTSD,it); break;
11.372 +        case OP_SUBF: X86F_REG_TGT(SUBSS,it); break;
11.373 +        case OP_SUBD: X86F_REG_TGT(SUBSD,it); break;
11.374 +
11.375 +        case OP_DOTPRODV: /* dot product via MULPS + 2x HADDPS (HADDPS requires SSE3); xmm4 used as scratch */
11.376 +            MULPS_rbpdisp_xmm( XOP_REG1(it), 4 ); /* NOTE(review): operand lacks +RBP_OFFSET unlike every other rbpdisp use - confirm */
11.377 +            HADDPS_xmm_xmm( 4, 4 );
11.378 +            HADDPS_xmm_xmm( 4, 4 );
11.379 +            MOVSS_xmm_rbpdisp( 4, XOP_REG1(it) ); /* NOTE(review): also lacks +RBP_OFFSET - confirm */
11.380 +            break;
11.381 +        case OP_SINCOSF: /* TODO: unimplemented - emits nothing */
11.382 +        case OP_MATMULV:
11.383 +            break;
11.384 +        case OP_FTOD: /* float -> double */
11.385 +            if( XOP_IS_FORM(it,TGT,TGT) ) {
11.386 +                CVTSS2SD_xmm_xmm( XOP_REG(it,0), XOP_REG(it,1) );
11.387 +            } else if( XOP_IS_FORM(it,SRC,TGT) ) {
11.388 +                CVTSS2SD_rbpdisp_xmm( XOP_REG(it,0)+RBP_OFFSET, XOP_REG(it,1) );
11.389 +            } else {
11.390 +                ILLOP(it);
11.391 +            }
11.392 +            break;
11.393 +        case OP_DTOF: /* double -> float; NOTE(review): emits CVTSS2SD, identical to OP_FTOD - looks like it should be CVTSD2SS - confirm */
11.394 +            if( XOP_IS_FORM(it,TGT,TGT) ) {
11.395 +                CVTSS2SD_xmm_xmm( XOP_REG(it,0), XOP_REG(it,1) );
11.396 +            } else if( XOP_IS_FORM(it, SRC,TGT) ) {
11.397 +                CVTSS2SD_rbpdisp_xmm( XOP_REG(it,0)+RBP_OFFSET, XOP_REG(it,1) );
11.398 +            } else {
11.399 +                ILLOP(it);
11.400 +            }
11.401 +            break;
11.402 +        case OP_ITOD: /* int32 -> double */
11.403 +            if( XOP_IS_FORM(it,TGT,TGT) ) {
11.404 +                CVTSI2SDL_r32_xmm( XOP_REG(it,0), XOP_REG(it,1) );
11.405 +            } else if( XOP_IS_FORM(it,SRC,TGT) ) {
11.406 +                CVTSI2SDL_rbpdisp_xmm( XOP_REG(it,0)+RBP_OFFSET, XOP_REG(it,1) );
11.407 +            } else {
11.408 +                ILLOP(it);
11.409 +            }
11.410 +            break;
11.411 +        case OP_DTOI: /* double -> int32 */
11.412 +            if( XOP_IS_FORM(it,TGT,TGT) ) {
11.413 +                CVTSD2SIL_xmm_r32( XOP_REG(it,0), XOP_REG(it,1) );
11.414 +            } else if( XOP_IS_FORM(it,SRC,TGT) ) {
11.415 +                CVTSD2SIL_rbpdisp_r32( XOP_REG(it,0)+RBP_OFFSET, XOP_REG(it,1) );
11.416 +            } else {
11.417 +                ILLOP(it);
11.418 +            }
11.419 +            break;
11.420 +        case OP_ITOF:
11.421 +        case OP_FTOI: /* TODO: unimplemented - currently falls through to the (also empty) branch cases below */
11.422 +
11.423 +        case OP_BRCOND:
11.424 +        case OP_BRREL:
11.425 +        case OP_BR:
11.426 +        case OP_BRCONDDEL: /* TODO: unimplemented - falls through into OP_CALL0 */
11.427 +
11.428 +        case OP_CALL0: /* call with no arguments */
11.429 +            if( XOP_IS_INTIMM(it,0) ) {
11.430 +                CALL_imm32( XOP_INT(it,0) );
11.431 +            } else if( XOP_IS_SRCREG(it,0) ) {
11.432 +                CALL_r32( XOP_INT(it,0) ); /* NOTE(review): passes XOP_INT for a register operand - looks like it should be XOP_REG (or CALL_rbpdisp) - confirm */
11.433 +            } else {
11.434 +                ILLOP(it);
11.435 +            }
11.436 +            break;
11.437 +        case OP_XLAT: /* pointer-table lookup: reg = table[reg], scaled by pointer size */
11.438 +            if( IS_X86_64() ) {
11.439 +                ss = 3;
11.440 +            } else {
11.441 +                ss = 2;
11.442 +            }
11.443 +            if( XOP_IS_FORM(it,IMM,TGT) ) {
11.444 +                MOVP_sib_rptr(ss, XOP_REG(it,1), -1, XOP_INT(it,0), XOP_REG(it,1));
11.445 +            } else if( XOP_IS_FORM(it,TGT,TGT) ) {
11.446 +                MOVP_sib_rptr(ss, XOP_REG(it,1), XOP_REG(it,0), 0, XOP_REG(it,1));
11.447 +            } else {
11.448 +                ILLOP(it);
11.449 +            }
11.450 +            break;
11.451 +        case OP_CALLLUT: /* indirect call through a lookup table (reg+disp or reg+reg) */
11.452 +            if( XOP_IS_FORM(it,TGT,IMM) ) {
11.453 +                CALL_r32disp(XOP_REG(it,0),XOP_INT(it,1));
11.454 +            } else if( XOP_IS_FORM(it,TGT,TGT) ) {
11.455 +                CALL_sib(0,XOP_REG(it,0),XOP_REG(it,1),0);
11.456 +            } else if( XOP_IS_FORM(it,IMM,TGT) ) {
11.457 +                CALL_r32disp(XOP_REG(it,1),XOP_INT(it,0));
11.458 +            } else {
11.459 +                ILLOP(it);
11.460 +            }
11.461 +            break;
11.462 +
11.463 +        // SH4-specific macro operations
11.464 +        case OP_RAISEME: /* TODO: unimplemented - falls through */
11.465 +
11.466 +        case OP_RAISEMNE:
11.467 +
11.468 +        case OP_CMPSTR:
11.469 +            break;
11.470 +        case OP_DIV1: /* TODO: unimplemented */
11.471 +            break;
11.472 +        case OP_SHAD: /* SH4 arithmetic shift: positive ECX shifts left, negative shifts right */
11.473 +            assert( it->operand[0].type == TGT && XOP_REG(it,0) == REG_ECX );
11.474 +            CMPL_imms_r32(0,REG_ECX);
11.475 +            JNGE_label(shad_shr);
11.476 +            X86L_CL_REG(SHLL,it);
11.477 +            JMP_label(shad_end);
11.478 +
11.479 +            JMP_TARGET(shad_shr);
11.480 +            if( IS_X86_64() && it->operand[1].type == TGT ) {
11.481 +                /* We can do this a little more simply with a 64-bit shift */
11.482 +                ORL_imms_r32(0xFFFFFFE0,REG_ECX);
11.483 +                NEGL_r32(REG_ECX);
11.484 +                MOVSXQ_r32_r64(XOP_REG(it,1), XOP_REG(it,1)); // sign-extend
11.485 +                SARQ_cl_r64(XOP_REG(it,1));
11.486 +            } else {
11.487 +                NEGL_r32(REG_ECX);
11.488 +                ANDB_imms_r8( 0x1F, REG_ECX );
11.489 +                JE_label(emptyshr );
11.490 +                X86L_CL_REG(SARL,it);
11.491 +                JMP_label(shad_end2);
11.492 +
11.493 +                JMP_TARGET(emptyshr); /* shift count 0 means "shift by 32": result is all sign bits */
11.494 +                if( it->operand[1].type == TGT ) {
11.495 +                    SARL_imm_r32( 31, XOP_REG(it,1) );
11.496 +                } else {
11.497 +                    SARL_imm_rbpdisp( 32, XOP_REG(it,1)+RBP_OFFSET ); /* NOTE(review): x86 masks shift counts to 5 bits, so 32 acts as 0; TGT path uses 31 - confirm */
11.498 +                }
11.499 +                JMP_TARGET(shad_end2);
11.500 +            }
11.501 +            JMP_TARGET(shad_end);
11.502 +            break;
11.503 +
11.504 +        case OP_SHLD: /* SH4 logical shift: positive ECX shifts left, negative shifts right */
11.505 +            assert( it->operand[0].type == TGT && XOP_REG(it,0) == REG_ECX );
11.506 +            CMPL_imms_r32(0,REG_ECX);
11.507 +            JNGE_label(shld_shr);
11.508 +            X86L_CL_REG(SHLL,it);
11.509 +            JMP_label(shld_end);
11.510 +
11.511 +            JMP_TARGET(shld_shr);
11.512 +            if( IS_X86_64() && it->operand[1].type == TGT ) {
11.513 +                /* We can do this a little more simply with a 64-bit shift */
11.514 +                ORL_imms_r32(0xFFFFFFE0,REG_ECX);
11.515 +                NEGL_r32(REG_ECX);
11.516 +                MOVL_r32_r32(XOP_REG(it,1), XOP_REG(it,1)); // Ensure high bits are 0
11.517 +                SHRQ_cl_r64(XOP_REG(it,1));
11.518 +            } else {
11.519 +                NEGL_r32(REG_ECX);
11.520 +                ANDB_imms_r8( 0x1F, REG_ECX );
11.521 +                JE_label(emptyshr );
11.522 +                X86L_CL_REG(SHRL,it);
11.523 +                JMP_label(shld_end2);
11.524 +
11.525 +                JMP_TARGET(emptyshr); /* shift count 0 means "shift by 32": logical result is 0 */
11.526 +                XORL_r32_r32( REG_EAX, REG_EAX ); /* NOTE(review): zeroes EAX rather than the operand register XOP_REG(it,1) - confirm */
11.527 +                JMP_TARGET(shld_end2);
11.528 +            }
11.529 +            JMP_TARGET(shld_end);
11.530 +            break;
11.531 +
11.532 +        case OP_MULQ: /* TODO: unimplemented - currently falls through to ILLOP via default */
11.533 +        case OP_ADDQSAT32:
11.534 +        case OP_ADDQSAT48:
11.535 +
11.536 +        // Should not occur (should be have been lowered in target_lower)
11.537 +        case OP_NEGF:
11.538 +        case OP_NEGD:
11.539 +        case OP_LOADB:
11.540 +        case OP_LOADBFW:
11.541 +        case OP_LOADW:
11.542 +        case OP_LOADL:
11.543 +        case OP_LOADQ:
11.544 +        case OP_STOREB:
11.545 +        case OP_STOREW:
11.546 +        case OP_STOREL:
11.547 +        case OP_STOREQ:
11.548 +        case OP_STORELCA:
11.549 +        case OP_OCBI:
11.550 +        case OP_OCBP:
11.551 +        case OP_OCBWB:
11.552 +        case OP_PREF:
11.553 +        default:
11.554 +            ILLOP(it);
11.555 +        }
11.556 +        if( it == end ) /* 'end' is inclusive: emit it, then stop */
11.557 +            break;
11.558 +        /* Epilogue */
11.559 +    }
11.560 +}
12.1 --- a/src/xlat/x86/x86op.h Tue Apr 07 10:39:02 2009 +0000
12.2 +++ b/src/xlat/x86/x86op.h Tue Apr 07 10:55:03 2009 +0000
12.3 @@ -386,6 +386,9 @@
12.4 #define ANDQ_imms_r64(imm,r1) x86_encode_imms_rm64(0x83, 0x81, 4, imm, r1)
12.5 #define ANDP_imms_rptr(imm,r1) x86_encode_imms_rmptr(0x83, 0x81, 4, imm, r1)
12.6
12.7 +#define BSWAPL_r32(r1) x86_encode_opcode32(0x0FC8, r1)
12.8 +#define BSWAPQ_r64(r1) x86_encode_opcode64(0x0FC8, r1) /* parameter was named r2 while the body used r1 (undeclared) */
12.9 +
12.10 #define CLC() OP(0xF8)
12.11 #define CLD() OP(0xFC)
12.12 #define CMC() OP(0xF5)
12.13 @@ -404,6 +407,14 @@
12.14 #define CMPQ_imms_r64(imm,r1) x86_encode_imms_rm64(0x83, 0x81, 7, imm, r1)
12.15 #define CMPQ_r64_r64(r1,r2) x86_encode_r64_rm64(0x39, r1, r2)
12.16
12.17 +#define CDQL() OP(0x99)
12.18 +#define CDOQ() OP(PREF_REXW); OP(0x99)
12.19 +
12.20 +#define DECL_r32(r1) x86_encode_r32_rm32(0xFF,1,r1) /* NB single-op form unavailable in 64-bit mode */
12.21 +#define DECL_rbpdisp(disp) x86_encode_r32_rbpdisp32(0xFF,1,disp)
12.22 +#define DECQ_r64(r1) x86_encode_r64_rm64(0xFF,1,r1)
12.23 +#define DECQ_rbpdisp(disp) x86_encode_r64_rbpdisp64(0xFF,1,disp)
12.24 +
12.25 #define IDIVL_r32(r1) x86_encode_r32_rm32(0xF7, 7, r1)
12.26 #define IDIVL_rbpdisp(disp) x86_encode_r32_rbpdisp32(0xF7, 7, disp)
12.27 #define IDIVQ_r64(r1) x86_encode_r64_rm64(0xF7, 7, r1)
12.28 @@ -417,6 +428,12 @@
12.29 #define IMULL_rspdisp_r32(disp,r1) x86_encode_r32_rspdisp32(0x0FAF, r1, disp)
12.30 #define IMULQ_imms_r64(imm,r1) x86_encode_imms_rm64(0x6B,0x69, r1, imm, r1)
12.31 #define IMULQ_r64_r64(r1,r2) x86_encode_r64_rm64(0x0FAF, r2, r1)
12.32 +#define IMULQ_rbpdisp_r64(disp,r1) x86_encode_r64_rbpdisp64(0x0FAF, r1, disp)
12.33 +
12.34 +#define INCL_r32(r1) x86_encode_r32_rm32(0xFF,0,r1) /* NB single-op form unavailable in 64-bit mode */
12.35 +#define INCL_rbpdisp(disp) x86_encode_r32_rbpdisp32(0xFF,0,disp)
12.36 +#define INCQ_r64(r1) x86_encode_r64_rm64(0xFF,0,r1)
12.37 +#define INCQ_rbpdisp(disp) x86_encode_r64_rbpdisp64(0xFF,0,disp)
12.38
12.39 #define LEAL_r32disp_r32(r1,disp,r2) x86_encode_r32_mem32(0x8D, r2, r1, -1, 0, disp)
12.40 #define LEAL_rbpdisp_r32(disp,r1) x86_encode_r32_rbpdisp32(0x8D, r1, disp)
12.41 @@ -474,12 +491,12 @@
12.42
12.43 #define NEGB_r8(r1) x86_encode_r32_rm32(0xF6, 3, r1)
12.44 #define NEGL_r32(r1) x86_encode_r32_rm32(0xF7, 3, r1)
12.45 -#define NEGL_rbpdisp(r1) x86_encode_r32_rbspdisp32(0xF7, 3, disp)
12.46 +#define NEGL_rbpdisp(disp) x86_encode_r32_rbpdisp32(0xF7, 3, disp)
12.47 #define NEGQ_r64(r1) x86_encode_r64_rm64(0xF7, 3, r1)
12.48
12.49 #define NOTB_r8(r1) x86_encode_r32_rm32(0xF6, 2, r1)
12.50 #define NOTL_r32(r1) x86_encode_r32_rm32(0xF7, 2, r1)
12.51 -#define NOTL_rbpdisp(r1) x86_encode_r32_rbspdisp32(0xF7, 2, disp)
12.52 +#define NOTL_rbpdisp(disp) x86_encode_r32_rbpdisp32(0xF7, 2, disp)
12.53 #define NOTQ_r64(r1) x86_encode_r64_rm64(0xF7, 2, r1)
12.54
12.55 #define ORB_imms_r8(imm,r1) x86_encode_r32_rm32(0x80, 1, r1); OP(imm)
12.56 @@ -498,34 +515,85 @@
12.57 #define PUSH_r32(r1) x86_encode_opcode32(0x50, r1)
12.58
12.59 #define RCLL_cl_r32(r1) x86_encode_r32_rm32(0xD3,2,r1)
12.60 +#define RCLL_cl_rbpdisp(disp) x86_encode_r32_rbpdisp32(0xD3,2,disp)
12.61 #define RCLL_imm_r32(imm,r1) if( imm == 1 ) { x86_encode_r32_rm32(0xD1,2,r1); } else { x86_encode_r32_rm32(0xC1,2,r1); OP(imm); }
12.62 +#define RCLL_imm_rbpdisp(imm,disp) if( imm == 1 ) { x86_encode_r32_rbpdisp32(0xD1,2,disp); } else { x86_encode_r32_rbpdisp32(0xC1,2,disp); OP(imm); }
12.63 #define RCLQ_cl_r64(r1) x86_encode_r64_rm64(0xD3,2,r1)
12.64 +#define RCLQ_cl_rbpdisp(disp) x86_encode_r64_rbpdisp64(0xD3,2,disp)
12.65 #define RCLQ_imm_r64(imm,r1) if( imm == 1 ) { x86_encode_r64_rm64(0xD1,2,r1); } else { x86_encode_r64_rm64(0xC1,2,r1); OP(imm); }
12.66 +#define RCLQ_imm_rbpdisp(imm,disp) if( imm == 1 ) { x86_encode_r64_rbpdisp64(0xD1,2,disp); } else { x86_encode_r64_rbpdisp64(0xC1,2,disp); OP(imm); }
12.67 +
12.68 #define RCRL_cl_r32(r1) x86_encode_r32_rm32(0xD3,3,r1)
12.69 +#define RCRL_cl_rbpdisp(disp) x86_encode_r32_rbpdisp32(0xD3,3,disp)
12.70 #define RCRL_imm_r32(imm,r1) if( imm == 1 ) { x86_encode_r32_rm32(0xD1,3,r1); } else { x86_encode_r32_rm32(0xC1,3,r1); OP(imm); }
12.71 +#define RCRL_imm_rbpdisp(imm,disp) if( imm == 1 ) { x86_encode_r32_rbpdisp32(0xD1,3,disp); } else { x86_encode_r32_rbpdisp32(0xC1,3,disp); OP(imm); }
12.72 #define RCRQ_cl_r64(r1) x86_encode_r64_rm64(0xD3,3,r1)
12.73 +#define RCRQ_cl_rbpdisp(disp) x86_encode_r64_rbpdisp64(0xD3,3,disp)
12.74 #define RCRQ_imm_r64(imm,r1) if( imm == 1 ) { x86_encode_r64_rm64(0xD1,3,r1); } else { x86_encode_r64_rm64(0xC1,3,r1); OP(imm); }
12.75 +#define RCRQ_imm_rbpdisp(imm,disp) if( imm == 1 ) { x86_encode_r64_rbpdisp64(0xD1,3,disp); } else { x86_encode_r64_rbpdisp64(0xC1,3,disp); OP(imm); }
12.76 +
12.77 #define ROLL_cl_r32(r1) x86_encode_r32_rm32(0xD3,0,r1)
12.78 +#define ROLL_cl_rbpdisp(disp) x86_encode_r32_rbpdisp32(0xD3,0,disp)
12.79 #define ROLL_imm_r32(imm,r1) if( imm == 1 ) { x86_encode_r32_rm32(0xD1,0,r1); } else { x86_encode_r32_rm32(0xC1,0,r1); OP(imm); }
12.80 +#define ROLL_imm_rbpdisp(imm,disp) if( imm == 1 ) { x86_encode_r32_rbpdisp32(0xD1,0,disp); } else { x86_encode_r32_rbpdisp32(0xC1,0,disp); OP(imm); }
12.81 #define ROLQ_cl_r64(r1) x86_encode_r64_rm64(0xD3,0,r1)
12.82 +#define ROLQ_cl_rbpdisp(disp) x86_encode_r64_rbpdisp64(0xD3,0,disp)
12.83 #define ROLQ_imm_r64(imm,r1) if( imm == 1 ) { x86_encode_r64_rm64(0xD1,0,r1); } else { x86_encode_r64_rm64(0xC1,0,r1); OP(imm); }
12.84 +#define ROLQ_imm_rbpdisp(imm,disp) if( imm == 1 ) { x86_encode_r64_rbpdisp64(0xD1,0,disp); } else { x86_encode_r64_rbpdisp64(0xC1,0,disp); OP(imm); }
12.85 +
12.86 #define RORL_cl_r32(r1) x86_encode_r32_rm32(0xD3,1,r1)
12.87 +#define RORL_cl_rbpdisp(disp) x86_encode_r32_rbpdisp32(0xD3,1,disp)
12.88 #define RORL_imm_r32(imm,r1) if( imm == 1 ) { x86_encode_r32_rm32(0xD1,1,r1); } else { x86_encode_r32_rm32(0xC1,1,r1); OP(imm); }
12.89 +#define RORL_imm_rbpdisp(imm,disp) if( imm == 1 ) { x86_encode_r32_rbpdisp32(0xD1,1,disp); } else { x86_encode_r32_rbpdisp32(0xC1,1,disp); OP(imm); }
12.90 #define RORQ_cl_r64(r1) x86_encode_r64_rm64(0xD3,1,r1)
12.91 +#define RORQ_cl_rbpdisp(disp) x86_encode_r64_rbpdisp64(0xD3,1,disp)
12.92 #define RORQ_imm_r64(imm,r1) if( imm == 1 ) { x86_encode_r64_rm64(0xD1,1,r1); } else { x86_encode_r64_rm64(0xC1,1,r1); OP(imm); }
12.93 +#define RORQ_imm_rbpdisp(imm,disp) if( imm == 1 ) { x86_encode_r64_rbpdisp64(0xD1,1,disp); } else { x86_encode_r64_rbpdisp64(0xC1,1,disp); OP(imm); }
12.94
12.95 #define SARL_cl_r32(r1) x86_encode_r32_rm32(0xD3,7,r1)
12.96 +#define SARL_cl_rbpdisp(disp) x86_encode_r32_rbpdisp32(0xD3,7,disp)
12.97 #define SARL_imm_r32(imm,r1) if( imm == 1 ) { x86_encode_r32_rm32(0xD1,7,r1); } else { x86_encode_r32_rm32(0xC1,7,r1); OP(imm); }
12.98 +#define SARL_imm_rbpdisp(imm,disp) if( imm == 1 ) { x86_encode_r32_rbpdisp32(0xD1,7,disp); } else { x86_encode_r32_rbpdisp32(0xC1,7,disp); OP(imm); }
12.99 #define SARQ_cl_r64(r1) x86_encode_r64_rm64(0xD3,7,r1)
12.100 +#define SARQ_cl_rbpdisp(disp) x86_encode_r64_rbpdisp64(0xD3,7,disp)
12.101 #define SARQ_imm_r64(imm,r1) if( imm == 1 ) { x86_encode_r64_rm64(0xD1,7,r1); } else { x86_encode_r64_rm64(0xC1,7,r1); OP(imm); }
12.102 +#define SARQ_imm_rbpdisp(imm,disp) if( imm == 1 ) { x86_encode_r64_rbpdisp64(0xD1,7,disp); } else { x86_encode_r64_rbpdisp64(0xC1,7,disp); OP(imm); }
12.103 +
12.104 #define SHLL_cl_r32(r1) x86_encode_r32_rm32(0xD3,4,r1)
12.105 +#define SHLL_cl_rbpdisp(disp) x86_encode_r32_rbpdisp32(0xD3,4,disp)
12.106 #define SHLL_imm_r32(imm,r1) if( imm == 1 ) { x86_encode_r32_rm32(0xD1,4,r1); } else { x86_encode_r32_rm32(0xC1,4,r1); OP(imm); }
12.107 +#define SHLL_imm_rbpdisp(imm,disp) if( imm == 1 ) { x86_encode_r32_rbpdisp32(0xD1,4,disp); } else { x86_encode_r32_rbpdisp32(0xC1,4,disp); OP(imm); }
12.108 #define SHLQ_cl_r64(r1) x86_encode_r64_rm64(0xD3,4,r1)
12.109 +#define SHLQ_cl_rbpdisp(disp) x86_encode_r64_rbpdisp64(0xD3,4,disp)
12.110 #define SHLQ_imm_r64(imm,r1) if( imm == 1 ) { x86_encode_r64_rm64(0xD1,4,r1); } else { x86_encode_r64_rm64(0xC1,4,r1); OP(imm); }
12.111 +#define SHLQ_imm_rbpdisp(imm,disp) if( imm == 1 ) { x86_encode_r64_rbpdisp64(0xD1,4,disp); } else { x86_encode_r64_rbpdisp64(0xC1,4,disp); OP(imm); }
12.112 +
12.113 +#define SHLDL_cl_r32_r32(r1,r2) x86_encode_r32_rm32(0x0FA5,r1,r2)
12.114 +#define SHLDL_cl_r32_rbpdisp(r1,d) x86_encode_r32_rbpdisp32(0x0FA5,r1,d)
12.115 +#define SHLDL_imm_r32_r32(imm,r1,r2) x86_encode_r32_rm32(0x0FA4,r1,r2); OP(imm)
12.116 +#define SHLDL_imm_r32_rbpdisp(i,r,d) x86_encode_r32_rbpdisp32(0x0FA4,r,d); OP(i)
12.117 +#define SHLDQ_cl_r64_r64(r1,r2) x86_encode_r64_rm64(0x0FA5,r1,r2)
12.118 +#define SHLDQ_cl_r64_rbpdisp(r1,d) x86_encode_r64_rbpdisp64(0x0FA5,r1,d)
12.119 +#define SHLDQ_imm_r64_r64(imm,r1,r2) x86_encode_r64_rm64(0x0FA4,r1,r2); OP(imm)
12.120 +#define SHLDQ_imm_r64_rbpdisp(i,r,d) x86_encode_r64_rbpdisp64(0x0FA4,r,d); OP(i)
12.121 +
12.122 #define SHRL_cl_r32(r1) x86_encode_r32_rm32(0xD3,5,r1)
12.123 +#define SHRL_cl_rbpdisp(disp) x86_encode_r32_rbpdisp32(0xD3,5,disp)
12.124 #define SHRL_imm_r32(imm,r1) if( imm == 1 ) { x86_encode_r32_rm32(0xD1,5,r1); } else { x86_encode_r32_rm32(0xC1,5,r1); OP(imm); }
12.125 +#define SHRL_imm_rbpdisp(imm,disp) if( imm == 1 ) { x86_encode_r32_rbpdisp32(0xD1,5,disp); } else { x86_encode_r32_rbpdisp32(0xC1,5,disp); OP(imm); }
12.126 #define SHRQ_cl_r64(r1) x86_encode_r64_rm64(0xD3,5,r1)
12.127 +#define SHRQ_cl_rbpdisp(disp) x86_encode_r64_rbpdisp64(0xD3,5,disp)
12.128 #define SHRQ_imm_r64(imm,r1) if( imm == 1 ) { x86_encode_r64_rm64(0xD1,5,r1); } else { x86_encode_r64_rm64(0xC1,5,r1); OP(imm); }
12.129 +#define SHRQ_imm_rbpdisp(imm,disp) if( imm == 1 ) { x86_encode_r64_rbpdisp64(0xD1,5,disp); } else { x86_encode_r64_rbpdisp64(0xC1,5,disp); OP(imm); }
12.130 +
12.131 +#define SHRDL_cl_r32_r32(r1,r2) x86_encode_r32_rm32(0x0FAD,r1,r2)
12.132 +#define SHRDL_cl_r32_rbpdisp(r1,d) x86_encode_r32_rbpdisp32(0x0FAD,r1,d)
12.133 +#define SHRDL_imm_r32_r32(imm,r1,r2) x86_encode_r32_rm32(0x0FAC,r1,r2); OP(imm)
12.134 +#define SHRDL_imm_r32_rbpdisp(i,r,d) x86_encode_r32_rbpdisp32(0x0FAC,r,d); OP(i)
12.135 +#define SHRDQ_cl_r64_r64(r1,r2) x86_encode_r64_rm64(0x0FAD,r1,r2)
12.136 +#define SHRDQ_cl_r64_rbpdisp(r1,d) x86_encode_r64_rbpdisp64(0x0FAD,r1,d)
12.137 +#define SHRDQ_imm_r64_r64(imm,r1,r2) x86_encode_r64_rm64(0x0FAC,r1,r2); OP(imm)
12.138 +#define SHRDQ_imm_r64_rbpdisp(i,r,d) x86_encode_r64_rbpdisp64(0x0FAC,r,d); OP(i)
12.139
12.140 #define SBBB_imms_r8(imm,r1) x86_encode_r32_rm32(0x80, 3, r1); OP(imm)
12.141 #define SBBB_r8_r8(r1,r2) x86_encode_r32_rm32(0x18, r1, r2)
12.142 @@ -582,6 +650,7 @@
12.143 #define CALL_imm32(ptr) x86_encode_r32_mem32disp32(0xFF, 2, -1, ptr)
12.144 #define CALL_r32(r1) x86_encode_r32_rm32(0xFF, 2, r1)
12.145 #define CALL_r32disp(r1,disp) x86_encode_r32_mem32disp32(0xFF, 2, r1, disp)
12.146 +#define CALL_sib(ss,ii,bb,disp) x86_encode_r32_mem32(0xFF, 2, bb, ii, ss, disp)
12.147
12.148 #define JCC_cc_rel8(cc,rel) OP(0x70+(cc)); OP(rel)
12.149 #define JCC_cc_rel32(cc,rel) OP(0x0F); OP(0x80+(cc)); OP32(rel)
12.150 @@ -596,6 +665,20 @@
12.151 #define RET() OP(0xC3)
12.152 #define RET_imm(imm) OP(0xC2); OP16(imm)
12.153
12.154 +/* Labeled jumps for automated backpatching of forward jumps (rel8 only) */
12.155 +#define _MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
12.156 +#define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
12.157 +#define JCC_cc_label(cc,label) JCC_cc_rel8(cc,-1); _MARK_JMP8(label)
12.158 +#define JMP_label(label) JMP_rel8(-1); _MARK_JMP8(label)
12.159 +#define JAE_label(label) JCC_cc_label(X86_COND_AE,label)
12.160 +#define JE_label(label) JCC_cc_label(X86_COND_E,label)
12.161 +#define JG_label(label) JCC_cc_label(X86_COND_G,label)
12.162 +#define JGE_label(label) JCC_cc_label(X86_COND_GE,label)
12.163 +#define JNA_label(label) JCC_cc_label(X86_COND_NA,label)
12.164 +#define JNE_label(label) JCC_cc_label(X86_COND_NE,label)
12.165 +#define JNGE_label(label) JCC_cc_label(X86_COND_NGE,label)
12.166 +#define JNO_label(label) JCC_cc_label(X86_COND_NO,label)
12.167 +#define JS_label(label) JCC_cc_label(X86_COND_S,label)
12.168
12.169 /* x87 Floating point instructions */
12.170 #define FABS_st0() OP(0xD9); OP(0xE1)
12.171 @@ -621,6 +704,12 @@
12.172 #define FSTPD_rbpdisp(disp) x86_encode_r32_rbpdisp32(0xDD, 3, disp)
12.173
12.174
12.175 +/* SSE Integer instructions */
12.176 +#define MOVL_r32_xmm(r1,r2) OP(0x66); x86_encode_r32_rm32(0x0F6E, r2, r1)
12.177 +#define MOVL_xmm_r32(r1,r2) OP(0x66); x86_encode_r32_rm32(0x0F7E, r1, r2)
12.178 +#define MOVQ_r64_xmm(r1,r2) OP(0x66); x86_encode_r64_rm64(0x0F6E, r2, r1)
12.179 +#define MOVQ_xmm_r64(r1,r2) OP(0x66); x86_encode_r64_rm64(0x0F7E, r1, r2)
12.180 +
12.181 /* SSE Packed floating point instructions */
12.182 #define ADDPS_rbpdisp_xmm(disp,r1) x86_encode_r32_rbpdisp32(0x0F58, r1, disp)
12.183 #define ADDPS_xmm_xmm(r1,r2) x86_encode_r32_rm32(0x0F58, r2, r1)
12.184 @@ -675,6 +764,18 @@
12.185 #define CMPSS_cc_xmm_xmm(cc,r1,r2) OP(0xF3); x86_encode_r32_rm32(0x0FC2, r2, r1); OP(cc)
12.186 #define COMISS_rbpdisp_xmm(disp,r1) x86_encode_r32_rbpdisp32(0x0F2F, r1, disp)
12.187 #define COMISS_xmm_xmm(r1,r2) x86_encode_r32_rm32(0x0F2F, r2, r1)
12.188 +#define CVTSI2SSL_r32_xmm(r1,r2) OP(0xF3); x86_encode_r32_rm32(0x0F2A, r2, r1)
12.189 +#define CVTSI2SSL_rbpdisp_xmm(d,r1) OP(0xF3); x86_encode_r32_rbpdisp32(0x0F2A, r1, d)
12.190 +#define CVTSI2SSQ_r64_xmm(r1,r2) OP(0xF3); x86_encode_r64_rm64(0x0F2A, r2, r1)
12.191 +#define CVTSI2SSQ_rbpdisp_xmm(d,r1) OP(0xF3); x86_encode_r64_rbpdisp64(0x0F2A, r1, d)
12.192 +#define CVTSS2SIL_xmm_r32(r1,r2) OP(0xF3); x86_encode_r32_rm32(0x0F2D, r2, r1)
12.193 +#define CVTSS2SIL_rbpdisp_r32(d,r1) OP(0xF3); x86_encode_r32_rbpdisp32(0x0F2D, r1, d)
12.194 +#define CVTSS2SIQ_xmm_r64(r1,r2) OP(0xF3); x86_encode_r64_rm64(0x0F2D, r2, r1)
12.195 +#define CVTSS2SIQ_rbpdisp_r64(d,r1) OP(0xF3); x86_encode_r64_rbpdisp64(0x0F2D, r1, d)
12.196 +#define CVTTSS2SIL_xmm_r32(r1,r2) OP(0xF3); x86_encode_r32_rm32(0x0F2C, r2, r1)
12.197 +#define CVTTSS2SIL_rbpdisp_r32(d,r1) OP(0xF3); x86_encode_r32_rbpdisp32(0x0F2C, r1, d)
12.198 +#define CVTTSS2SIQ_xmm_r64(r1,r2) OP(0xF3); x86_encode_r64_rm64(0x0F2C, r2, r1)
12.199 +#define CVTTSS2SIQ_rbpdisp_r64(d,r1) OP(0xF3); x86_encode_r64_rbpdisp64(0x0F2C, r1, d)
12.200 #define DIVSS_rbpdisp_xmm(disp,r1) OP(0xF3); x86_encode_r32_rbpdisp32(0x0F5E, r1, disp)
12.201 #define DIVSS_xmm_xmm(r1,r2) OP(0xF3); x86_encode_r32_rm32(0x0F5E, r2, r1)
12.202 #define MAXSS_rbpdisp_xmm(disp,r1) OP(0xF3); x86_encode_r32_rbpdisp32(0x0F5F, r1, disp)
12.203 @@ -706,9 +807,9 @@
12.204 #define ANDNPD_xmm_xmm(r1,r2) OP(0x66); x86_encode_r32_rm32(0x0F55, r2, r1)
12.205 #define CMPPD_cc_rbpdisp_xmm(cc,d,r) OP(0x66); x86_encode_r32_rbpdisp32(0x0FC2, r, d); OP(cc)
12.206 #define CMPPD_cc_xmm_xmm(cc,r1,r2) OP(0x66); x86_encode_r32_rm32(0x0FC2, r2, r1); OP(cc)
12.207 -#define CVTPD2PS_rbpdisp_xmm(dsp,r1) OP(0x66); x86_encode_r32_rbpdisp32(0x0F5A, r1, disp)
12.208 +#define CVTPD2PS_rbpdisp_xmm(dsp,r1) OP(0x66); x86_encode_r32_rbpdisp32(0x0F5A, r1, dsp)
12.209 #define CVTPD2PS_xmm_xmm(r1,r2) OP(0x66); x86_encode_r32_rm32(0x0F5A, r2, r1)
12.210 -#define CVTPS2PD_rbpdisp_xmm(dsp,r1) x86_encode_r32_rbpdisp32(0x0F5A, r1, disp)
12.211 +#define CVTPS2PD_rbpdisp_xmm(dsp,r1) x86_encode_r32_rbpdisp32(0x0F5A, r1, dsp)
12.212 #define CVTPS2PD_xmm_xmm(r1,r2) x86_encode_r32_rm32(0x0F5A, r2, r1)
12.213 #define DIVPD_rbpdisp_xmm(disp,r1) OP(0x66); x86_encode_r32_rbpdisp32(0x0F5E, r1, disp)
12.214 #define DIVPD_xmm_xmm(r1,r2) OP(0x66); x86_encode_r32_rm32(0x0F5E, r2, r1)
12.215 @@ -741,6 +842,23 @@
12.216 #define ADDSD_xmm_xmm(r1,r2) OP(0xF2); x86_encode_r32_rm32(0x0F58, r2, r1)
12.217 #define CMPSD_cc_rbpdisp_xmm(cc,d,r) OP(0xF2); x86_encode_r32_rbpdisp32(0x0FC2, r, d); OP(cc)
12.218 #define CMPSD_cc_xmm_xmm(cc,r1,r2) OP(0xF2); x86_encode_r32_rm32(0x0FC2, r2, r1); OP(cc)
12.219 +#define CVTSI2SDL_r32_xmm(r1,r2) OP(0xF2); x86_encode_r32_rm32(0x0F2A, r2, r1)
12.220 +#define CVTSI2SDL_rbpdisp_xmm(d,r1) OP(0xF2); x86_encode_r32_rbpdisp32(0x0F2A, r1, d)
12.221 +#define CVTSI2SDQ_r64_xmm(r1,r2) OP(0xF2); x86_encode_r64_rm64(0x0F2A, r2, r1)
12.222 +#define CVTSI2SDQ_rbpdisp_xmm(d,r1) OP(0xF2); x86_encode_r64_rbpdisp64(0x0F2A, r1, d)
12.223 +#define CVTSD2SIL_xmm_r32(r1,r2) OP(0xF2); x86_encode_r32_rm32(0x0F2D, r2, r1)
12.224 +#define CVTSD2SIL_rbpdisp_r32(d,r1) OP(0xF2); x86_encode_r32_rbpdisp32(0x0F2D, r1, d)
12.225 +#define CVTSD2SIQ_xmm_r64(r1,r2) OP(0xF2); x86_encode_r64_rm64(0x0F2D, r2, r1)
12.226 +#define CVTSD2SIQ_rbpdisp_r64(d,r1) OP(0xF2); x86_encode_r64_rbpdisp64(0x0F2D, r1, d)
12.227 +#define CVTSD2SS_rbpdisp_xmm(dsp,r1) OP(0xF2); x86_encode_r32_rbpdisp32(0x0F5A, r1, dsp)
12.228 +#define CVTSD2SS_xmm_xmm(r1,r2) OP(0xF2); x86_encode_r32_rm32(0x0F5A, r2, r1)
12.229 +#define CVTSS2SD_rbpdisp_xmm(dsp,r1) OP(0xF3); x86_encode_r32_rbpdisp32(0x0F5A, r1, dsp)
12.230 +#define CVTSS2SD_xmm_xmm(r1,r2) OP(0xF3); x86_encode_r32_rm32(0x0F5A, r2, r1)
12.231 +#define CVTTSD2SIL_xmm_r32(r1,r2) OP(0xF2); x86_encode_r32_rm32(0x0F2C, r2, r1)
12.232 +#define CVTTSD2SIL_rbpdisp_r32(d,r1) OP(0xF2); x86_encode_r32_rbpdisp32(0x0F2C, r1, d)
12.233 +#define CVTTSD2SIQ_xmm_r64(r1,r2) OP(0xF2); x86_encode_r64_rm64(0x0F2C, r2, r1)
12.234 +#define CVTTSD2SIQ_rbpdisp_r64(d,r1) OP(0xF2); x86_encode_r64_rbpdisp64(0x0F2C, r1, d)
12.235 +
12.236 #define COMISD_rbpdisp_xmm(disp,r1) OP(0x66); x86_encode_r32_rbpdisp32(0x0F2F, r1, disp)
12.237 #define COMISD_xmm_xmm(r1,r2) OP(0x66); x86_encode_r32_rm32(0x0F2F, r2, r1)
12.238 #define DIVSD_rbpdisp_xmm(disp,r1) OP(0xF2); x86_encode_r32_rbpdisp32(0x0F5E, r1, disp)
13.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
13.2 +++ b/src/xlat/x86/x86target.c Tue Apr 07 10:55:03 2009 +0000
13.3 @@ -0,0 +1,367 @@
13.4 +/**
13.5 + * $Id: xir.h 931 2008-10-31 02:57:59Z nkeynes $
13.6 + *
13.7 + * x86/x86-64 target support
13.8 + *
13.9 + * Copyright (c) 2009 Nathan Keynes.
13.10 + *
13.11 + * This program is free software; you can redistribute it and/or modify
13.12 + * it under the terms of the GNU General Public License as published by
13.13 + * the Free Software Foundation; either version 2 of the License, or
13.14 + * (at your option) any later version.
13.15 + *
13.16 + * This program is distributed in the hope that it will be useful,
13.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
13.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13.19 + * GNU General Public License for more details.
13.20 + */
13.21 +#include <assert.h>
13.22 +
13.23 +#include "xlat/xir.h"
13.24 +#include "xlat/xlat.h"
13.25 +#include "xlat/x86/x86op.h"
13.26 +
13.27 +static char *x86_reg_names[] = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
13.28 + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
13.29 + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
13.30 + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" };
13.31 +
13.32 +void x86_target_lower( xir_basic_block_t xbb, xir_op_t begin, xir_op_t end );
13.33 +uint32_t x86_target_get_code_size( xir_op_t begin, xir_op_t end );
13.34 +uint32_t x86_target_codegen( target_data_t td, xir_op_t begin, xir_op_t end );
13.35 +
13.36 +struct xlat_target_machine x86_target_machine = { "x86", x86_reg_names,
13.37 + NULL, x86_target_lower, x86_target_get_code_size, x86_target_codegen
13.38 +};
13.39 +
13.40 +
13.41 +
13.42 +/******************************************************************************
13.43 + * Target lowering - Replace higher level/unsupported operations *
13.44 + * with equivalent x86 sequences. Note that we don't lower conditional ops *
13.45 + * here - that's left to final codegen as we can't represent local branches *
13.46 + * in XIR *
13.47 + *****************************************************************************/
13.48 +
13.49 +#define MEM_FUNC_OFFSET(name) offsetof( struct mem_region_fn, name )
13.50 +
13.51 +/**
13.52 + * Construct an XLAT operation and append it to the code block. For 64-bit
13.53 + * code this may need to be a load/xlat sequence as we can't encode a 64-bit
13.54 + * displacement.
13.55 + */
13.56 +static inline void xir_append_xlat( xir_basic_block_t xbb, void *address_space,
13.57 + int opertype, int operval )
13.58 +{
13.59 + if( sizeof(void *) == 8 ) {
13.60 + xir_append_ptr_op2( xbb, OP_MOVQ, address_space, SOURCE_REGISTER_OPERAND, REG_TMP4 );
13.61 + xir_append_op2( xbb, OP_XLAT, SOURCE_REGISTER_OPERAND, REG_TMP4, opertype, operval );
13.62 + } else {
13.63 + xir_append_ptr_op2( xbb, OP_XLAT, address_space, opertype, operval );
13.64 + }
13.65 +}
13.66 +
13.67 +
13.68 +/* Replace LOAD/STORE with low-level calling sequence eg:
13.69 + * mov addr, %eax
13.70 + * mov addr, %tmp3
13.71 + * slr 12, %tmp3
13.72 + * xlat $sh4_address_space, %tmp3
13.73 + * call/lut %tmp3, $operation_offset
13.74 + * mov %eax, result
13.75 + */
13.76 +static void lower_mem_load( xir_basic_block_t xbb, xir_op_t it, void *addr_space, int offset )
13.77 +{
13.78 + xir_op_t start =
13.79 + xir_append_op2( xbb, OP_MOV, it->operand[0].type, it->operand[0].value.i, TARGET_REGISTER_OPERAND, REG_ARG1 );
13.80 + xir_append_op2( xbb, OP_MOV, it->operand[0].type, it->operand[0].value.i, SOURCE_REGISTER_OPERAND, REG_TMP3 );
13.81 + xir_append_op2( xbb, OP_SLR, INT_IMM_OPERAND, 12, SOURCE_REGISTER_OPERAND, REG_TMP3 );
13.82 + xir_append_xlat( xbb, addr_space, SOURCE_REGISTER_OPERAND, REG_TMP3 );
13.83 + xir_insert_block(start,xbb->ir_ptr-1, it);
13.84 + if( XOP_WRITES_OP2(it) ) {
13.85 + xir_insert_op( xir_append_op2( xbb, OP_MOV, TARGET_REGISTER_OPERAND, REG_RESULT1, it->operand[1].type, it->operand[1].value.i ), it->next );
13.86 + }
13.87 + /* Replace original op with CALLLUT */
13.88 + it->opcode = OP_CALLLUT;
13.89 + it->operand[0].type = SOURCE_REGISTER_OPERAND;
13.90 + it->operand[0].value.i = REG_TMP3;
13.91 + it->operand[1].type = INT_IMM_OPERAND;
13.92 + it->operand[1].value.i = offset;
13.93 +}
13.94 +
13.95 +static void lower_mem_store( xir_basic_block_t xbb, xir_op_t it, void *addr_space, int offset )
13.96 +{
13.97 + xir_op_t start =
13.98 + xir_append_op2( xbb, OP_MOV, it->operand[0].type, it->operand[0].value.i, TARGET_REGISTER_OPERAND, REG_ARG1 );
13.99 + xir_append_op2( xbb, OP_MOV, it->operand[1].type, it->operand[1].value.i, TARGET_REGISTER_OPERAND, REG_ARG2 );
13.100 + xir_append_op2( xbb, OP_MOV, it->operand[0].type, it->operand[0].value.i, SOURCE_REGISTER_OPERAND, REG_TMP3 );
13.101 + xir_append_op2( xbb, OP_SLR, INT_IMM_OPERAND, 12, SOURCE_REGISTER_OPERAND, REG_TMP3 );
13.102 + xir_append_xlat( xbb, addr_space, SOURCE_REGISTER_OPERAND, REG_TMP3 );
13.103 + xir_insert_block(start,xbb->ir_ptr-1, it);
13.104 + /* Replace original op with CALLLUT */
13.105 + it->opcode = OP_CALLLUT;
13.106 + it->operand[0].type = SOURCE_REGISTER_OPERAND;
13.107 + it->operand[0].value.i = REG_TMP3;
13.108 + it->operand[1].type = INT_IMM_OPERAND;
13.109 + it->operand[1].value.i = offset;
13.110 +}
13.111 +
13.112 +static void lower_mem_loadq( xir_basic_block_t xbb, xir_op_t it, void *addr_space )
13.113 +{
13.114 + int resulttype = it->operand[1].type;
13.115 + uint32_t resultval = it->operand[1].value.i;
13.116 +
13.117 + /* First block */
13.118 + xir_op_t start =
13.119 + xir_append_op2( xbb, OP_MOV, it->operand[0].type, it->operand[0].value.i, TARGET_REGISTER_OPERAND, REG_ARG1 );
13.120 + xir_append_op2( xbb, OP_MOV, it->operand[0].type, it->operand[0].value.i, SOURCE_REGISTER_OPERAND, REG_TMP3 );
13.121 + xir_append_op2( xbb, OP_SLR, INT_IMM_OPERAND, 12, SOURCE_REGISTER_OPERAND, REG_TMP3 );
13.122 + xir_append_xlat( xbb, addr_space, SOURCE_REGISTER_OPERAND, REG_TMP3 );
13.123 + xir_insert_block(start,xbb->ir_ptr-1, it);
13.124 + /* Replace original op with CALLLUT */
13.125 + it->opcode = OP_CALLLUT;
13.126 + it->operand[0].type = SOURCE_REGISTER_OPERAND;
13.127 + it->operand[0].value.i = REG_TMP3;
13.128 + it->operand[1].type = INT_IMM_OPERAND;
13.129 + it->operand[1].value.i = MEM_FUNC_OFFSET(read_long);
13.130 +
13.131 + /* Second block */
13.132 + start = xir_append_op2( xbb, OP_MOV, TARGET_REGISTER_OPERAND, REG_RESULT1, resulttype, resultval+1 );
13.133 + xir_append_op2( xbb, OP_ADD, INT_IMM_OPERAND, 4, SOURCE_REGISTER_OPERAND, REG_ARG1 );
13.134 + xir_op_t fin = xir_append_op2( xbb, OP_CALLLUT, SOURCE_REGISTER_OPERAND, REG_TMP3, INT_IMM_OPERAND, MEM_FUNC_OFFSET(read_long) );
13.135 + xir_append_op2( xbb, OP_MOV, TARGET_REGISTER_OPERAND, REG_RESULT1, resulttype, resultval );
13.136 + fin->exc = it->exc;
13.137 + xir_insert_block(start, xbb->ir_ptr-1, it->next);
13.138 +}
13.139 +
13.140 +static void lower_mem_storeq( xir_basic_block_t xbb, xir_op_t it, void *addr_space )
13.141 +{
13.142 + int argtype = it->operand[1].type;
13.143 + uint32_t argval = it->operand[1].value.i;
13.144 +
13.145 + /* First block */
13.146 + xir_op_t start =
13.147 + xir_append_op2( xbb, OP_MOV, it->operand[0].type, it->operand[0].value.i, TARGET_REGISTER_OPERAND, REG_ARG1 );
13.148 + xir_append_op2( xbb, OP_MOV, argtype, argval+1, TARGET_REGISTER_OPERAND, REG_ARG2 );
13.149 + xir_append_op2( xbb, OP_MOV, it->operand[0].type, it->operand[0].value.i, SOURCE_REGISTER_OPERAND, REG_TMP3 );
13.150 + xir_append_op2( xbb, OP_SLR, INT_IMM_OPERAND, 12, SOURCE_REGISTER_OPERAND, REG_TMP3 );
13.151 + xir_append_xlat( xbb, addr_space, SOURCE_REGISTER_OPERAND, REG_TMP3 );
13.152 + xir_insert_block(start,xbb->ir_ptr-1, it);
13.153 + /* Replace original op with CALLLUT */
13.154 + it->opcode = OP_CALLLUT;
13.155 + it->operand[0].type = SOURCE_REGISTER_OPERAND;
13.156 + it->operand[0].value.i = REG_TMP3;
13.157 + it->operand[1].type = INT_IMM_OPERAND;
13.158 +    it->operand[1].value.i = MEM_FUNC_OFFSET(write_long);
13.159 +
13.160 + /* Second block */
13.161 +    start = xir_append_op2( xbb, OP_MOV, argtype, argval, TARGET_REGISTER_OPERAND, REG_ARG2 );
13.162 + xir_append_op2( xbb, OP_ADD, INT_IMM_OPERAND, 4, SOURCE_REGISTER_OPERAND, REG_ARG1 );
13.163 +    xir_op_t fin = xir_append_op2( xbb, OP_CALLLUT, SOURCE_REGISTER_OPERAND, REG_TMP3, INT_IMM_OPERAND, MEM_FUNC_OFFSET(write_long) );
13.164 + fin->exc = it->exc;
13.165 + xir_insert_block(start, xbb->ir_ptr-1, it->next);
13.166 +}
13.167 +
13.168 +
13.169 +/**
13.170 + * Runs a single pass over the block, performing the following transformations:
13.171 + * Load/Store ops -> Mov/call sequences
13.172 + * Flags -> explicit SETcc/loadcc ops where necessary (doesn't try to reorder
13.173 + * at the moment)
13.174 + * Mov operands into target specific registers where the ISA requires it. (eg SAR)
13.175 + * Run in reverse order so we can track liveness of the flags as we go (for ALU
13.176 + * lowering to flags-modifying instructions)
13.177 + */
13.178 +void x86_target_lower( xir_basic_block_t xbb, xir_op_t start, xir_op_t end )
13.179 +{
13.180 + gboolean flags_live = FALSE;
13.181 + xir_op_t it;
13.182 + for( it=end; it != NULL; it = it->prev ) {
13.183 + switch( it->opcode ) {
13.184 +
13.185 + /* Promote non-flag versions to flag versions where there's no flag-free version
13.186 + * (in other words, all ALU ops except ADD, since we can use LEA for a flag-free
13.187 + * ADD
13.188 + */
13.189 + case OP_ADDC: case OP_AND: case OP_DIV: case OP_MUL: case OP_MULQ:
13.190 + case OP_NEG: case OP_NOT: case OP_OR: case OP_XOR: case OP_SUB:
13.191 + case OP_SUBB: case OP_SDIV:
13.192 + it->opcode++;
13.193 + if( flags_live ) {
13.194 + xir_insert_op( XOP1( OP_SAVEFLAGS, REG_TMP5 ), it );
13.195 + xir_insert_op( XOP1( OP_RESTFLAGS, REG_TMP5 ), it->next );
13.196 + }
13.197 + break;
13.198 +
13.199 + case OP_SAR: case OP_SLL: case OP_SLR: case OP_ROL: case OP_ROR:
13.200 + /* Promote to *S form since we don't have a non-flag version */
13.201 + it->opcode++;
13.202 + if( flags_live ) {
13.203 + xir_insert_op( XOP1( OP_SAVEFLAGS, REG_TMP5 ), it );
13.204 + xir_insert_op( XOP1( OP_RESTFLAGS, REG_TMP5 ), it->next );
13.205 + }
13.206 +            /* fallthrough - promoted shift/rotate ops also need the count in %ecx */
13.207 + case OP_SARS: case OP_SLLS: case OP_SLRS:
13.208 + case OP_RCL: case OP_RCR: case OP_ROLS: case OP_RORS:
13.209 + /* Insert mov %reg, %ecx */
13.210 + if( it->operand[0].type == SOURCE_REGISTER_OPERAND ) {
13.211 + xir_insert_op( xir_append_op2( xbb, OP_MOV, SOURCE_REGISTER_OPERAND, it->operand[0].value.i, TARGET_REGISTER_OPERAND, REG_ECX ), it );
13.212 + it->operand[0].type = TARGET_REGISTER_OPERAND;
13.213 + it->operand[0].value.i = REG_ECX;
13.214 + }
13.215 + break;
13.216 + case OP_SHLD: case OP_SHAD:
13.217 + /* Insert mov %reg, %ecx */
13.218 + if( it->operand[0].type == SOURCE_REGISTER_OPERAND ) {
13.219 + xir_insert_op( xir_append_op2( xbb, OP_MOV, SOURCE_REGISTER_OPERAND, it->operand[0].value.i, TARGET_REGISTER_OPERAND, REG_ECX ), it );
13.220 + it->operand[0].type = TARGET_REGISTER_OPERAND;
13.221 + it->operand[0].value.i = REG_ECX;
13.222 + } else if( it->operand[0].type == INT_IMM_OPERAND ) {
13.223 + /* Simplify down to SAR/SLL/SLR where we have a constant shift */
13.224 + if( it->operand[0].value.i == 0 ) {
13.225 + /* No-op */
13.226 + it->opcode = OP_NOP;
13.227 + it->operand[1].type = it->operand[0].type = NO_OPERAND;
13.228 + } else if( it->operand[0].value.i > 0 ) {
13.229 + it->opcode = OP_SLL;
13.230 + } else if( (it->operand[0].value.i & 0x1F) == 0 ) {
13.231 + if( it->opcode == OP_SHLD ) {
13.232 + it->opcode = OP_MOV;
13.233 + it->operand[0].value.i = 0;
13.234 + } else {
13.235 + it->opcode = OP_SAR;
13.236 + it->operand[0].value.i = 31;
13.237 + }
13.238 + } else {
13.239 + if( it->opcode == OP_SHLD ) {
13.240 + it->opcode = OP_SLR;
13.241 + } else {
13.242 + it->opcode = OP_SAR;
13.243 + }
13.244 + }
13.245 + }
13.246 + break;
13.247 +
13.248 + case OP_CALL1: /* Reduce to mov reg, %eax; call0 ptr */
13.249 + xir_insert_op( xir_append_op2( xbb, OP_MOV, it->operand[1].type, it->operand[1].value.i, TARGET_REGISTER_OPERAND, REG_ARG1 ), it );
13.250 + it->opcode = OP_CALL0;
13.251 + it->operand[1].type = NO_OPERAND;
13.252 + break;
13.253 + case OP_CALLR: /* reduce to call0 ptr, mov result, reg */
13.254 + xir_insert_op( xir_append_op2( xbb, OP_MOV, TARGET_REGISTER_OPERAND, REG_RESULT1, it->operand[1].type, it->operand[1].value.i), it->next );
13.255 + it->opcode = OP_CALL0;
13.256 + it->operand[1].type = NO_OPERAND;
13.257 + break;
13.258 + case OP_LOADB:
13.259 + lower_mem_load( xbb, it, xbb->address_space, MEM_FUNC_OFFSET(read_byte) );
13.260 + break;
13.261 + case OP_LOADW:
13.262 + lower_mem_load( xbb, it, xbb->address_space, MEM_FUNC_OFFSET(read_word) );
13.263 + break;
13.264 + case OP_LOADL:
13.265 + lower_mem_load( xbb, it, xbb->address_space, MEM_FUNC_OFFSET(read_long) );
13.266 + break;
13.267 + case OP_LOADBFW:
13.268 + lower_mem_load( xbb, it, xbb->address_space, MEM_FUNC_OFFSET(read_byte_for_write) );
13.269 + break;
13.270 + case OP_LOADQ:
13.271 + lower_mem_loadq( xbb, it, xbb->address_space );
13.272 + break;
13.273 + case OP_PREF:
13.274 + lower_mem_load( xbb, it, xbb->address_space, MEM_FUNC_OFFSET(prefetch) );
13.275 + break;
13.276 + case OP_OCBI:
13.277 + case OP_OCBP:
13.278 + case OP_OCBWB:
13.279 + break;
13.280 + case OP_STOREB:
13.281 + lower_mem_store( xbb, it, xbb->address_space, MEM_FUNC_OFFSET(write_byte) );
13.282 + break;
13.283 + case OP_STOREW:
13.284 + lower_mem_store( xbb, it, xbb->address_space, MEM_FUNC_OFFSET(write_word) );
13.285 + break;
13.286 + case OP_STOREL:
13.287 + lower_mem_store( xbb, it, xbb->address_space, MEM_FUNC_OFFSET(write_long) );
13.288 + break;
13.289 + case OP_STORELCA:
13.290 + lower_mem_store( xbb, it, xbb->address_space, MEM_FUNC_OFFSET(write_long) );
13.291 + break;
13.292 + case OP_STOREQ:
13.293 + lower_mem_storeq( xbb, it, xbb->address_space );
13.294 + break;
13.295 +
13.296 + case OP_SHUFFLE:
13.297 + assert( it->operand[0].type == INT_IMM_OPERAND );
13.298 +        if( it->operand[0].value.i == 0x2134 ) { /* Swap low bytes */
13.299 + /* This is an xchg al,ah, but we need to force the operand into one of the bottom 4 registers */
13.300 + xir_insert_op( xir_append_op2( xbb, OP_MOV, it->operand[1].type, it->operand[1].value.i, TARGET_REGISTER_OPERAND, REG_EAX ), it);
13.301 + it->operand[1].type = TARGET_REGISTER_OPERAND;
13.302 + it->operand[1].value.i = REG_EAX;
13.303 + } else if( it->operand[0].value.i != 0x4321 ) {
13.304 + /* 4321 is a full byteswap (directly supported) - use shift/mask/or
13.305 + * sequence for anything else. Although we could use PSHUF...
13.306 + */
13.307 + it = xir_shuffle_lower( xbb, it, REG_TMP3, REG_TMP4 );
13.308 + }
13.309 + break;
13.310 + case OP_NEGF:
13.311 + xir_insert_op( xir_append_op2( xbb, OP_MOV, INT_IMM_OPERAND, 0, SOURCE_REGISTER_OPERAND, REG_TMP4 ), it);
13.312 + xir_insert_op( xir_append_op2( xbb, OP_MOV, SOURCE_REGISTER_OPERAND, REG_TMP4,it->operand[0].type, it->operand[0].value.i), it->next );
13.313 + it->opcode = OP_SUBF;
13.314 + it->operand[1].type = SOURCE_REGISTER_OPERAND;
13.315 + it->operand[1].value.i = REG_TMP4;
13.316 + break;
13.317 + case OP_NEGD:
13.318 + xir_insert_op( xir_append_op2( xbb, OP_MOVQ, INT_IMM_OPERAND, 0, SOURCE_REGISTER_OPERAND, REG_TMPQ0 ), it);
13.319 + xir_insert_op( xir_append_op2( xbb, OP_MOVQ, SOURCE_REGISTER_OPERAND, REG_TMPQ0,it->operand[0].type, it->operand[0].value.i), it->next );
13.320 + it->opcode = OP_SUBD;
13.321 + it->operand[1].type = SOURCE_REGISTER_OPERAND;
13.322 + it->operand[1].value.i = REG_TMPQ0;
13.323 + break;
13.324 + case OP_XLAT:
13.325 + /* Insert temp register if translating through a 64-bit pointer */
13.326 + if( XOP_IS_PTRIMM(it, 0) && sizeof(void *) == 8 && it->operand[0].value.q >= 0x100000000LL ) {
13.327 + xir_insert_op( XOP2P( OP_MOVQ, it->operand[0].value.p, REG_TMP4 ), it );
13.328 + it->operand[0].type = SOURCE_REGISTER_OPERAND;
13.329 + it->operand[0].value.i = REG_TMP4;
13.330 + }
13.331 + break;
13.332 + }
13.333 +
13.334 + if( XOP_READS_FLAGS(it) ) {
13.335 + flags_live = TRUE;
13.336 + } else if( XOP_WRITES_FLAGS(it) ) {
13.337 + flags_live = FALSE;
13.338 + }
13.339 +
13.340 + /* Lower pointer operands to INT or QUAD according to address and value size. */
13.341 + if( it->operand[0].type == POINTER_OPERAND ) {
13.342 + if( sizeof(void *) == 8 && it->operand[0].value.q >= 0x100000000LL ) {
13.343 +                if( it->opcode == OP_MOV ) {
13.344 +                    // Promote MOV ptr, reg to MOVQ ptr, reg
13.345 +                    it->opcode = OP_MOVQ;
13.346 +                }
13.347 +                if( it->opcode == OP_MOVQ ) {
13.348 +                    it->operand[0].type = QUAD_IMM_OPERAND;
13.349 +                } else {
13.350 +                    /* 64-bit pointers can't be used as immediate values - break up
13.351 +                     * into an immediate load to temporary, followed by the original
13.352 +                     * instruction. (Only operand 0 can be an immediate pointer.) */
13.353 +                    xir_insert_op( xir_append_op2( xbb, OP_MOVQ, POINTER_OPERAND, it->operand[0].value.q, SOURCE_REGISTER_OPERAND, REG_TMP4 ), it );
13.354 +                    it->operand[0].type = SOURCE_REGISTER_OPERAND;
13.355 +                    it->operand[0].value.i = REG_TMP4;
13.356 +                }
13.357 + } else {
13.358 + if( it->opcode == OP_MOVQ ) {
13.359 + /* Lower a MOVQ of a 32-bit quantity to a MOV, and save the 5 bytes */
13.360 + it->opcode = OP_MOV;
13.361 + }
13.362 + it->operand[0].type = INT_IMM_OPERAND;
13.363 + }
13.364 + }
13.365 +
13.366 + if( it == start )
13.367 + break;
13.368 + }
13.369 +
13.370 +}
14.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
14.2 +++ b/src/xlat/x86/x86target.h Tue Apr 07 10:55:03 2009 +0000
14.3 @@ -0,0 +1,25 @@
14.4 +/**
14.5 + * $Id: xir.h 931 2008-10-31 02:57:59Z nkeynes $
14.6 + *
14.7 + * x86/x86-64 target support
14.8 + *
14.9 + * Copyright (c) 2009 Nathan Keynes.
14.10 + *
14.11 + * This program is free software; you can redistribute it and/or modify
14.12 + * it under the terms of the GNU General Public License as published by
14.13 + * the Free Software Foundation; either version 2 of the License, or
14.14 + * (at your option) any later version.
14.15 + *
14.16 + * This program is distributed in the hope that it will be useful,
14.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
14.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14.19 + * GNU General Public License for more details.
14.20 + */
14.21 +
14.22 +#ifndef lxdream_x86target_H
14.23 +#define lxdream_x86target_H
14.24 +
14.25 +extern struct xlat_target_machine x86_target_machine;
14.26 +
14.27 +
14.28 +#endif /* !lxdream_x86target_H */
15.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
15.2 +++ b/src/xlat/xir.c Tue Apr 07 10:55:03 2009 +0000
15.3 @@ -0,0 +1,458 @@
15.4 +/**
15.5 + * $Id: xir.h 931 2008-10-31 02:57:59Z nkeynes $
15.6 + *
15.7 + * This file provides support functions for the translation IR.
15.8 + *
15.9 + * Copyright (c) 2009 Nathan Keynes.
15.10 + *
15.11 + * This program is free software; you can redistribute it and/or modify
15.12 + * it under the terms of the GNU General Public License as published by
15.13 + * the Free Software Foundation; either version 2 of the License, or
15.14 + * (at your option) any later version.
15.15 + *
15.16 + * This program is distributed in the hope that it will be useful,
15.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
15.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15.19 + * GNU General Public License for more details.
15.20 + */
15.21 +
15.22 +#include <stdio.h>
15.23 +#include <string.h>
15.24 +#include <assert.h>
15.25 +#include "xlat/xir.h"
15.26 +
15.27 +static const char **xir_source_register_names = NULL;
15.28 +static const char **xir_target_register_names = NULL;
15.29 +static const struct xir_symbol_entry *xir_symbol_table = NULL;
15.30 +
15.31 +static const char *XIR_CC_NAMES[] = {
15.32 + "ov", "no", "uge", "ult", "ule", "ugt", "eq", "ne",
15.33 + "neg", "pos", "sge", "slt", "sle", "sgt" };
15.34 +static const int XIR_OPERAND_SIZE[] = { 4, 8, 4, 8, 16, 64, 0, 0 };
15.35 +
/* Name + operand-mode descriptor for every opcode.
 * NOTE: entries must be kept in exact 1:1 order with the xir_opcode_t
 * enumeration in xlat/xir.h - the table is indexed directly by opcode
 * (XIR_OPCODE_TABLE[op->opcode]). */
const struct xir_opcode_entry XIR_OPCODE_TABLE[] = {
    /* No-operand ops */
    { "NOP", OPM_NO },
    { "BARRIER", OPM_NO | OPM_CLB },
    /* One-operand ops */
    { "DEC", OPM_RW_TW },
    { "LD", OPM_R | OPM_TW },
    { "ST", OPM_W | OPM_TR },
    { "RESTFLAGS", OPM_R | OPM_TW },
    { "SAVEFLAGS", OPM_W | OPM_TR },
    { "ENTER", OPM_R },
    { "BRREL", OPM_R | OPM_TERM },
    { "BR", OPM_R | OPM_TERM },
    { "CALL0", OPM_R | OPM_CLB },
    { "OCBI", OPM_R_EXC },
    { "OCBP", OPM_R_EXC },
    { "OCBWB", OPM_R_EXC },
    { "PREF", OPM_R_EXC },

    /* Register moves */
    { "MOV", OPM_R_W },
    { "MOVQ", OPM_R_W|OPM_Q_Q },
    { "MOVV", OPM_R_W|OPM_V_V },
    { "MOVM", OPM_R_W|OPM_M_M },
    { "MOVSX8", OPM_R_W },
    { "MOVSX16", OPM_R_W },
    { "MOVSX32", OPM_R_W|OPM_I_Q },
    { "MOVZX8", OPM_R_W },
    { "MOVZX16", OPM_R_W },
    { "MOVZX32", OPM_R_W|OPM_I_Q },

    /* ALU ops - xxxS variants also update the flags */
    { "ADD", OPM_R_RW },
    { "ADDS", OPM_R_RW_TW },
    { "ADDC", OPM_R_RW_TR },
    { "ADDCS", OPM_R_RW_TRW },
    { "AND", OPM_R_RW },
    { "ANDS", OPM_R_RW_TW },
    { "CMP", OPM_R_R_TW },
    { "DIV", OPM_R_RW },
    { "DIVS", OPM_R_RW_TW },
    { "MUL", OPM_R_RW },
    { "MULS", OPM_R_RW_TW },
    { "MULQ", OPM_R_RW|OPM_Q_Q },
    { "MULQS", OPM_R_RW_TW|OPM_Q_Q },
    { "NEG", OPM_R_W },
    { "NEGS", OPM_R_W_TW },
    { "NOT", OPM_R_W },
    { "NOTS", OPM_R_W_TW },
    { "OR", OPM_R_RW },
    { "ORS", OPM_R_RW_TW },
    { "RCL", OPM_R_RW_TRW },
    { "RCR", OPM_R_RW_TRW },
    { "ROL", OPM_R_RW },
    { "ROLS", OPM_R_RW_TW },
    { "ROR", OPM_R_RW },
    { "RORS", OPM_R_RW_TW },
    { "SAR", OPM_R_RW },
    { "SARS", OPM_R_RW_TW },
    { "SDIV", OPM_R_RW },
    { "SDIVS", OPM_R_RW_TW },
    { "SLL", OPM_R_RW },
    { "SLLS", OPM_R_RW_TW },
    { "SLR", OPM_R_RW },
    { "SLRS", OPM_R_RW_TW },
    { "SUB", OPM_R_RW },
    { "SUBS", OPM_R_RW_TW },
    { "SUBB", OPM_R_RW },
    { "SUBBS", OPM_R_RW_TRW },
    { "SHUFFLE", OPM_R_RW },
    { "TST", OPM_R_R_TW },
    { "XOR", OPM_R_RW },
    { "XORS", OPM_R_RW_TW },
    { "XLAT", OPM_R_RW },

    /* FPU ops */
    { "ABSD", OPM_DR_DW },
    { "ABSF", OPM_FR_FW },
    { "ABSV", OPM_VR_VW },
    { "ADDD", OPM_DR_DRW },
    { "ADDF", OPM_FR_FRW },
    { "ADDV", OPM_VR_VRW },
    { "CMPD", OPM_DR_DR_TW },
    { "CMPF", OPM_FR_FR_TW },
    { "DIVD", OPM_DR_DRW },
    { "DIVF", OPM_FR_FRW },
    { "DIVV", OPM_VR_VRW },
    { "MULD", OPM_DR_DRW },
    { "MULF", OPM_FR_FRW },
    { "MULV", OPM_VR_VRW },
    { "NEGD", OPM_DR_DW },
    { "NEGF", OPM_FR_FW },
    { "NEGV", OPM_VR_VW },
    { "SQRTD", OPM_DR_DW },
    { "SQRTF", OPM_FR_FW },
    { "SQRTV", OPM_VR_VW },
    { "RSQRTD", OPM_DR_DW },
    { "RSQRTF", OPM_FR_FW },
    { "RSQRTV", OPM_VR_VW },
    { "SUBD", OPM_DR_DRW },
    { "SUBF", OPM_FR_FRW },
    { "SUBV", OPM_VR_VRW },

    { "DTOF", OPM_R_W|OPM_D_F }, /* Round according to rounding mode */
    { "DTOI", OPM_R_W|OPM_D_I }, /* Truncate + saturate to signed 32-bits */
    { "FTOD", OPM_R_W|OPM_F_D }, /* Exact */
    { "FTOI", OPM_R_W|OPM_F_I }, /* Truncate + saturate to signed 32-bits */
    { "ITOD", OPM_R_W|OPM_I_D }, /* Exact */
    { "ITOF", OPM_R_W|OPM_I_F }, /* Round according to rounding mode */

    { "SINCOSF", OPM_FR_FRW },

    /* Compute the dot product of two vectors - the result is
     * stored in the last element of the target operand (and the
     * other elements are unchanged)
     */
    { "DOTPRODV", OPM_R_RW|OPM_V_V },
    /* Perform the matrix multiplication V * M and place the result
     * in V.
     * NOTE(review): the mode marks operand[1] (the matrix, per OPM_V_M)
     * as the read/write operand while the header comment says the result
     * goes in V - confirm which operand receives the result.
     */
    { "MATMULV", OPM_R_RW|OPM_V_M },

    /* Memory ops - may raise an exception via the exc edge */
    { "LOAD.B", OPM_R_W_EXC },
    { "LOAD.BFW", OPM_R_W_EXC },
    { "LOAD.W", OPM_R_W_EXC },
    { "LOAD.L", OPM_R_W_EXC },
    { "LOAD.Q", OPM_R_W_EXC|OPM_I_Q },
    { "STORE.B", OPM_R_R_EXC },
    { "STORE.W", OPM_R_R_EXC },
    { "STORE.L", OPM_R_R_EXC },
    { "STORE.Q", OPM_R_R_EXC|OPM_I_Q },
    { "STORE.LCA", OPM_R_R_EXC },

    /* Branches / exception raises */
    { "BRCOND", OPM_R_R|OPM_TR | OPM_TERM },
    { "BRCONDDEL", OPM_R_R|OPM_TR },
    { "RAISE/ME", OPM_R_R | OPM_EXC },
    { "RAISE/MNE", OPM_R_R | OPM_EXC },

    /* Native calls */
    { "CALL/LUT", OPM_R_R | OPM_EXC },
    { "CALL1", OPM_R_R | OPM_CLB },
    { "CALLR", OPM_R_W | OPM_CLB },

    /* SH4-specific macro ops */
    { "ADDQSAT32", OPM_R_R | OPM_CLBT|OPM_Q_Q },
    { "ADDQSAT48", OPM_R_R | OPM_CLBT|OPM_Q_Q },
    { "CMP/STR", OPM_R_R_TW | OPM_CLBT },
    { "DIV1", OPM_R_RW_TRW | OPM_CLBT },
    { "SHAD", OPM_R_RW | OPM_CLBT },
    { "SHLD", OPM_R_RW | OPM_CLBT },

};
15.181 +
15.182 +void xir_set_register_names( const char **source_regs, const char **target_regs )
15.183 +{
15.184 + xir_source_register_names = source_regs;
15.185 + xir_target_register_names = target_regs;
15.186 +}
15.187 +
15.188 +void xir_set_symbol_table( const struct xir_symbol_entry *symtab )
15.189 +{
15.190 + xir_symbol_table = symtab;
15.191 +}
15.192 +
15.193 +const char *xir_lookup_symbol( const void *ptr )
15.194 +{
15.195 + if( xir_symbol_table != NULL ) {
15.196 + const struct xir_symbol_entry *p;
15.197 + for( p = xir_symbol_table; p->name != NULL; p++ ) {
15.198 + if( p->ptr == ptr ) {
15.199 + return p->name;
15.200 + }
15.201 + }
15.202 + }
15.203 + return NULL;
15.204 +}
15.205 +
15.206 +int xir_snprint_operand( char *buf, int buflen, xir_operand_t op )
15.207 +{
15.208 + const char *name;
15.209 + switch( op->type ) {
15.210 + case INT_IMM_OPERAND:
15.211 + return snprintf( buf, buflen, "$0x%x", op->value.i );
15.212 + case FLOAT_IMM_OPERAND:
15.213 + return snprintf( buf, buflen, "%f", op->value.f );
15.214 + case DOUBLE_IMM_OPERAND:
15.215 + return snprintf( buf, buflen, "%f", op->value.f );
15.216 + case POINTER_OPERAND:
15.217 + name = xir_lookup_symbol( op->value.p );
15.218 + if( name != NULL ) {
15.219 + return snprintf( buf, buflen, "*%s", name );
15.220 + } else {
15.221 + return snprintf( buf, buflen, "*%p", op->value.p );
15.222 + }
15.223 + case SOURCE_REGISTER_OPERAND:
15.224 + if( op->value.i >= MIN_SOURCE_REGISTER && op->value.i <= MAX_SOURCE_REGISTER ) {
15.225 + if( xir_source_register_names ) {
15.226 + return snprintf( buf, buflen, "%%%s", xir_source_register_names[(op->value.i-MIN_SOURCE_REGISTER)>>2] );
15.227 + } else {
15.228 + return snprintf( buf, buflen, "%%src%d", op->value.i-MIN_SOURCE_REGISTER );
15.229 + }
15.230 + } else {
15.231 + return snprintf( buf, buflen, "%%tmp%d", op->value.i-MIN_TEMP_REGISTER );
15.232 + }
15.233 + case TARGET_REGISTER_OPERAND:
15.234 + if( xir_target_register_names ) {
15.235 + return snprintf( buf, buflen, "%%%s", xir_target_register_names[op->value.i-MIN_TARGET_REGISTER] );
15.236 + } else {
15.237 + return snprintf( buf, buflen, "%%dst%d", op->value.i-MIN_TARGET_REGISTER );
15.238 + }
15.239 + default:
15.240 + return snprintf( buf, buflen, "ILLOP" );
15.241 + }
15.242 +}
15.243 +
15.244 +void xir_print_instruction( FILE *out, xir_op_t i )
15.245 +{
15.246 + char operands[64] = "";
15.247 +
15.248 + if( i->operand[0].type != NO_OPERAND ) {
15.249 + int pos = xir_snprint_operand( operands, sizeof(operands), &i->operand[0] );
15.250 + if( i->operand[1].type != NO_OPERAND ) {
15.251 + strncat( operands, ", ", sizeof(operands)-pos );
15.252 + pos += 2;
15.253 + xir_snprint_operand( operands+pos, sizeof(operands)-pos, &i->operand[1] );
15.254 + }
15.255 + }
15.256 + if( i->cond == CC_TRUE ) {
15.257 + fprintf( out, "%-9s %-30s", XIR_OPCODE_TABLE[i->opcode].name, operands );
15.258 + } else {
15.259 + char buf[16];
15.260 + snprintf( buf, 16, "%s%s", XIR_OPCODE_TABLE[i->opcode].name, XIR_CC_NAMES[i->cond] );
15.261 + fprintf( out, "%-9s %-30s", buf, operands );
15.262 + }
15.263 +}
15.264 +
/**
 * Sanity check a block of IR to make sure that
 * operands match up with the expected values etc
 *
 * Walks the chain from start to end inclusive (end == NULL walks to the end
 * of the chain) and asserts the structural invariants: operand counts match
 * the opcode's declared mode, writable operands are registers, the flags are
 * defined before any use within the block, exception edges are consistent,
 * and prev/next links form a well-formed list ending in a terminator op.
 * Exception sub-blocks are verified recursively. Violations abort via
 * assert() (no-ops under NDEBUG).
 */
void xir_verify_block( xir_op_t start, xir_op_t end )
{
    xir_op_t it;
    int flags_written = 0;  /* becomes 1 once some op in this block sets T */
    for( it = start; it != NULL; it = it->next ) {
        assert( it != NULL && "Unexpected end of block" );
        assert( it->cond >= CC_TRUE && it->cond <= CC_SGT && "Invalid condition code" );
        /* Operand count must agree with the opcode's mode word */
        if( XOP_HAS_0_OPERANDS(it) ) {
            assert( it->operand[0].type == NO_OPERAND && it->operand[1].type == NO_OPERAND );
        } else if( XOP_HAS_1_OPERAND(it) ) {
            assert( it->operand[0].type != NO_OPERAND && it->operand[1].type == NO_OPERAND );
        } else if( XOP_HAS_2_OPERANDS(it) ) {
            assert( it->operand[0].type != NO_OPERAND && it->operand[1].type != NO_OPERAND );
        }

        if( it->opcode == OP_ENTER ) {
            assert( it->prev == NULL && "Enter instruction must have no predecessors" );
            assert( it == start && "Enter instruction must occur at the start of the block" );
            assert( it->operand[0].type == INT_IMM_OPERAND && "Enter instruction must have an immediate operand" );
        } else if( it->opcode == OP_ST || it->opcode == OP_LD ) {
            /* LD/ST transfer a condition to/from a register, so an actual
             * condition code is required */
            assert( it->cond != CC_TRUE && "Opcode not permitted with True condition code" );
        }

        if( XOP_WRITES_OP1(it) ) {
            assert( (it->operand[0].type == SOURCE_REGISTER_OPERAND ||
                    it->operand[0].type == TARGET_REGISTER_OPERAND) && "Writable operand 1 requires a register" );
        }
        if( XOP_WRITES_OP2(it) ) {
            assert( (it->operand[1].type == SOURCE_REGISTER_OPERAND ||
                    it->operand[1].type == TARGET_REGISTER_OPERAND) && "Writable operand 2 requires a register" );
        }

        /* Flags must be defined within the block before any consumer */
        if( XOP_READS_FLAGS(it) ) {
            assert( flags_written && "Flags used without prior definition in block" );
        }
        if( XOP_WRITES_FLAGS(it) ) {
            flags_written = 1;
        }

        if( XOP_HAS_EXCEPTION(it) ) {
            /* exc->prev points back at the owning op (see xir_remove_op) */
            assert( it->exc != NULL && "Missing exception block" );
            assert( it->exc->prev == it && "Exception back-link broken" );
            xir_verify_block( it->exc, NULL ); // Verify exception sub-block
        } else {
            assert( it->exc == NULL && "Unexpected exception block" );
        }
        if( XOP_IS_TERMINATOR(it) ) {
            assert( it->next == NULL && "Unexpected next instruction on terminator" );
        } else {
            assert( it->next != NULL && "Missing terminator instruction at end of block" );
            assert( it->next->prev == it && "Linked-list chain broken" );
        }
        if( it == end )
            break;
    }
}
15.325 +
15.326 +xir_op_t xir_append_op2( xir_basic_block_t xbb, int op, int arg0type, uint32_t arg0, int arg1type, uint32_t arg1 )
15.327 +{
15.328 + xbb->ir_ptr->opcode = op;
15.329 + xbb->ir_ptr->cond = CC_TRUE;
15.330 + xbb->ir_ptr->operand[0].type = arg0type;
15.331 + xbb->ir_ptr->operand[0].value.i = arg0;
15.332 + xbb->ir_ptr->operand[1].type = arg1type;
15.333 + xbb->ir_ptr->operand[1].value.i = arg1;
15.334 + xbb->ir_ptr->exc = NULL;
15.335 + xbb->ir_ptr->next = xbb->ir_ptr+1;
15.336 + (xbb->ir_ptr+1)->prev = xbb->ir_ptr;
15.337 + return xbb->ir_ptr++;
15.338 +}
15.339 +
15.340 +xir_op_t xir_append_op2cc( xir_basic_block_t xbb, int op, xir_cc_t cc, int arg0type, uint32_t arg0, int arg1type, uint32_t arg1 )
15.341 +{
15.342 + xbb->ir_ptr->opcode = op;
15.343 + xbb->ir_ptr->cond = cc;
15.344 + xbb->ir_ptr->operand[0].type = arg0type;
15.345 + xbb->ir_ptr->operand[0].value.i = arg0;
15.346 + xbb->ir_ptr->operand[1].type = arg1type;
15.347 + xbb->ir_ptr->operand[1].value.i = arg1;
15.348 + xbb->ir_ptr->exc = NULL;
15.349 + xbb->ir_ptr->next = xbb->ir_ptr+1;
15.350 + (xbb->ir_ptr+1)->prev = xbb->ir_ptr;
15.351 + return xbb->ir_ptr++;
15.352 +}
15.353 +
15.354 +xir_op_t xir_append_float_op2( xir_basic_block_t xbb, int op, float imm1, int arg1type, uint32_t arg1 )
15.355 +{
15.356 + xbb->ir_ptr->opcode = op;
15.357 + xbb->ir_ptr->cond = CC_TRUE;
15.358 + xbb->ir_ptr->operand[0].type = FLOAT_IMM_OPERAND;
15.359 + xbb->ir_ptr->operand[0].value.i = imm1;
15.360 + xbb->ir_ptr->operand[1].type = arg1type;
15.361 + xbb->ir_ptr->operand[1].value.i = arg1;
15.362 + xbb->ir_ptr->exc = NULL;
15.363 + xbb->ir_ptr->next = xbb->ir_ptr+1;
15.364 + (xbb->ir_ptr+1)->prev = xbb->ir_ptr;
15.365 + return xbb->ir_ptr++;
15.366 +}
15.367 +
15.368 +xir_op_t xir_append_ptr_op2( xir_basic_block_t xbb, int op, void *arg0, int arg1type, uint32_t arg1 )
15.369 +{
15.370 + xbb->ir_ptr->opcode = op;
15.371 + xbb->ir_ptr->cond = CC_TRUE;
15.372 + xbb->ir_ptr->operand[0].type = POINTER_OPERAND;
15.373 + xbb->ir_ptr->operand[0].value.p = arg0;
15.374 + xbb->ir_ptr->operand[1].type = arg1type;
15.375 + xbb->ir_ptr->operand[1].value.i = arg1;
15.376 + xbb->ir_ptr->exc = NULL;
15.377 + xbb->ir_ptr->next = xbb->ir_ptr+1;
15.378 + (xbb->ir_ptr+1)->prev = xbb->ir_ptr;
15.379 + return xbb->ir_ptr++;
15.380 +}
15.381 +
15.382 +void xir_insert_op( xir_op_t op, xir_op_t before )
15.383 +{
15.384 + op->prev = before->prev;
15.385 + op->next = before;
15.386 + before->prev->next = op;
15.387 + before->prev = op;
15.388 +}
15.389 +
15.390 +void xir_insert_block( xir_op_t start, xir_op_t end, xir_op_t before )
15.391 +{
15.392 + start->prev = before->prev;
15.393 + end->next = before;
15.394 + before->prev->next = start;
15.395 + before->prev = end;
15.396 +}
15.397 +
15.398 +void xir_remove_op( xir_op_t op )
15.399 +{
15.400 + if( op->next != NULL ) {
15.401 + op->next->prev = op->prev;
15.402 + }
15.403 + if( op->prev != NULL ) {
15.404 + if( op->prev->next == op ) {
15.405 + op->prev->next = op->next;
15.406 + } else {
15.407 + assert( op->prev->exc == op );
15.408 + op->prev->exc = op->next;
15.409 + }
15.410 + }
15.411 +}
15.412 +
15.413 +int xir_get_operand_size( xir_op_t op, int operand )
15.414 +{
15.415 + int mode = XIR_OPCODE_TABLE[op->opcode].mode;
15.416 + if( operand == 0 ) {
15.417 + mode >>= 4;
15.418 + } else {
15.419 + mode >>= 8;
15.420 + }
15.421 + mode &= 0x07;
15.422 + return XIR_OPERAND_SIZE[mode];
15.423 +}
15.424 +
/**
 * Print the chain start..end inclusive (end == NULL prints to the end of
 * the chain) to out. Exception sub-chains are printed interleaved in a
 * right-hand column: "=> " marks the line where an exception chain begins,
 * and any remainder of a previous exception chain is flushed (indented)
 * before a new one starts. Assumes column widths match the fixed-width
 * format used by xir_print_instruction.
 */
void xir_print_block( FILE *out, xir_op_t start, xir_op_t end )
{
    xir_op_t it = start;
    xir_op_t exc = NULL;   /* exception chain currently being drained */

    while( it != NULL ) {
        if( it->exc ) {
            /* Flush what's left of the previous exception chain first */
            while( exc ) {
                fprintf( out, "%40c ", ' ' );
                xir_print_instruction( out, exc );
                fprintf( out, "\n" );
                exc = exc->next;
            }
            exc = it->exc;
        }
        xir_print_instruction( out, it );
        if( exc ) {
            if( it->exc ) {
                fprintf( out, "=> " );  /* head of the exception chain */
            } else {
                fprintf( out, "   " );
            }
            xir_print_instruction( out, exc );
            exc = exc->next;
        }
        fprintf( out, "\n" );
        if( it == end )
            break;
        it = it->next;
    }
}
15.456 +
15.457 +void xir_dump_block( xir_op_t start, xir_op_t end )
15.458 +{
15.459 + xir_print_block( stdout, start, end );
15.460 + xir_verify_block( start, end );
15.461 +}
16.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
16.2 +++ b/src/xlat/xir.h Tue Apr 07 10:55:03 2009 +0000
16.3 @@ -0,0 +1,607 @@
16.4 +/**
16.5 + * $Id: xir.h 931 2008-10-31 02:57:59Z nkeynes $
16.6 + *
16.7 + * This file defines the translation IR and associated functions.
16.8 + *
16.9 + * Copyright (c) 2009 Nathan Keynes.
16.10 + *
16.11 + * This program is free software; you can redistribute it and/or modify
16.12 + * it under the terms of the GNU General Public License as published by
16.13 + * the Free Software Foundation; either version 2 of the License, or
16.14 + * (at your option) any later version.
16.15 + *
16.16 + * This program is distributed in the hope that it will be useful,
16.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
16.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16.19 + * GNU General Public License for more details.
16.20 + */
16.21 +
16.22 +#ifndef lxdream_xir_H
16.23 +#define lxdream_xir_H 1
16.24 +
16.25 +#include <stdint.h>
16.26 +
16.27 +/*****************************************************************************
16.28 + *
16.29 + * We use a very simple low-level 2-op instruction form, largely intended to
16.30 + * closely match the x86 ISA to simplify final code generation. Complex
16.31 + * instructions are either broken up into simpler ops, or inserted as
16.32 + * opaque macros. First operand is source, second operand is destination.
16.33 + *
16.34 + * Data types are encoded in the instruction:
16.35 + * Byte (B) 8-bit integer
16.36 + * Word (W) 16-bit integer
16.37 + * Long (L) 32-bit integer
16.38 + * Quad (Q) 64-bit integer
16.39 + * Float (F) 32-bit floating point
16.40 + * Double (D) 64-bit floating point
16.41 + * Vec4 (V) 4x32-bit floating point
16.42 + * Matrix (M) 4x4x32-bit floating point in column-major order
16.43 + * This is not an exhaustive list, but it is sufficient to cover all operations
16.44 + * required for the SH4.
16.45 + *
16.46 + * ALU instructions come in two variants, xxxS which modifies the condition
16.47 + * flags, and the regular xxx version that does not. Implementations are assumed
16.48 + * to have at least the standard NZVC flags available (or will have to fake it)
16.49 + *
16.50 + * Variations in flag behaviour between implementations need to be accounted for
16.51 + * somehow.
16.52 + ****************************************************************************/
16.53 +
/* Registers 0..1023 belong to the source machine, ids 1024..1535 are
 * translation temporaries. (NOTE(review): an earlier comment here said
 * "0..127", contradicting the values below - the defines are taken as
 * authoritative.) */
#define MIN_SOURCE_REGISTER 0
#define MAX_SOURCE_REGISTER 1023
#define MIN_TEMP_REGISTER 1024
#define MAX_TEMP_REGISTER 1535

/* Target registers have a separate 'address' space. */
#define MIN_TARGET_REGISTER 0
#define MAX_TARGET_REGISTER 127

/* Convenience defines */
#define REG_TMP0 (MIN_TEMP_REGISTER)
#define REG_TMP1 (MIN_TEMP_REGISTER+1)
#define REG_TMP2 (MIN_TEMP_REGISTER+2)
#define REG_TMP3 (MIN_TEMP_REGISTER+3)
#define REG_TMP4 (MIN_TEMP_REGISTER+4)
#define REG_TMP5 (MIN_TEMP_REGISTER+5)

/* Presumably 64-bit temporaries, allocated from a distinct sub-range of the
 * temp space - TODO confirm against the register allocator. */
#define REG_TMPQ0 (MIN_TEMP_REGISTER+128)
#define REG_TMPQ1 (MIN_TEMP_REGISTER+129)
16.74 +
/**
 * Operands are either integer, float, or double, and are either immediate or
 * assigned to a source-machine register, destination-machine register, or a
 * temporary register. (All temporaries have to be resolved to a dest-reg before
 * code generation)
 */
typedef enum {
    NO_OPERAND = 0,
    SOURCE_REGISTER_OPERAND =1, // Source (or temp) register
    TARGET_REGISTER_OPERAND =2,
    INT_IMM_OPERAND = 3,
    QUAD_IMM_OPERAND = 4,
    FLOAT_IMM_OPERAND = 5,
    DOUBLE_IMM_OPERAND = 6,
    POINTER_OPERAND = 7, // Native target pointer, eg direct memory access
} xir_operand_type_t;

/* Operand value. The discriminant is the xir_operand_type_t stored
 * alongside it - always access the union member matching the declared
 * operand type. */
typedef struct xir_operand {
    xir_operand_type_t type;
    union {
        uint32_t i;  /* INT_IMM and register ids */
        uint64_t q;  /* QUAD_IMM */
        float f;     /* FLOAT_IMM */
        double d;    /* DOUBLE_IMM */
        void *p;     /* POINTER */
    } value;
} *xir_operand_t;
16.102 +
/* Condition codes. The non-negative values are laid out so that each code
 * and its logical inverse differ only in the low bit (cc ^ 1 negates), and
 * they index XIR_CC_NAMES in xirsup.c. CC_TRUE is not a valid index. */
typedef enum {
    CC_TRUE = -1, /* Always */
    CC_OV = 0, /* Overflow */
    CC_NO = 1, /* !Overflow */
    CC_UGE = 2, /* Unsigned greater or equal */
    CC_ULT = 3, /* Unsigned less than */
    CC_ULE = 4, /* Unsigned less or equal */
    CC_UGT = 5, /* Unsigned greater than */
    CC_EQ = 6, /* Equal */
    CC_NE = 7, /* !Equal */
    CC_NEG = 8, /* Negative */
    CC_POS = 9, /* Not-negative (positive or zero) */
    CC_SGE = 10, /* Signed greater or equal */
    CC_SLT = 11, /* Signed less than */
    CC_SLE = 12, /* Signed less or equal */
    CC_SGT = 13  /* Signed greater than */
} xir_cc_t;

/* Carry aliases: carry-set == unsigned-below, carry-clear == unsigned >= */
#define CC_C CC_ULT
#define CC_NC CC_UGE
16.124 +
/* Opcode enumeration. Must be kept in exact 1:1 order with
 * XIR_OPCODE_TABLE in xirsup.c, and the OP0/OP1/OP2 group boundaries are
 * referenced by the MAX_OPn_OPCODE defines below. */
typedef enum {
    // No operands
    OP_NOP = 0,
    OP_BARRIER, // Direction to register allocator - Ensure all state is committed

    // One operand
    OP_DEC, /* Decrement and set Z if result == 0 */
    OP_LD, /* Load flags from reg/imm (1 = condition, 0 = !condition) */
    OP_ST, /* Set reg to 1 on condition, 0 on !condition */
    OP_RESTFLAGS, /* Restore flags from register */
    OP_SAVEFLAGS, /* Save flags into register */
    OP_ENTER, // Block start - immediate operand is a bitmask of target registers used
    OP_BRREL,
    OP_BR,
    OP_CALL0, // Call function with no arguments or return value
    OP_OCBI,
    OP_OCBP,
    OP_OCBWB,
    OP_PREF,

    /* Register moves */
    OP_MOV,
    OP_MOVQ,
    OP_MOVV,
    OP_MOVM,
    OP_MOVSX8,
    OP_MOVSX16,
    OP_MOVSX32,
    OP_MOVZX8,
    OP_MOVZX16,
    OP_MOVZX32,

    /* ALU */
    OP_ADD,
    OP_ADDS,
    OP_ADDC,
    OP_ADDCS,
    OP_AND,
    OP_ANDS,
    OP_CMP,
    OP_DIV, /* Unsigned division */
    OP_DIVS, /* Unsigned division and update flags */
    OP_MUL,
    OP_MULS,
    OP_MULQ,
    OP_MULQS,
    OP_NEG,
    OP_NEGS,
    OP_NOT,
    OP_NOTS,
    OP_OR,
    OP_ORS,
    OP_RCL,
    OP_RCR,
    OP_ROL, /* Rotate left w/o updating flags */
    OP_ROLS, /* Rotate left, and set carry */
    OP_ROR, /* Rotate right */
    OP_RORS, /* Rotate right and set carry */
    OP_SAR, /* Shift arithmetic right */
    OP_SARS, /* Shift arithmetic right and set carry */
    OP_SDIV, /* Signed division */
    OP_SDIVS, /* Signed division and update flags */
    OP_SLL, /* Shift logical left */
    OP_SLLS, /* Shift logical left and set carry */
    OP_SLR, /* Shift logical right */
    OP_SLRS, /* Shift logical right and set carry */
    OP_SUB, /* Subtract, no flags changed/used */
    OP_SUBS, /* Subtract, flag set on overflow */
    OP_SUBB, /* Subtract with borrow */
    OP_SUBBS, /* Subtract with borrow and set carry */
    OP_SHUFFLE, /* Rearrange bytes according to immediate pattern */
    OP_TST,
    OP_XOR,
    OP_XORS,
    OP_XLAT,

    /* FPU */
    OP_ABSD,
    OP_ABSF,
    OP_ABSV,
    OP_ADDD,
    OP_ADDF,
    OP_ADDV,
    OP_CMPD,
    OP_CMPF,
    OP_DIVD,
    OP_DIVF,
    OP_DIVV,
    OP_MULD,
    OP_MULF,
    OP_MULV,
    OP_NEGD,
    OP_NEGF,
    OP_NEGV,
    OP_SQRTD,
    OP_SQRTF,
    OP_SQRTV,
    OP_RSQRTD,
    OP_RSQRTF,
    OP_RSQRTV,
    OP_SUBD,
    OP_SUBF,
    OP_SUBV,
    OP_DTOF,
    OP_DTOI,
    OP_FTOD,
    OP_FTOI,
    OP_ITOD,
    OP_ITOF,
    OP_SINCOSF,
    OP_DOTPRODV,
    OP_MATMULV,

    // Memory operations - these all indirect through the memory tables.
    OP_LOADB,
    OP_LOADBFW,
    OP_LOADW,
    OP_LOADL,
    OP_LOADQ,
    OP_STOREB,
    OP_STOREW,
    OP_STOREL,
    OP_STOREQ,
    OP_STORELCA,

    OP_BRCOND,
    OP_BRCONDDEL, // Delayed branch - sets newpc rather than pc (and is not a terminator)
    OP_RAISEME, // imm mask in, reg in - branch to exception if (reg & mask) == 0
    OP_RAISEMNE, // imm mask in, reg in - branch to exception if (reg & mask) != 0


    // Native calls (not source machine calls)
    OP_CALLLUT, // Call indirect through base pointer (reg) + displacement
    OP_CALL1, // Call function with single argument and no return value
    OP_CALLR, // Call function with no arguments and a single return value

    /********************** SH4-specific macro operations *************************/
    /* TODO: These need to be broken down into smaller operations eventually,
     * especially as some are likely to be partially optimizable. But in the
     * meantime this at least gets things working
     */

    /**
     * ADDQSAT32 Rm, Rn - 64-bit Add Rm to Rn, saturating to 32-bits if S==1 (per SH4 MAC.W)
     *
     * if R_S == 0 ->
     *    Rn += Rm
     * else ->
     *    if overflow32( Rn + Rm ) ->
     *        Rn = saturate32( Rn + Rm ) | 0x100000000
     *    else ->
     *        Rn += Rm
     */
    OP_ADDQSAT32,

    /**
     * ADDQSAT48 Rm, Rn - 64-bit Add Rm to Rn, saturating to 48-bits if S==1 (per SH4 MAC.L)
     *
     * if R_S == 0 ->
     *    Rn += Rm
     * else ->
     *    if( Rm + Rn > 0x00007FFFFFFFFFFF ) ->
     *        Rn = 0x00007FFFFFFFFFFF
     *    else if( Rm + Rn < 0xFFFF800000000000 (i.e. -2^47, signed) ) ->
     *        Rn = 0xFFFF800000000000
     *    else ->
     *        Rn += Rm
     * NOTE(review): the original comment gave the lower bound as
     * 0x0000800000000000 (a positive value); MAC.L saturation is to
     * -2^47 - confirm against the SH4 manual.
     */
    OP_ADDQSAT48,

    /**
     * CMP/STR Rm, Rn - Set T if any byte is the same between Rm and Rn
     *
     * Macro expansion:
     *   MOV Rm, %tmp
     *   XOR Rn, %tmp
     *   TEST 0x000000FF, %tmp
     *   TESTne 0x0000FF00, %tmp
     *   TESTne 0x00FF0000, %tmp
     *   TESTne 0xFF000000, %tmp
     *   SETe T
     *
     */
    OP_CMPSTR,

    /**
     * DIV1 Rm,Rn performs a single-step division of Rm/Rn, modifying flags
     * as it goes.
     *
     * sign = Rn >> 31
     * Rn = (Rn << 1) | R_T
     * If R_Q == R_M -> Rn = Rn - Rm
     * Else -> Rn = Rn + Rm
     * R_Q = sign ^ R_M ^ (Rn>>31)
     * R_T = (R_Q == R_M) ; or newq == (rn>>31)
     *
     * Macro expansion:
     *   LDc R_T
     *   RCL 1, Rn
     *   SETc temp
     *   CMP R_Q, R_M
     *   ADDne Rm, Rn
     *   SUBeq Rm, Rn
     *   MOV Rn, R_Q
     *   SHR 31, Rn
     *   XOR temp, R_Q
     *   XOR R_M, R_Q
     *   CMP R_M, R_Q
     *   SETe R_T
     */
    OP_DIV1,

    /**
     * SHAD Rm, Rn performs an arithmetic shift of Rn as follows:
     *   If Rm >= 0 -> Rn = Rn << (Rm&0x1F)
     *   If Rm < 0 ->
     *      If Rm&0x1F == 0 -> Rn = Rn >> 31
     *      Else -> Rn = Rn >> 32 - (Rm&0x1F)
     *
     *   CMP 0, Rm
     *   ANDuge 0x1F, Rm
     *   SLLuge Rm, Rn
     *   ORult 0xFFFFFFE0, Rm
     *   NEGult Rm
     *   SARult Rm, Rn   ; unmasked shift
     *
     */
    OP_SHAD, // Shift dynamic arithmetic (left or right)

    /**
     * SHLD Rm, Rn performs a logical shift of Rn as follows:
     *   If Rm >= 0 -> Rn = Rn << (Rm&0x1F)
     *   If Rm < 0 ->
     *      If Rm&0x1F == 0 -> Rn = 0
     *      Else -> Rn = Rn >> 32 - (Rm&0x1F)
     */
    OP_SHLD, // Shift dynamic logical (left or right)
} xir_opcode_t;
16.363 +
/* Opcode group boundaries (by operand count); these must track the ordering
 * of xir_opcode_t above: 0-operand ops end at OP_BARRIER, 1-operand ops at
 * OP_PREF, everything after is 2-operand. */
#define MAX_OP0_OPCODE OP_BARRIER
#define MAX_OP1_OPCODE OP_PREF
#define MAX_OP2_OPCODE OP_SHLD
#define NUM_OP0_OPCODES (MAX_OP0_OPCODE+1)
#define NUM_OP1_OPCODES (MAX_OP1_OPCODE-MAX_OP0_OPCODE)
#define NUM_OP2_OPCODES (MAX_OP2_OPCODE-MAX_OP1_OPCODE)
#define MAX_OPCODE (MAX_OP2_OPCODE)
#define NUM_OPCODES (MAX_OP2_OPCODE+1)
16.372 +
/* A single IR instruction. Instructions form a doubly-linked chain via
 * next/prev; ops that can fault additionally carry an exc edge to an
 * exception sub-chain (whose head's prev points back at the owner - see
 * xir_verify_block / xir_remove_op in xirsup.c). */
typedef struct xir_op {
    xir_opcode_t opcode;
    xir_cc_t cond;  /* CC_TRUE for unconditional execution */
    struct xir_operand operand[2];
    struct xir_op *next; /* Next instruction (normal path) - NULL in the case of the last instruction */
    struct xir_op *prev; /* Previous instruction (normal path) - NULL in the case of the first instruction */
    struct xir_op *exc; /* Next instruction if the opcode takes an exception - NULL if no exception is possible */
} *xir_op_t;

/* Defined in xlat/xlat.h */
typedef struct xlat_source_machine *xlat_source_machine_t;
typedef struct xlat_target_machine *xlat_target_machine_t;

/**
 * Source data structure. This mainly exists to manage memory for XIR operations
 */
typedef struct xir_basic_block {
    xir_op_t ir_begin; /* Beginning of code block */
    xir_op_t ir_end; /* End of code block (Last instruction in code block) */
    xir_op_t ir_ptr; /* First unallocated instruction in allocation block */
    xir_op_t ir_alloc_begin; /* Beginning of memory allocation */
    xir_op_t ir_alloc_end; /* End of allocation */
    uint32_t pc_begin; /* first instruction */
    uint32_t pc_end; /* next instruction after end */
    xlat_source_machine_t source;
    struct mem_region_fn **address_space; /* source machine memory access table */
} *xir_basic_block_t;

/* Signed offset within a block, in instructions */
typedef int xir_offset_t;
16.402 +
/**************************** OP Information ******************************/

/* Instruction operand modes. Bit layout, derived from the values below:
 *   bits 0-1   operand[0] access (01 = read, 10 = write, 11 = read/write)
 *   bits 2-3   operand[1] access
 *   bits 4-7   operand[0] type code (0=i32, 1=i64, 2=float, 3=double, 4=vec4, 5=matrix)
 *   bits 8-11  operand[1] type code
 *   bit 12     reads the T flag;  bit 13  writes the T flag
 *   bit 14     may raise an exception;  bit 15  clobbers volatiles
 *   bit 16     block terminator
 */
#define OPM_NO 0x000000 /* No operands */
#define OPM_R 0x000001 /* Single operand, read-only */
#define OPM_W 0x000002 /* Single operand, write-only */
#define OPM_RW 0x000003 /* Single operand, read-write */
#define OPM_R_R 0x000005 /* Two operands, both read-only */
#define OPM_R_W 0x000009 /* Two operands, first read-only, second write-only */
#define OPM_R_RW 0x00000D /* Two operands, first read-only, second read-write */
#define OPM_I_I 0x000000 /* Both operands i32 */
#define OPM_Q_Q 0x000110 /* Both operands i64 */
#define OPM_I_Q 0x000100 /* i32,i64 operands */
#define OPM_Q_I 0x000010 /* i64,i32 operands */
#define OPM_F_F 0x000220 /* Both operands float */
#define OPM_D_D 0x000330 /* Both operands double */
#define OPM_I_F 0x000200 /* i32,float operands */
#define OPM_I_D 0x000300 /* i32,double operands */
#define OPM_F_I 0x000020 /* float,i32 operands */
#define OPM_D_I 0x000030 /* double,i32 operands */
#define OPM_F_D 0x000320 /* float,double operands */
#define OPM_D_F 0x000230 /* double,float operands */
#define OPM_V_V 0x000440 /* vec4,vec4 operands */
#define OPM_V_M 0x000540 /* vec4,matrix16 operands */
#define OPM_M_M 0x000550 /* mat16,mat16 operands */
#define OPM_TR 0x001000 /* Use T */
#define OPM_TW 0x002000 /* Set T */
#define OPM_TRW 0x003000 /* Use+Set T */
#define OPM_EXC 0x004000 /* May raise an exception, clobbers volatiles */
#define OPM_CLB 0x008000 /* Clobbers volatile registers */
/* NOTE(review): OPM_CLBT == OPM_CLB|OPM_EXC; XOP_HAS_EXCEPTION masks both
 * bits and compares against OPM_EXC alone, so CLBT ops are deliberately NOT
 * treated as exception-raising - confirm this encoding is intended. */
#define OPM_CLBT 0x00C000 /* Clobbers 'temporary regs' but not the full volatile set */
#define OPM_TERM 0x010000 /* Terminates block. (Must be final instruction in block) */

#define OPM_R_R_TW (OPM_R_R|OPM_TW) /* Read two ops + set flags */
#define OPM_R_RW_TR (OPM_R_RW|OPM_TR) /* Read/write + use flags */
#define OPM_R_RW_TW (OPM_R_RW|OPM_TW) /* Read/write + set flags */
#define OPM_R_RW_TRW (OPM_R_RW|OPM_TRW) /* Read/write + use/set flags */
#define OPM_R_W_TW (OPM_R_W|OPM_TW) /* Read/write + set flags */
#define OPM_RW_TW (OPM_RW|OPM_TW) /* Read/write single op + set flags */
#define OPM_RW_TRW (OPM_RW|OPM_TRW) /* Read/write single op + use/set flags */
#define OPM_FRW (OPM_RW|OPM_F_F) /* Read/write single float op */
#define OPM_FR_FRW (OPM_R_RW|OPM_F_F) /* Read/write float op pair */
#define OPM_FR_FW (OPM_R_W|OPM_F_F) /* Read/write float op pair */
#define OPM_FR_FR_TW (OPM_R_R_TW|OPM_F_F) /* Read two float ops + set flags */
#define OPM_DRW (OPM_RW|OPM_D_D) /* Read/write single double op */
#define OPM_DR_DRW (OPM_R_RW|OPM_D_D) /* Read/write double op pair */
#define OPM_DR_DW (OPM_R_W|OPM_D_D) /* Read/write double op pair */
#define OPM_VR_VRW (OPM_R_RW|OPM_V_V) /* Vector Read/write double op pair */
#define OPM_VR_VW (OPM_R_W|OPM_V_V) /* Vector Read/write double op pair */
#define OPM_DR_DR_TW (OPM_R_R_TW|OPM_D_D) /* Read two double ops + set flags */

#define OPM_R_W_EXC (OPM_R_W|OPM_EXC) /* Read first, write second, possible exc (typical load) */
#define OPM_R_R_EXC (OPM_R_R|OPM_EXC) /* Read first, write second, possible exc (typical store) */
#define OPM_R_EXC (OPM_R|OPM_EXC) /* Read-only single op, possible exc (eg pref) */

/* One row per opcode in XIR_OPCODE_TABLE (see xirsup.c) */
struct xir_opcode_entry {
    char *name;  /* printable mnemonic */
    int mode;    /* OPM_* descriptor bits */
};

/* Pointer-to-name mapping entry; tables are terminated by name == NULL */
struct xir_symbol_entry {
    const char *name;
    void *ptr;
};
16.467 +
16.468 +extern const struct xir_opcode_entry XIR_OPCODE_TABLE[];
16.469 +#define XOP_IS_SRCREG(op,n) (op->operand[n].type == SOURCE_REGISTER_OPERAND)
16.470 +#define XOP_IS_TGTREG(op,n) (op->operand[n].type == TARGET_REGISTER_OPERAND)
16.471 +#define XOP_IS_INTIMM(op,n) (op->operand[n].type == INT_IMM_OPERAND)
16.472 +#define XOP_IS_FLOATIMM(op,n) (op->operand[n].type == FLOAT_IMM_OPERAND)
16.473 +#define XOP_IS_DOUBLEIMM(op,n) (op->operand[n].type == DOUBLE_IMM_OPERAND)
16.474 +#define XOP_IS_QUADIMM(op,n) (op->operand[n].type == QUAD_IMM_OPERAND)
16.475 +#define XOP_IS_PTRIMM(op,n) (op->operand[n].type == POINTER_OPERAND)
16.476 +#define XOP_IS_IMM(op,n) (op->operand[n].type > TARGET_REGISTER_OPERAND)
16.477 +#define XOP_IS_REG(op,n) (XOP_IS_SRCREG(op,n)||XOP_IS_TGTREG(op,n))
16.478 +#define XOP_IS_FORM(op,t1,t2) (op->operand[0].type == t1 && op->operand[1].type == t2)
16.479 +
16.480 +#define XOP_REG(op,n) (op->operand[n].value.i)
16.481 +#define XOP_REG1(op) XOP_REG(op,0)
16.482 +#define XOP_REG2(op) XOP_REG(op,1)
16.483 +#define XOP_INT(op,n) (op->operand[n].value.i)
16.484 +#define XOP_QUAD(op,n) (op->operand[n].value.q)
16.485 +#define XOP_FLOAT(op,n) (op->operand[n].value.f)
16.486 +#define XOP_DOUBLE(op,n) (op->operand[n].value.d)
16.487 +#define XOP_PTR(op,n) (op->operand[n].value.p)
16.488 +
16.489 +#define XOP_IS_TERMINATOR(op) (XIR_OPCODE_TABLE[op->opcode].mode & OPM_TERM)
16.490 +#define XOP_HAS_0_OPERANDS(op) ((XIR_OPCODE_TABLE[op->opcode].mode & 0x0F) == 0)
16.491 +#define XOP_HAS_1_OPERAND(op) ((XIR_OPCODE_TABLE[op->opcode].mode & 0x0F) < 4)
16.492 +#define XOP_HAS_2_OPERANDS(op) ((XIR_OPCODE_TABLE[op->opcode].mode & 0x0C) != 0)
16.493 +#define XOP_HAS_EXCEPTION(op) ((XIR_OPCODE_TABLE[op->opcode].mode & 0xC000) == OPM_EXC)
16.494 +
16.495 +#define XOP_READS_OP1(op) (XIR_OPCODE_TABLE[op->opcode].mode & 0x01)
16.496 +#define XOP_WRITES_OP1(op) (XIR_OPCODE_TABLE[op->opcode].mode & 0x02)
16.497 +#define XOP_READS_OP2(op) (XIR_OPCODE_TABLE[op->opcode].mode & 0x04)
16.498 +#define XOP_WRITES_OP2(op) (XIR_OPCODE_TABLE[op->opcode].mode & 0x08)
16.499 +#define XOP_READS_FLAGS(op) ((XIR_OPCODE_TABLE[op->opcode].mode & OPM_TR) || (op->cond != CC_TRUE && op->opcode != OP_LD))
16.500 +#define XOP_WRITES_FLAGS(op) (XIR_OPCODE_TABLE[op->opcode].mode & OPM_TW)
16.501 +
16.502 +#define XOP_READS_REG1(op) (XOP_READS_OP1(op) && XOP_IS_REG(op,0))
16.503 +#define XOP_WRITES_REG1(op) (XOP_WRITES_OP1(op) && XOP_IS_REG(op,0))
16.504 +#define XOP_READS_REG2(op) (XOP_READS_OP2(op) && XOP_IS_REG(op,1))
16.505 +#define XOP_WRITES_REG2(op) (XOP_WRITES_OP2(op) && XOP_IS_REG(op,1))
16.506 +
16.507 +#define XOP_TYPE1(op) (op->operand[0].type)
16.508 +#define XOP_TYPE2(op) (op->operand[1].type)
16.509 +#define XOP_OPERAND(op,i) (&op->operand[i])
16.510 +
16.511 +/******************************* OP Constructors ******************************/
16.512 +
16.513 +xir_op_t xir_append_op2( xir_basic_block_t xbb, int op, int arg0type, uint32_t arg0, int arg1type, uint32_t arg1 );
16.514 +xir_op_t xir_append_op2cc( xir_basic_block_t xbb, int op, int cc, int arg0type, uint32_t arg0, int arg1type, uint32_t arg1 );
16.515 +xir_op_t xir_append_float_op2( xir_basic_block_t xbb, int op, float imm1, int arg1type, uint32_t arg1 );
16.516 +xir_op_t xir_append_ptr_op2( xir_basic_block_t xbb, int op, void *arg0, int arg1type, uint32_t arg1 );
16.517 +
16.518 +
16.519 +#define XOP1( op, arg0 ) xir_append_op2(xbb, op, SOURCE_REGISTER_OPERAND, arg0, NO_OPERAND, 0)
16.520 +#define XOP1CC( op, cc, arg0 ) xir_append_op2cc(xbb, op, cc, SOURCE_REGISTER_OPERAND, arg0, NO_OPERAND, 0)
16.521 +#define XOP1I( op, arg0 ) xir_append_op2(xbb, op, INT_IMM_OPERAND, arg0, NO_OPERAND, 0)
16.522 +#define XOP2I( op, arg0, arg1 ) xir_append_op2(xbb, op, INT_IMM_OPERAND, arg0, SOURCE_REGISTER_OPERAND, arg1)
16.523 +#define XOP2II( op, arg0, arg1 ) xir_append_op2(xbb, op, INT_IMM_OPERAND, arg0, INT_IMM_OPERAND, arg1)
16.524 +#define XOP2IICC( op, cc, arg0, arg1 ) xir_append_op2cc(xbb, op, cc, INT_IMM_OPERAND, arg0, INT_IMM_OPERAND, arg1)
16.525 +#define XOP2( op, arg0, arg1 ) xir_append_op2(xbb, op, SOURCE_REGISTER_OPERAND, arg0, SOURCE_REGISTER_OPERAND, arg1)
16.526 +#define XOP2CC( op, cc, arg0, arg1 ) xir_append_op2cc(xbb, op, cc, SOURCE_REGISTER_OPERAND, arg0, SOURCE_REGISTER_OPERAND, arg1)
16.527 +#define XOP2F( op, arg0, arg1 ) xir_append_float_op2(xbb, op, arg0, SOURCE_REGISTER_OPERAND, arg1)
16.528 +#define XOP2P( op, arg0, arg1 ) xir_append_ptr_op2(xbb, op, arg0, SOURCE_REGISTER_OPERAND, arg1)
16.529 +#define XOP0( op ) xir_append_op2(xbb, op, NO_OPERAND, 0, NO_OPERAND, 0)
16.530 +#define XOPCALL0( arg0 ) xir_append_ptr_op2(xbb, OP_CALL0, arg0, NO_OPERAND, 0)
16.531 +#define XOPCALL1( arg0, arg1 ) xir_append_ptr_op2(xbb, OP_CALL1, arg0, SOURCE_REGISTER_OPERAND, arg1)
16.532 +#define XOPCALL1I( arg0, arg1 ) xir_append_ptr_op2(xbb, OP_CALL1, arg0, INT_IMM_OPERAND, arg1)
16.533 +#define XOPCALLR( arg0, arg1 ) xir_append_ptr_op2(xbb, OP_CALLR, arg0, SOURCE_REGISTER_OPERAND, arg1)
16.534 +
16.535 +/**************************** IR Modification ******************************/
16.536 +
16.537 +/**
16.538 + * Insert a new instruction immediately before the given existing inst.
16.539 + */
16.540 +void xir_insert_op( xir_op_t op, xir_op_t before );
16.541 +
16.542 +/**
16.543 + * Insert the block start..end immediately before the given instruction
16.544 + */
16.545 +void xir_insert_block( xir_op_t start, xir_op_t end, xir_op_t before );
16.546 +
16.547 +/**
16.548 + * Remove the specified instruction completely from the block in which it appears.
16.549 + * Note: removing terminators with this method may break the representation.
16.550 + * Op itself is not modified.
16.551 + */
16.552 +void xir_remove_op( xir_op_t op );
16.553 +
16.554 +/**
16.555 + * Apply a shuffle directly to the given operand, and return the result
16.556 + */
16.557 +uint32_t xir_shuffle_imm32( uint32_t shuffle, uint32_t operand );
16.558 +
16.559 +/**
16.560 + * Apply a shuffle transitively to the operation (which must also be a shuffle).
16.561 + * For example, given the sequence
16.562 + * op1: shuffle 0x2134, r12
16.563 + * op2: shuffle 0x3412, r12
16.564 + * xir_shuffle_op( 0x2134, op2 ) can be used to replace op2 with
16.565 + * shuffle 0x3421, r12
16.566 + */
16.567 +void xir_shuffle_op( uint32_t shuffle, xir_op_t it );
16.568 +
16.569 +/**
16.570 + * Return the number of instructions that would be emitted by xir_shuffle_lower
16.571 + * for the given instruction (not including the leading nop, if there is one)
16.572 + */
16.573 +int xir_shuffle_lower_size( xir_op_t it );
16.574 +
16.575 +/**
16.576 + * Transform a shuffle instruction into an equivalent sequence of shifts, and
16.577 + * logical operations.
16.578 + * @return the last instruction in the resultant sequence (which may be the
16.579 + * original instruction pointer).
16.580 + */
16.581 +xir_op_t xir_shuffle_lower( xir_basic_block_t xbb, xir_op_t it, int tmp1, int tmp2 );
16.582 +
16.583 +
16.584 +/**************************** Debugging ******************************/
16.585 +
16.586 +/**
16.587 + * Verify the integrity of an IR block - abort with assertion failure on any
16.588 + * errors.
16.589 + */
16.590 +void xir_verify_block( xir_op_t start, xir_op_t end );
16.591 +
16.592 +/**
16.593 + * Set the register name mappings for source and target registers - only really
16.594 + * used for debug output
16.595 + */
16.596 +void xir_set_register_names( const char **source_regs, const char **target_regs );
16.597 +
16.598 +/**
16.599 + * Set the symbol table mappings for target points - also only really for
16.600 + * debugging output.
16.601 + */
16.602 +void xir_set_symbol_table( const struct xir_symbol_entry *symtab );
16.603 +
16.604 +/**
16.605 + * Dump the specified block of IR to stdout
16.606 + */
16.607 +void xir_dump_block( xir_op_t start, xir_op_t end );
16.608 +
16.609 +
16.610 +#endif /* !lxdream_xir_H */
17.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
17.2 +++ b/src/xlat/xiropt.h Tue Apr 07 10:55:03 2009 +0000
17.3 @@ -0,0 +1,89 @@
17.4 +/**
17.5 + * $Id: livevar.h 931 2008-10-31 02:57:59Z nkeynes $
17.6 + *
17.7 + * IR optimizations
17.8 + *
17.9 + * Copyright (c) 2009 Nathan Keynes.
17.10 + *
17.11 + * This program is free software; you can redistribute it and/or modify
17.12 + * it under the terms of the GNU General Public License as published by
17.13 + * the Free Software Foundation; either version 2 of the License, or
17.14 + * (at your option) any later version.
17.15 + *
17.16 + * This program is distributed in the hope that it will be useful,
17.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
17.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17.19 + * GNU General Public License for more details.
17.20 + */
17.21 +
17.22 +#ifndef lxdream_xiropt_H
17.23 +#define lxdream_xiropt_H 1
17.24 +
17.25 +#include "lxdream.h"
17.26 +#include "xlat/xir.h"
17.27 +
17.28 +/**
17.29 + * Live range data-structure. For each value defined by a program block,
17.30 + * we track its def, last use, and distance between. This is slightly
17.31 + * complicated by two additional concerns around dead store elimination:
17.32 + * * A value may be coherent or dirty (ie value is consistent with the
17.33 + * in-memory state data or not)
17.34 + * * At the end of its range, a value may be live, unconditionally
17.35 + * dead, or conditionally dead, depending on whether the home memory
17.36 + * location is overwritten before the end of the block.
17.37 + * A value is _conditionally_ dead if it is overwritten within the block,
17.38 + * but may be exposed by an exception.
17.39 + *
17.40 + * We represent this by an additional field visible_length - the length of
17.41 + * time (in instructions) that the value is (potentially) externally visible.
17.42 + * It takes the following values:
17.43 + * -1 - Always visible (ie live at end of block)
17.44 + * 0 - Never visible (ie coherent)
17.45 + * == use_length - dead after last use
17.46 + * > use_length - eventually dead at end of visibility
17.47 + */
17.48 +struct live_range {
17.49 + xir_op_t def; /* Value defining instruction */
17.50 + xir_offset_t def_offset; /* Offset of def relative to start of block */
17.51 + xir_op_t range_end; /* Last use of the value */
17.52 + xir_offset_t use_length; /* Length of range to last real use */
17.53 + xir_offset_t visible_length; /* Length of full range of visibility */
17.54 +};
17.55 +
17.56 +/**
17.57 + * Replaces registers with immediates where the value is constant. Also
17.58 + * kills align instructions where the value can be determined to be
17.59 + * aligned already (either constant address or redundant alignment check),
17.60 + * and sat* instructions where S can be determined.
17.61 + *
17.62 + * Performs a single forward pass over the IR.
17.63 + */
17.64 +void xir_constant_propagation( xir_basic_block_t xbb, xir_op_t begin, xir_op_t end );
17.65 +
17.66 +/**
17.67 + * Kill any instructions where the result cannot be exposed - that is, the value
17.68 + * is overwritten before the end of the block. Values that may be exposed by an
17.69 + * exception (but are otherwise dead) are removed by adding repair code to the
17.70 + * exception path where possible (essentially if the value can be reconstructed
17.71 + * from live values).
17.72 + *
17.73 + * Performs a single backwards pass over the IR
17.74 + */
17.75 +void xir_dead_code_elimination( xir_basic_block_t xbb, xir_op_t begin, xir_op_t end );
17.76 +
17.77 +/**
17.78 + * Compute live range data for the code in the range start..end.
17.79 + * @param begin First instruction to consider
17.80 + * @param end terminating instruction (not included in analysis). NULL for
17.81 + * entire block.
17.82 + * @param live_ranges output buffer to receive live-range data
17.83 + * @param live_ranges_size Number of entries in live_ranges.
17.84 + * @return TRUE on success, FALSE if the algorithm ran out of buffer space.
17.85 + */
17.86 +gboolean xir_live_range_calculate( xir_op_t begin, xir_op_t end,
17.87 + struct live_range *live_ranges, unsigned int live_ranges_size );
17.88 +
17.89 +void xir_live_range_dump( struct live_range *ranges );
17.90 +
17.91 +
17.92 +#endif /* !lxdream_xiropt_H */
18.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
18.2 +++ b/src/xlat/xirsup.c Tue Apr 07 10:55:03 2009 +0000
18.3 @@ -0,0 +1,209 @@
18.4 +/**
18.5 + * $Id: xirsup.c 931 2008-10-31 02:57:59Z nkeynes $
18.6 + *
18.7 + * XIR support functions and transformations for the convenience of other
18.8 + * passes/targets.
18.9 + *
18.10 + * Copyright (c) 2009 Nathan Keynes.
18.11 + *
18.12 + * This program is free software; you can redistribute it and/or modify
18.13 + * it under the terms of the GNU General Public License as published by
18.14 + * the Free Software Foundation; either version 2 of the License, or
18.15 + * (at your option) any later version.
18.16 + *
18.17 + * This program is distributed in the hope that it will be useful,
18.18 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
18.19 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18.20 + * GNU General Public License for more details.
18.21 + */
18.22 +
18.23 +#include "xlat/xir.h"
18.24 +
18.25 +/**************************** Shuffle ****************************/
18.26 +/**
18.27 + * Shuffle is a high-level instruction that rearranges bytes in an operand
18.28 + * according to an immediate pattern. This can be encoded directly on x86
18.29 + * using SSE/MMX registers, otherwise it needs to be lowered first.
18.30 + */
18.31 +
18.32 +/**
18.33 + * Apply a shuffle directly to the given operand, and return the result
18.34 + */
18.35 +uint32_t xir_shuffle_imm32( uint32_t shuffle, uint32_t operand )
18.36 +{
18.37 + int i=0,j;
18.38 + uint32_t tmp = shuffle;
18.39 + uint32_t result = 0;
18.40 + for( i=0; i<4; i++ ) {
18.41 + j = (tmp & 0x0F)-1;
18.42 + tmp >>= 4;
18.43 + if( j >= 0 && j < 4 ) {
18.44 + j = (operand >> ((3-j)<<3)) & 0xFF;
18.45 + result |= (j << (i<<3));
18.46 + }
18.47 + }
18.48 + return result;
18.49 +}
18.50 +
18.51 +/**
18.52 + * Apply a shuffle transitively to the operation (which must also be a shuffle).
18.53 + * For example, given the sequence
18.54 + * op1: shuffle 0x1243, r12
18.55 + * op2: shuffle 0x3412, r12
18.56 + * xir_shuffle_op( 0x1243, op2 ) can be used to replace op2 with
18.57 + * shuffle 0x4312, r12
18.58 + */
18.59 +void xir_shuffle_op( uint32_t shuffle, xir_op_t it )
18.60 +{
18.61 + int i=0,j;
18.62 + uint32_t in1 = shuffle;
18.63 + uint32_t in2 = it->operand[0].value.i;
18.64 + uint32_t result = 0;
18.65 + for( i=0; i<4; i++ ) {
18.66 + j = (in2 & 0x0F)-1;
18.67 + in2 >>= 4;
18.68 + if( j >= 0 && j < 4 ) {
18.69 + j = (in1 >> ((3-j)<<2)) & 0x0F;
18.70 + result |= (j << (i<<2));
18.71 + }
18.72 + }
18.73 + it->operand[0].value.i = result;
18.74 +}
18.75 +
18.76 +/**
18.77 + * Return the cost of lowering the specified shuffle as the number of instructions
18.78 + * involved.
18.79 + */
18.80 +int xir_shuffle_lower_size( xir_op_t it )
18.81 +{
18.82 + int mask_for_shift[7] = {0,0,0,0,0,0,0}; /* -3 .. 0 .. +3 */
18.83 + int arg = it->operand[0].value.i, i;
18.84 + int icount=0, found = 0;
18.85 +
18.86 + if( arg == 0x1234 ) {
18.87 + return 0;
18.88 + }
18.89 +
18.90 + /* Figure out the shift (in bytes) for each sub-byte and construct the mask/shift array */
18.91 + for( i=0; i<4; i++ ) {
18.92 + int val = (arg&0x0F);
18.93 + if( val >= 1 && val <= 4 ) {
18.94 + int shift = val - (4-i);
18.95 + mask_for_shift[shift+3] |= ( (0xFF) << (i<<3) );
18.96 + }
18.97 + arg >>= 4;
18.98 + }
18.99 +
18.100 + for( i=-3; i<4; i++ ) {
18.101 + if( mask_for_shift[i+3] != 0 ) {
18.102 + uint32_t maxmask = 0xFFFFFFFF;
18.103 + if( i < 0 ) {
18.104 + icount++;
18.105 + maxmask >>= ((-i)<<3);
18.106 + } else if( i > 0 ) {
18.107 + icount++;
18.108 + maxmask <<= (i<<3);
18.109 + }
18.110 + if( mask_for_shift[i+3] != maxmask ) {
18.111 + icount++;
18.112 + }
18.113 + if( found != 0 ) {
18.114 + icount += 2;
18.115 + }
18.116 + found++;
18.117 + }
18.118 + }
18.119 + return icount;
18.120 +}
18.121 +
18.122 +/**
18.123 + * Transform a shuffle instruction into an equivalent sequence of shifts, and
18.124 + * logical operations.
18.125 + */
18.126 +xir_op_t xir_shuffle_lower( xir_basic_block_t xbb, xir_op_t it, int tmp1, int tmp2 )
18.127 +{
18.128 + int mask_for_shift[7] = {0,0,0,0,0,0,0}; /* -3 .. 0 .. +3 */
18.129 + int arg = it->operand[0].value.i, i, first=3, last=-3;
18.130 +
18.131 + if( arg == 0x1234 ) { /* Identity - NOP */
18.132 + it->opcode = OP_NOP;
18.133 + it->operand[0].type = NO_OPERAND;
18.134 + it->operand[1].type = NO_OPERAND;
18.135 + return it;
18.136 + }
18.137 +
18.138 +
18.139 + /* Figure out the shift (in bytes) for each sub-byte and construct the mask/shift array */
18.140 + for( i=0; i<4; i++ ) {
18.141 + int val = (arg&0x0F);
18.142 + if( val >= 1 && val <= 4 ) {
18.143 + int shift = val - (4-i);
18.144 + mask_for_shift[shift+3] |= ( (0xFF) << (i<<3) );
18.145 + if( shift > last ) {
18.146 + last = shift;
18.147 + }
18.148 + if( shift < first ) {
18.149 + first = shift;
18.150 + }
18.151 + }
18.152 + arg >>= 4;
18.153 + }
18.154 +
18.155 + int shiftertype = it->operand[1].type, shifterval = it->operand[1].value.i;
18.156 + xir_op_t seq = xbb->ir_ptr;
18.157 +
18.158 + for( i=first; i<=last; i++ ) {
18.159 + if( mask_for_shift[i+3] != 0 ) {
18.160 + uint32_t maxmask = 0xFFFFFFFF;
18.161 + if( first != i ) {
18.162 + shiftertype = SOURCE_REGISTER_OPERAND;
18.163 + if( last == i ) {
18.164 + shifterval = tmp1;
18.165 + } else {
18.166 + shifterval = tmp2;
18.167 + xir_append_op2( xbb, OP_MOV, SOURCE_REGISTER_OPERAND, tmp1, shiftertype, shifterval );
18.168 + }
18.169 + }
18.170 + if( i < 0 ) {
18.171 + xir_append_op2( xbb, OP_SLR, INT_IMM_OPERAND, (-i)<<3, shiftertype, shifterval );
18.172 + maxmask >>= ((-i)<<3);
18.173 + } else if( i > 0 ) {
18.174 + xir_append_op2( xbb, OP_SLL, INT_IMM_OPERAND, i<<3, shiftertype, shifterval );
18.175 + maxmask <<= (i<<3);
18.176 + }
18.177 + if( mask_for_shift[i+3] != maxmask ) {
18.178 + xir_append_op2( xbb, OP_AND, INT_IMM_OPERAND, mask_for_shift[i+3], shiftertype, shifterval );
18.179 + }
18.180 + if( first != i ) {
18.181 + xir_append_op2( xbb, OP_OR, shiftertype, shifterval, it->operand[1].type, it->operand[1].value.i );
18.182 + }
18.183 + }
18.184 + }
18.185 +
18.186 + /* Replace original shuffle with either a temp move or a nop */
18.187 + if( first != last ) {
18.188 + it->opcode = OP_MOV;
18.189 + it->operand[0].type = it->operand[1].type;
18.190 + it->operand[0].value.i = it->operand[1].value.i;
18.191 + it->operand[1].type = SOURCE_REGISTER_OPERAND;
18.192 + it->operand[1].value.i = tmp1;
18.193 + } else {
18.194 + it->opcode = OP_NOP;
18.195 + it->operand[0].type = NO_OPERAND;
18.196 + it->operand[1].type = NO_OPERAND;
18.197 + }
18.198 +
18.199 + /* Finally insert the new sequence after the original op */
18.200 + if( xbb->ir_ptr != seq ) {
18.201 + xir_op_t last = xbb->ir_ptr-1;
18.202 + last->next = it->next;
18.203 + it->next = seq;
18.204 + seq->prev = it;
18.205 + if( last->next != 0 ) {
18.206 + last->next->prev = last;
18.207 + }
18.208 + return last;
18.209 + } else {
18.210 + return it;
18.211 + }
18.212 +}
18.213 \ No newline at end of file
19.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
19.2 +++ b/src/xlat/xlat.h Tue Apr 07 10:55:03 2009 +0000
19.3 @@ -0,0 +1,171 @@
19.4 +/**
19.5 + * $Id: xlat.h 931 2008-10-31 02:57:59Z nkeynes $
19.6 + *
19.7 + * Internal translation data structures and functions.
19.8 + *
19.9 + * Copyright (c) 2009 Nathan Keynes.
19.10 + *
19.11 + * This program is free software; you can redistribute it and/or modify
19.12 + * it under the terms of the GNU General Public License as published by
19.13 + * the Free Software Foundation; either version 2 of the License, or
19.14 + * (at your option) any later version.
19.15 + *
19.16 + * This program is distributed in the hope that it will be useful,
19.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
19.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19.19 + * GNU General Public License for more details.
19.20 + */
19.21 +
19.22 +#ifndef lxdream_xlat_H
19.23 +#define lxdream_xlat_H 1
19.24 +
19.25 +#include "xlat/xiropt.h"
19.26 +#include "xlat/xltcache.h"
19.27 +
19.28 +typedef struct target_data *target_data_t;
19.29 +
19.30 +/**
19.31 + * Source machine description. This should be immutable once constructed.
19.32 + **/
19.33 +struct xlat_source_machine {
19.34 + const char *name;
19.35 + void *state_data; /* Pointer to source machine state structure */
19.36 + const char **reg_names; /* Register names, indexed by offset/4 */
19.37 + uint32_t pc_offset; /* Offset of source PC, relative to state_data */
19.38 + uint32_t delayed_pc_offset; /* Offset of source delayed PC offset, relative to state_data */
19.39 + uint32_t t_offset; /* Offset of source T reg, relative to state_data */
19.40 + uint32_t m_offset;
19.41 + uint32_t q_offset;
19.42 + uint32_t s_offset;
19.43 +
19.44 + /**
19.45 + * Decode a basic block of instructions from start, stopping after a
19.46 + * control transfer or before the given end instruction.
19.47 + * @param sd source data. This method should set the address_space field.
19.48 + * @return the pc value after the last decoded instruction.
19.49 + */
19.50 + uint32_t (*decode_basic_block)(xir_basic_block_t xbb);
19.51 +};
19.52 +
19.53 +
19.54 +/* Target machine description (no these are not meant to be symmetrical) */
19.55 +struct xlat_target_machine {
19.56 + const char *name;
19.57 + /* Register information */
19.58 + const char **reg_names;
19.59 +
19.60 + /* Required functions */
19.61 +
19.62 + /**
19.63 + * Test if the given operands are legal for the opcode. Note that it is assumed that
19.64 + * target register operands are always legal. This is used by the register allocator
19.65 + * to determine when it can fuse load/stores with another operation.
19.66 + */
19.67 + gboolean (*is_legal)( xir_opcode_t op, xir_operand_type_t arg0, xir_operand_type_t arg1 );
19.68 +
19.69 + /**
19.70 + * Lower IR closer to the machine, handling machine-specific issues that can't
19.71 + * wait until final code-gen. Can add additional instructions where required.
19.72 + */
19.73 + void (*lower)( xir_basic_block_t xbb, xir_op_t begin, xir_op_t end );
19.74 +
19.75 + /**
19.76 + * Determine the memory required to emit code for the specified block excluding
19.77 + * exceptions. This can be an overestimate,
19.78 + * as long as it is at least large enough for the final code.
19.79 + * @param begin start of code block
19.80 + * @param end end of code block
19.81 + * @return estimated size of emitted code.
19.82 + */
19.83 + uint32_t (*get_code_size)( xir_op_t begin, xir_op_t end );
19.84 +
19.85 + /**
19.86 + * Final target code generation.
19.87 + * @param td target_data information.
19.88 + * @param begin start of code block
19.89 + * @param end end of code block
19.90 + * @param exception_table Table of pointers to exception code
19.91 + * @return number of bytes actually emitted.
19.92 + */
19.93 + uint32_t (*codegen)( target_data_t td, xir_op_t begin, xir_op_t end );
19.94 +};
19.95 +
19.96 +
19.97 +/**
19.98 + * Fixup records generated while assembling the code. Records are one of the
19.99 + * following types:
19.100 + * Constant (32 or 64-bit)
19.101 + * Exception (from a memory call or RAISEME instruction)
19.102 + *
19.103 + * Relocations may be 32/64 bit absolute or 32-bit PC-relative. The value in the
19.104 + * relocation cell is taken as the addend if nonzero. For relative relocations,
19.105 + * the relative displacement is calculated from the end of the fixup value -
19.106 + * that is, for a REL32, the result will be
19.107 + * *fixup_loc += &target - (fixup_loc+4)
19.108 + *
19.109 + * In principle we could use a global constant table, but that adds complexity
19.110 + * at this stage.
19.111 + */
19.112 +
19.113 +#define TARGET_FIXUP_CONST32 0x00
19.114 +#define TARGET_FIXUP_CONST64 0x01
19.115 +#define TARGET_FIXUP_RAISE 0x02
19.116 +#define TARGET_FIXUP_RAISEEXT 0x03 /* An exception that can be raised from outside the generated code */
19.117 +#define TARGET_FIXUP_OFFSET 0x04 /* Offset within the code block */
19.118 +#define TARGET_FIXUP_POINTER 0x05 /* Absolute pointer */
19.119 +
19.120 +#define TARGET_FIXUP_ABS32 0x00
19.121 +#define TARGET_FIXUP_ABS64 0x10
19.122 +#define TARGET_FIXUP_REL32 0x20
19.123 +#define TARGET_FIXUP_REL64 0x30
19.124 +
19.125 +#define TARGET_FIXUP_TARGET(x) ((x)&0x0F)
19.126 +#define TARGET_FIXUP_MODE(x) ((x)&0xF0)
19.127 +
19.128 +typedef struct target_fixup_struct {
19.129 + int fixup_type; /* Combination of TARGET_FIXUP flags above */
19.130 + uint32_t fixup_offset; /* Location of fixup (to be modified) relative to start of block */
19.131 + uint32_t target_offset;
19.132 + union {
19.133 + uint32_t i;
19.134 + uint64_t q;
19.135 + float f;
19.136 + double d;
19.137 + void *p;
19.138 + xir_op_t *exc;
19.139 + } value;
19.140 +} *target_fixup_t;
19.141 +
19.142 +/**
19.143 + * Temporary data maintained during code generation
19.144 + */
19.145 +struct target_data {
19.146 + struct xlat_target_machine *mach;
19.147 + struct xlat_source_machine *src;
19.148 + xlat_cache_block_t block;
19.149 + uint8_t *xlat_output;
19.150 + target_fixup_t fixup_table;
19.151 + int fixup_table_posn;
19.152 + int fixup_table_size;
19.153 +};
19.154 +
19.155 +/** Add fixup to a 32-bit constant memory value, adding the value to the constant table */
19.156 +void target_add_const32_fixup( target_data_t td, int mode, void *location, uint32_t i );
19.157 +/** Add fixup to a 64-bit constant memory value, adding the value to the constant table */
19.158 +void target_add_const64_fixup( target_data_t td, int mode, void *location, uint64_t i );
19.159 +/** Add fixup to an internal exception handler block */
19.160 +void target_add_raise_fixup( target_data_t td, int type, void *location, xir_op_t *exc );
19.161 +/** Add fixup to an externally accessible exception handle block */
19.162 +void target_add_raiseext_fixup( target_data_t td, int type, void *location, xir_op_t *exc );
19.163 +/** Add fixup to an arbitrary offset within the code block */
19.164 +void target_add_offset_fixup( target_data_t td, int type, void *location, uint32_t off );
19.165 +/** Add fixup to an arbitrary pointer */
19.166 +void target_add_pointer_fixup( target_data_t td, int type, void *location, void *p );
19.167 +
19.168 +/**
19.169 + * Generate final code for the block.
19.170 + * @return entry point of the code block.
19.171 + */
19.172 +void *target_codegen( xlat_target_machine_t target, xir_basic_block_t xbb );
19.173 +
19.174 +#endif /* lxdream_xlat_H */
20.1 --- a/src/xlat/xltcache.h Tue Apr 07 10:39:02 2009 +0000
20.2 +++ b/src/xlat/xltcache.h Tue Apr 07 10:55:03 2009 +0000
20.3 @@ -23,6 +23,21 @@
20.4 #define lxdream_xltcache_H 1
20.5
20.6 /**
20.7 + * Exception table record - this is filled out by the codegen (to backpatch)
20.8 + * and linker (for longjmp recovery)
20.9 + */
20.10 +typedef struct xlat_exception_record {
20.11 + uint32_t xlat_pc_offset; // Native PC exception source (relative to start of code block)
20.12 + uint32_t xlat_exc_offset; // Exception entry point (relative to start of code block)
20.13 +} *xlat_exception_record_t;
20.14 +
20.15 +typedef struct xlat_exception_table {
20.16 + xlat_exception_record_t exc;
20.17 + size_t next_record;
20.18 + size_t table_size;
20.19 +} *xlat_exception_table_t;
20.20 +
20.21 +/**
20.22 * For now, recovery is purely a matter of mapping native pc => sh4 pc,
20.23 * and updating sh4r.pc & sh4r.slice_cycles accordingly. In future more
20.24 * detailed recovery may be required if the translator optimizes more
.