Search
lxdream.org :: lxdream/src/xlat/x86/x86op.h
lxdream 0.9.1
released Jun 29
Download Now
filename src/xlat/x86/x86op.h
changeset 991:60c7fab9c880
next 995:eb9d43e8aa08
author nkeynes
date Wed Mar 04 23:12:21 2009 +0000 (13 years ago)
permissions -rw-r--r--
last change Move xltcache to xlat/ src directory
Commit new and improved x86 opcode file - cleaned up and added support for amd64 extended registers
view annotate diff log raw
     1 /**
     2  * $Id$
     3  * 
     4  * x86/x86-64 Instruction generation macros
     5  *
     6  * Copyright (c) 2009 Nathan Keynes.
     7  *
     8  * This program is free software; you can redistribute it and/or modify
     9  * it under the terms of the GNU General Public License as published by
    10  * the Free Software Foundation; either version 2 of the License, or
    11  * (at your option) any later version.
    12  *
    13  * This program is distributed in the hope that it will be useful,
    14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    16  * GNU General Public License for more details.
    17  */
    18 #include <stdint.h>
    19 #include <assert.h>
/******************************** Constants *****************************/

#define REG_NONE -1   /* "no register" sentinel, eg no base/index register in an address */

/* 64-bit general-purpose regs. Values are the hardware register numbers;
 * 8-15 need a REX extension bit to encode (handled by the encoders below). */
#define REG_RAX 0
#define REG_RCX 1
#define REG_RDX 2
#define REG_RBX 3
#define REG_RSP 4
#define REG_RBP 5
#define REG_RSI 6
#define REG_RDI 7
#define REG_R8  8
#define REG_R9  9
#define REG_R10 10
#define REG_R11 11
#define REG_R12 12
#define REG_R13 13
#define REG_R14 14
#define REG_R15 15
/* 32-bit general-purpose regs - same numbering as the 64-bit registers */
#define REG_EAX  0
#define REG_ECX  1
#define REG_EDX  2
#define REG_EBX  3
#define REG_ESP  4
#define REG_EBP  5
#define REG_ESI  6
#define REG_EDI  7
#define REG_R8D  8
#define REG_R9D  9
#define REG_R10D 10
#define REG_R11D 11
#define REG_R12D 12
#define REG_R13D 13
#define REG_R14D 14
#define REG_R15D 15

/* 8-bit general-purpose regs (no-rex prefix).
 * Note 4-7 are the legacy high-byte registers AH/CH/DH/BH. */
#define REG_AL   0
#define REG_CL   1
#define REG_DL   2
#define REG_BL   3
#define REG_AH   4
#define REG_CH   5
#define REG_DH   6
#define REG_BH   7

/* 8-bit general-purpose regs (rex-prefix). With a REX prefix, encodings
 * 4-7 select SPL/BPL/SIL/DIL instead of the high-byte registers. */
#define REG_SPL  4
#define REG_BPL  5
#define REG_SIL  6
#define REG_DIL  7
#define REG_R8L  8
#define REG_R9L  9
#define REG_R10L 10
#define REG_R11L 11
#define REG_R12L 12
#define REG_R13L 13
#define REG_R14L 14
#define REG_R15L 15
    85 /* Condition flag variants */
    86 #define X86_COND_O   0x00  /* OF=1 */
    87 #define X86_COND_NO  0x01  /* OF=0 */
    88 #define X86_COND_B   0x02  /* CF=1 */
    89 #define X86_COND_C   0x02  /* CF=1 */
    90 #define X86_CONF_NAE 0x02  /* CF=1 */
    91 #define X86_COND_AE  0x03  /* CF=0 */
    92 #define X86_COND_NB  0x03  /* CF=0 */
    93 #define X86_COND_NC  0x03  /* CF=0 */
    94 #define X86_COND_E   0x04  /* ZF=1 */
    95 #define X86_COND_Z   0x04  /* ZF=1 */
    96 #define X86_COND_NE  0x05  /* ZF=0 */
    97 #define X86_COND_NZ  0x05  /* ZF=0 */
    98 #define X86_COND_BE  0x06  /* CF=1 || ZF=1 */
    99 #define X86_COND_NA  0x06  /* CF=1 || ZF=1 */
   100 #define X86_COND_A   0x07  /* CF=0 && ZF=0 */
   101 #define X86_COND_NBE 0x07  /* CF=0 && ZF=0 */
   102 #define X86_COND_S   0x08  /* SF=1 */
   103 #define X86_COND_NS  0x09  /* SF=0 */
   104 #define X86_COND_P   0x0A  /* PF=1 */
   105 #define X86_COND_PE  0x0A  /* PF=1 */
   106 #define X86_COND_NP  0x0B  /* PF=0 */
   107 #define X86_COND_PO  0x0B  /* PF=0 */
   108 #define X86_COND_L   0x0C  /* SF!=OF */
   109 #define X86_COND_NGE 0x0C  /* SF!=OF */
   110 #define X86_COND_GE  0x0D  /* SF=OF */
   111 #define X86_COND_NL  0x0D  /* SF=OF */
   112 #define X86_COND_LE  0x0E  /* ZF=1 || SF!=OF */
   113 #define X86_COND_NG  0x0E  /* ZF=1 || SF!=OF */
   114 #define X86_COND_G   0x0F  /* ZF=0 && SF=OF */
   115 #define X86_COND_NLE 0x0F  /* ZF=0 && SF=OF */
/* SSE floating-point comparison variants (imm8 predicate values for the
 * SSE compare instructions) */
#define SSE_CMP_EQ    0x00
#define SSE_CMP_LT    0x01
#define SSE_CMP_LE    0x02
#define SSE_CMP_UNORD 0x03
#define SSE_CMP_NE    0x04
#define SSE_CMP_NLT   0x05
#define SSE_CMP_NLE   0x06
#define SSE_CMP_ORD   0x07
/************************** Internal definitions ***************************/
/* REX prefix bytes (0x40 base + extension bits). The encoders below OR
 * these together as needed:
 *   REXB extends the mod/rm r/m, SIB base, or opcode-embedded register
 *   REXX extends the SIB index register
 *   REXR extends the mod/rm reg field
 *   REXW selects 64-bit operand size
 */
#define PREF_REXB 0x41
#define PREF_REXX 0x42
#define PREF_REXR 0x44
#define PREF_REXW 0x48
   133 extern unsigned char *xlat_output;
   135 #define OP(x) *xlat_output++ = (x)
   136 #define OP16(x) *((uint16_t *)xlat_output) = (x); xlat_output+=2
   137 #define OP32(x) *((uint32_t *)xlat_output) = (x); xlat_output+=4
   138 #define OP64(x) *((uint64_t *)xlat_output) = (x); xlat_output+=8
   139 #define OPPTR(x) *((void **)xlat_output) = ((void *)x); xlat_output+=(sizeof(void*))
   141 /* Primary opcode emitter, eg OPCODE(0x0FBE) for MOVSX */
   142 #define OPCODE(x) if( (x) > 0xFFFF ) { OP(x>>16); OP((x>>8)&0xFF); OP(x&0xFF); } else if( (x) > 0xFF ) { OP(x>>8); OP(x&0xFF); } else { OP(x); }
   144 /* Test if immediate value is representable as a signed 8-bit integer */
   145 #define IS_INT8(imm) ((imm) >= INT8_MIN && (imm) <= INT8_MAX)
   147 /**
   148  * Encode opcode+reg with no mod/rm (eg MOV imm64, r32)
   149  */
   150 static void x86_encode_opcodereg( int rexw, uint32_t opcode, int reg )
   151 {
   152     int rex = rexw;
   153     reg &= 0x0F;
   154     if( reg >= 8 ) {
   155         rex |= PREF_REXB;
   156         reg -= 8;
   157     }
   158     if( rex != 0 ) {
   159         OP(rex);
   160     }
   161     OPCODE(opcode + reg);
   162 }
   164 /**
   165  * Encode opcode with mod/rm reg-reg operation.
   166  * @param opcode primary instruction opcode
   167  * @param rr reg field 
   168  * @param rb r/m field
   169  */
   170 static int x86_encode_reg_rm( int rexw, uint32_t opcode, int rr, int rb )
   171 {
   172     int rex = rexw;
   173     rr &= 0x0F;
   174     rb &= 0x0F;
   175     if( rr >= 8 ) {
   176         rex |= PREF_REXR;
   177         rr -= 8;
   178     }
   179     if( rb >= 8 ) {
   180         rex |= PREF_REXB;
   181         rb -= 8;
   182     }
   183     if( rex != 0 ) {
   184         OP(rex);
   185     }
   186     OPCODE(opcode);
   187     OP(0xC0|(rr<<3)|rb);
   188 }
/**
 * Encode opcode + 32-bit mod/rm memory address. (RIP-relative not supported here)
 * Emits: optional REX prefix, opcode, mod/rm byte, optional SIB byte,
 * optional disp8/disp32.
 * @param rexw REX.W prefix is required, otherwise 0
 * @param rr Reg-field register (required).
 * @param rb Base (unscaled) register, or -1 for no base register.
 * @param rx Index (scaled) register, or -1 for no index register
 * @param ss Scale shift (0..3) applied to index register (ignored if no index register)
 * @param disp32 Signed displacement (0 for none)
 */
static void x86_encode_modrm( int rexw, uint32_t opcode, int rr, int rb, int rx, int ss, int32_t disp32 )
{
    /* Construct the rex prefix where necessary. After this pass rr/rb/rx
     * are reduced to their low 3 bits, with the high bit recorded in rex. */
    int rex = rexw;
    rr &= 0x0F;
    if( rr >= 8 ) {
        rex |= PREF_REXR;
        rr -= 8;
    }
    if( rb != -1 ) {
        rb &= 0x0F;
        if( rb >= 8 ) {
            rex |= PREF_REXB;
            rb -= 8;
        }
    }
    if( rx != -1 ) {
        rx &= 0x0F;
        if( rx >= 8 ) {
            rex |= PREF_REXX;
            rx -= 8;
        }
    }

    if( rex != 0 ) {
        OP(rex);
    }
    OPCODE(opcode);

    if( rx == -1 ) {
        /* No index register: plain mod/rm forms (SIB only where mandatory) */
        if( rb == -1 ) {
            /* [disp32] displacement only - use SIB form for 64-bit mode safety */
            OP(0x04|(rr<<3));   /* mod=00, r/m=100 -> SIB follows */
            OP(0x25);           /* SIB: base=101 (none), index=100 (none) */
            OP32(disp32);
        } else if( rb == REG_ESP ) { /* [%esp + disp32] - SIB is mandatory for %esp/%r12 encodings */
            if( disp32 == 0 ) {
                OP(0x04|(rr<<3));
                OP(0x24);       /* SIB: base=%esp, no index */
            } else if( IS_INT8(disp32) ) {
                OP(0x44|(rr<<3));   /* mod=01 -> disp8 */
                OP(0x24);
                OP((int8_t)disp32);
            } else {
                OP(0x84|(rr<<3));   /* mod=10 -> disp32 */
                OP(0x24);
                OP32(disp32);
            }
        } else {
            if( disp32 == 0 && rb != REG_EBP ) { /* [%ebp] is encoded as [%ebp+0] */
                OP((rr<<3)|rb);     /* mod=00, no displacement */
            } else if( IS_INT8(disp32) ) {
                OP(0x40|(rr<<3)|rb);    /* mod=01 -> disp8 */
                OP((int8_t)disp32);
            } else {
                OP(0x80|(rr<<3)|rb);    /* mod=10 -> disp32 */
                OP32(disp32);
            }
        }
    } else { /* We have a scaled index. Goody */
        /* Index field 100 means "no index", so %esp can never be an index -
         * unless REX.X is set, in which case 100 selects %r12. */
        assert( ((rx != REG_ESP) || (rex&PREF_REXX)) && "Bug: attempt to index through %esp" ); /* Indexing by %esp is impossible */
        if( rb == -1 ) { /* [disp32 + rx << ss] */
            OP(0x04|(rr<<3));
            OP(0x05|(ss<<6)|(rx<<3));   /* SIB base=101 with mod=00 -> disp32, no base */
            OP32(disp32);
        } else if( disp32 == 0 && rb != REG_EBP ) { /* [rb + rx << ss]. (Again, %ebp needs to be %ebp+0) */
            OP(0x04|(rr<<3));
            OP((ss<<6)|(rx<<3)|rb);
        } else if( IS_INT8(disp32) ) {
            OP(0x44|(rr<<3));   /* mod=01 -> disp8 */
            OP((ss<<6)|(rx<<3)|rb);
            OP((int8_t)disp32);
        } else {
            OP(0x84|(rr<<3));   /* mod=10 -> disp32 */
            OP((ss<<6)|(rx<<3)|rb);
            OP32(disp32);
        }
    }
}
   279 /**
   280  * Encode opcode + RIP-relative mod/rm (64-bit mode only)
   281  * @param rexw PREF_REXW or 0
   282  * @param opcode primary instruction opcode
   283  * @param rr mod/rm reg field
   284  * @param disp32 RIP-relative displacement
   285  */
   286 static void x86_encode_modrm_rip(int rexw, uint32_t opcode, int rr, int32_t disp32)
   287 {
   288     int rex = rexw;
   289     rr &= 0x0F;
   290     if( rr >= 8 ) {
   291         rex |= PREF_REXR;
   292         rr -= 8;
   293     }
   294     if( rex != 0 ) {
   295         OP(rex);
   296     }
   297     OPCODE(opcode);
   298     OP(0x05|(rr<<3));
   299     OP32(disp32);
   300 }
/* 32/64-bit op emitters. 64-bit versions include a rex.w prefix. Note that any
 * other prefixes (mandatory or otherwise) need to be emitted prior to these
 * functions
 */
#define x86_encode_opcode64(opcode,reg) x86_encode_opcodereg(PREF_REXW, opcode,reg)
#define x86_encode_opcode32(opcode,reg) x86_encode_opcodereg(0,opcode,reg)
#define x86_encode_r32_rm32(opcode,rr,rb) x86_encode_reg_rm(0,opcode,rr,rb)
#define x86_encode_r64_rm64(opcode,rr,rb) x86_encode_reg_rm(PREF_REXW,opcode,rr,rb)
#define x86_encode_r32_mem32(opcode,rr,rb,rx,ss,disp32) x86_encode_modrm(0,opcode,rr,rb,rx,ss,disp32)
#define x86_encode_r64_mem64(opcode,rr,rb,rx,ss,disp32) x86_encode_modrm(PREF_REXW,opcode,rr,rb,rx,ss,disp32)
/* Pointer-sized variant: rex.w only when compiled for a 64-bit target */
#define x86_encode_rptr_memptr(opcode,rr,rb,rx,ss,disp32) x86_encode_modrm( (sizeof(void *)==8) ? PREF_REXW : 0,opcode,rr,rb,rx,ss,disp32)
#define x86_encode_r32_mem32disp32(opcode,rr,rb,disp32) x86_encode_modrm(0,opcode,rr,rb,-1,0,disp32)
#define x86_encode_r64_mem64disp64(opcode,rr,rb,disp32) x86_encode_modrm(PREF_REXW,opcode,rr,rb,-1,0,disp32)
#define x86_encode_r32_ripdisp32(opcode,rr,disp32) x86_encode_modrm_rip(0,opcode,rr,disp32)
#define x86_encode_r64_ripdisp64(opcode,rr,disp32) x86_encode_modrm_rip(PREF_REXW,opcode,rr,disp32)

/* Convenience versions for the common rbp/rsp relative displacements */
#define x86_encode_r32_rbpdisp32(opcode,rr,disp32) x86_encode_modrm(0,opcode,rr,REG_RBP,-1,0,disp32)
#define x86_encode_r64_rbpdisp64(opcode,rr,disp32) x86_encode_modrm(PREF_REXW,opcode,rr,REG_RBP,-1,0,disp32)
#define x86_encode_r32_rspdisp32(opcode,rr,disp32) x86_encode_modrm(0,opcode,rr,REG_RSP,-1,0,disp32)
#define x86_encode_r64_rspdisp64(opcode,rr,disp32) x86_encode_modrm(PREF_REXW,opcode,rr,REG_RSP,-1,0,disp32)
/* Immediate-selection variants (for instructions with imm8s/imm32 variants).
 * Each picks the short (sign-extended imm8) opcode when the immediate fits
 * in a signed byte, otherwise the full imm32 opcode.
 * NOTE(review): these expand to a bare if/else rather than do{}while(0),
 * so they must be used as a complete statement - do not place them in an
 * unbraced if/else body. */
#define x86_encode_imms_rm32(opcode8,opcode32,reg,imm,rb) \
    if( IS_INT8(((int32_t)imm)) ) { x86_encode_r32_rm32(opcode8,reg,rb); OP((int8_t)imm); \
                } else { x86_encode_r32_rm32(opcode32,reg,rb); OP32(imm); }
#define x86_encode_imms_rm64(opcode8,opcode32,reg,imm,rb) \
    if( IS_INT8(((int32_t)imm)) ) { x86_encode_r64_rm64(opcode8,reg,rb); OP((int8_t)imm); \
                } else { x86_encode_r64_rm64(opcode32,reg,rb); OP32(imm); }
#define x86_encode_imms_rbpdisp32(opcode8,opcode32,reg,imm,disp) \
    if( IS_INT8(((int32_t)imm)) ) { x86_encode_r32_rbpdisp32(opcode8,reg,disp); OP((int8_t)imm); \
                } else { x86_encode_r32_rbpdisp32(opcode32,reg,disp); OP32(imm); }
#define x86_encode_imms_r32disp32(opcode8,opcode32,reg,imm,rb,disp) \
    if( IS_INT8(((int32_t)imm)) ) { x86_encode_r32_mem32disp32(opcode8,reg,rb,disp); OP((int8_t)imm); \
                } else { x86_encode_r32_mem32disp32(opcode32,reg,rb,disp); OP32(imm); }
#define x86_encode_imms_rbpdisp64(opcode8,opcode32,reg,imm,disp) \
    if( IS_INT8(((int32_t)imm)) ) { x86_encode_r64_rbpdisp64(opcode8,reg,disp); OP((int8_t)imm); \
                } else { x86_encode_r64_rbpdisp64(opcode32,reg,disp); OP32(imm); }
/*************************** Instruction definitions ***********************/
/* Note this does not try to be an exhaustive definition of the instruction -
 * it generally only has the forms that we actually need here.
 */
/* Core Integer instructions */

/* ADC - add with carry. The small constant (2 here) in the imms forms is
 * the reg-field opcode extension for the 0x80/0x81/0x83 immediate group. */
#define ADCB_imms_r8(imm,r1)         x86_encode_r32_rm32(0x80, 2, r1); OP(imm)
#define ADCB_r8_r8(r1,r2)            x86_encode_r32_rm32(0x10, r1, r2)
#define ADCL_imms_r32(imm,r1)        x86_encode_imms_rm32(0x83, 0x81, 2, imm, r1)
#define ADCL_imms_rbpdisp(imm,disp)  x86_encode_imms_rbpdisp32(0x83, 0x81, 2, imm, disp)
#define ADCL_r32_r32(r1,r2)          x86_encode_r32_rm32(0x11, r1, r2)
#define ADCL_r32_rbpdisp(r1,disp)    x86_encode_r32_rbpdisp32(0x11, r1, disp)
#define ADCL_rbpdisp_r32(disp,r1)    x86_encode_r32_rbpdisp32(0x13, r1, disp)
#define ADCQ_imms_r64(imm,r1)        x86_encode_imms_rm64(0x83, 0x81, 2, imm, r1)
#define ADCQ_r64_r64(r1,r2)          x86_encode_r64_rm64(0x11, r1, r2)

/* ADD (immediate group extension /0) */
#define ADDB_imms_r8(imm,r1)         x86_encode_r32_rm32(0x80, 0, r1); OP(imm)
#define ADDB_r8_r8(r1,r2)            x86_encode_r32_rm32(0x00, r1, r2)
#define ADDL_imms_r32(imm,r1)        x86_encode_imms_rm32(0x83, 0x81, 0, imm, r1)
#define ADDL_imms_r32disp(imm,rb,d)  x86_encode_imms_r32disp32(0x83, 0x81, 0, imm, rb, d)
#define ADDL_imms_rbpdisp(imm,disp)  x86_encode_imms_rbpdisp32(0x83, 0x81, 0, imm, disp)
#define ADDL_r32_r32(r1,r2)          x86_encode_r32_rm32(0x01, r1, r2)
#define ADDL_r32_rbpdisp(r1,disp)    x86_encode_r32_rbpdisp32(0x01, r1, disp)
#define ADDL_r32_r32disp(r1,r2,dsp)  x86_encode_r32_mem32disp32(0x01, r1, r2, dsp)
#define ADDL_rbpdisp_r32(disp,r1)    x86_encode_r32_rbpdisp32(0x03, r1, disp)
#define ADDQ_imms_r64(imm,r1)        x86_encode_imms_rm64(0x83, 0x81, 0, imm, r1)
#define ADDQ_r64_r64(r1,r2)          x86_encode_r64_rm64(0x01, r1, r2)

/* AND (immediate group extension /4) */
#define ANDB_imms_r8(imm,r1)         x86_encode_r32_rm32(0x80, 4, r1); OP(imm)
#define ANDB_r8_r8(r1,r2)            x86_encode_r32_rm32(0x20, r1, r2)
#define ANDL_imms_r32(imm,r1)        x86_encode_imms_rm32(0x83, 0x81, 4, imm, r1)
#define ANDL_imms_rbpdisp(imm,disp)  x86_encode_imms_rbpdisp32(0x83,0x81,4,imm,disp)
#define ANDL_r32_r32(r1,r2)          x86_encode_r32_rm32(0x21, r1, r2)
#define ANDL_r32_rbpdisp(r1,disp)    x86_encode_r32_rbpdisp32(0x21, r1, disp)
#define ANDL_rbpdisp_r32(disp,r1)    x86_encode_r32_rbpdisp32(0x23, r1, disp)
#define ANDQ_r64_r64(r1,r2)          x86_encode_r64_rm64(0x21, r1, r2)
#define ANDQ_imms_r64(imm,r1)        x86_encode_imms_rm64(0x83, 0x81, 4, imm, r1)

/* Flag manipulation: clear carry, clear direction, complement carry */
#define CLC()                        OP(0xF8)
#define CLD()                        OP(0xFC)
#define CMC()                        OP(0xF5)

/* CMOVcc - conditional move; cc is an X86_COND_* value */
#define CMOVCCL_cc_r32_r32(cc,r1,r2) x86_encode_r32_rm32(0x0F40+(cc), r2, r1)
#define CMOVCCL_cc_rbpdisp_r32(cc,d,r1) x86_encode_r32_rbpdisp32(0x0F40+(cc), r1, d)

/* CMP (immediate group extension /7) */
#define CMPB_imms_r8(imm,r1)         x86_encode_r32_rm32(0x80, 7, r1); OP(imm)
#define CMPB_imms_rbpdisp(imm,disp)  x86_encode_r32_rbpdisp32(0x80, 7, disp); OP(imm)
#define CMPB_r8_r8(r1,r2)            x86_encode_r32_rm32(0x38, r1, r2)
#define CMPL_imms_r32(imm,r1)        x86_encode_imms_rm32(0x83, 0x81, 7, imm, r1)
#define CMPL_imms_rbpdisp(imm,disp)  x86_encode_imms_rbpdisp32(0x83, 0x81, 7, imm, disp)
#define CMPL_r32_r32(r1,r2)          x86_encode_r32_rm32(0x39, r1, r2)
#define CMPL_r32_rbpdisp(r1,disp)    x86_encode_r32_rbpdisp32(0x39, r1, disp)
#define CMPL_rbpdisp_r32(disp,r1)    x86_encode_r32_rbpdisp32(0x3B, r1, disp)
#define CMPQ_imms_r64(imm,r1)        x86_encode_imms_rm64(0x83, 0x81, 7, imm, r1)
#define CMPQ_r64_r64(r1,r2)          x86_encode_r64_rm64(0x39, r1, r2)

/* IDIV - signed divide of edx:eax (rdx:rax) by the operand (0xF7 /7) */
#define IDIVL_r32(r1)                x86_encode_r32_rm32(0xF7, 7, r1)
#define IDIVL_rbpdisp(disp)          x86_encode_r32_rbpdisp32(0xF7, 7, disp)
#define IDIVQ_r64(r1)                x86_encode_r64_rm64(0xF7, 7, r1)

/* IMUL - signed multiply (one-operand 0xF7 /5, two-operand 0x0FAF,
 * and the imm forms 0x6B/0x69 which write back to the same register) */
#define IMULL_imms_r32(imm,r1)       x86_encode_imms_rm32(0x6B,0x69, r1, imm, r1)
#define IMULL_r32(r1)                x86_encode_r32_rm32(0xF7, 5, r1)
#define IMULL_r32_r32(r1,r2)         x86_encode_r32_rm32(0x0FAF, r2, r1)
#define IMULL_rbpdisp(disp)          x86_encode_r32_rbpdisp32(0xF7, 5, disp)
#define IMULL_rbpdisp_r32(disp,r1)   x86_encode_r32_rbpdisp32(0x0FAF, r1, disp)
#define IMULL_rspdisp(disp)          x86_encode_r32_rspdisp32(0xF7, 5, disp)
#define IMULL_rspdisp_r32(disp,r1)   x86_encode_r32_rspdisp32(0x0FAF, r1, disp)
#define IMULQ_imms_r64(imm,r1)       x86_encode_imms_rm64(0x6B,0x69, r1, imm, r1)
#define IMULQ_r64_r64(r1,r2)         x86_encode_r64_rm64(0x0FAF, r2, r1)
   410 #define LEAL_r32disp_r32(r1,disp,r2) x86_encode_r32_mem32(0x8D, r2, r1, -1, 0, disp)
   411 #define LEAL_rbpdisp_r32(disp,r1)    x86_encode_r32_rbpdisp32(0x8D, r1, disp)
   412 #define LEAL_sib_r32(ss,ii,bb,d,r1)  x86_encode_r32_mem32(0x8D, r1, bb, ii, ss, d)
   413 #define LEAQ_r64disp_r64(r1,disp,r2) x86_encode_r64_mem64(0x8D, r2, r1, -1, 0, disp)
   414 #define LEAQ_rbpdisp_r64(disp,r1)    x86_encode_r64_rbpdisp64(0x8D, r1, disp)
   415 #define LEAP_rptrdisp_rptr(r1,d,r2)  x86_encode_rptr_memptr(0x8D, r2, r1, -1, 0, disp)
   416 #define LEAP_rbpdisp_rptr(disp,r1)   x86_encode_rptr_memptr(0x8D, r1, REG_RBP, -1, 0, disp)
   417 #define LEAP_sib_rptr(ss,ii,bb,d,r1) x86_encode_rptr_memptr(0x8D, r1, bb, ii, ss, d)
/* MOV family. The *P (pointer-sized) variants select 32/64-bit encodings
 * at compile time from sizeof(void*). */
#define MOVB_r8_r8(r1,r2)            x86_encode_r32_rm32(0x88, r1, r2)
#define MOVL_imm32_r32(i32,r1)       x86_encode_opcode32(0xB8, r1); OP32(i32)
#define MOVL_imm32_rbpdisp(i,disp)   x86_encode_r32_rbpdisp32(0xC7,0,disp); OP32(i)
#define MOVL_imm32_rspdisp(i,disp)   x86_encode_r32_rspdisp32(0xC7,0,disp); OP32(i)
#define MOVL_moffptr_eax(p)          OP(0xA1); OPPTR(p)   /* moffs load into %eax */
#define MOVL_r32_r32(r1,r2)          x86_encode_r32_rm32(0x89, r1, r2)
#define MOVL_r32_r32disp(r1,r2,dsp)  x86_encode_r32_mem32disp32(0x89, r1, r2, dsp)
#define MOVL_r32_rbpdisp(r1,disp)    x86_encode_r32_rbpdisp32(0x89, r1, disp)
#define MOVL_r32_rspdisp(r1,disp)    x86_encode_r32_rspdisp32(0x89, r1, disp)
#define MOVL_r32_sib(r1,ss,ii,bb,d)  x86_encode_r32_mem32(0x89, r1, bb, ii, ss, d)
#define MOVL_r32disp_r32(r1,dsp,r2)  x86_encode_r32_mem32disp32(0x8B, r2, r1, dsp)
#define MOVL_rbpdisp_r32(disp,r1)    x86_encode_r32_rbpdisp32(0x8B, r1, disp)
#define MOVL_rspdisp_r32(disp,r1)    x86_encode_r32_rspdisp32(0x8B, r1, disp)
#define MOVL_sib_r32(ss,ii,bb,d,r1)  x86_encode_r32_mem32(0x8B, r1, bb, ii, ss, d)
#define MOVQ_imm64_r64(i64,r1)       x86_encode_opcode64(0xB8, r1); OP64(i64)
#define MOVQ_moffptr_rax(p)          OP(PREF_REXW); OP(0xA1); OPPTR(p)
#define MOVQ_r64_r64(r1,r2)          x86_encode_r64_rm64(0x89, r1, r2)
#define MOVQ_r64_rbpdisp(r1,disp)    x86_encode_r64_rbpdisp64(0x89, r1, disp)
#define MOVQ_r64_rspdisp(r1,disp)    x86_encode_r64_rspdisp64(0x89, r1, disp)
#define MOVQ_rbpdisp_r64(disp,r1)    x86_encode_r64_rbpdisp64(0x8B, r1, disp)
#define MOVQ_rspdisp_r64(disp,r1)    x86_encode_r64_rspdisp64(0x8B, r1, disp)
#define MOVP_immptr_rptr(p,r1)       x86_encode_opcodereg( (sizeof(void*)==8 ? PREF_REXW : 0), 0xB8, r1); OPPTR(p)
#define MOVP_moffptr_rax(p)          if( sizeof(void*)==8 ) { OP(PREF_REXW); } OP(0xA1); OPPTR(p)
#define MOVP_sib_rptr(ss,ii,bb,d,r1) x86_encode_rptr_memptr(0x8B, r1, bb, ii, ss, d)

/* MOVSX - sign-extending loads/moves */
#define MOVSXL_r8_r32(r1,r2)         x86_encode_r32_rm32(0x0FBE, r2, r1)
#define MOVSXL_r16_r32(r1,r2)        x86_encode_r32_rm32(0x0FBF, r2, r1)
#define MOVSXL_rbpdisp8_r32(disp,r1) x86_encode_r32_rbpdisp32(0x0FBE, r1, disp)
#define MOVSXL_rbpdisp16_r32(dsp,r1) x86_encode_r32_rbpdisp32(0x0FBF, r1, dsp)
#define MOVSXQ_imm32_r64(i32,r1)     x86_encode_r64_rm64(0xC7, 0, r1); OP32(i32) /* Technically a MOV */
#define MOVSXQ_r8_r64(r1,r2)         x86_encode_r64_rm64(0x0FBE, r2, r1)
#define MOVSXQ_r16_r64(r1,r2)        x86_encode_r64_rm64(0x0FBF, r2, r1)
#define MOVSXQ_r32_r64(r1,r2)        x86_encode_r64_rm64(0x63, r2, r1)
#define MOVSXQ_rbpdisp32_r64(dsp,r1) x86_encode_r64_rbpdisp64(0x63, r1, dsp)

/* MOVZX - zero-extending loads/moves */
#define MOVZXL_r8_r32(r1,r2)         x86_encode_r32_rm32(0x0FB6, r2, r1)
#define MOVZXL_r16_r32(r1,r2)        x86_encode_r32_rm32(0x0FB7, r2, r1)
#define MOVZXL_rbpdisp8_r32(disp,r1) x86_encode_r32_rbpdisp32(0x0FB6, r1, disp)
#define MOVZXL_rbpdisp16_r32(dsp,r1) x86_encode_r32_rbpdisp32(0x0FB7, r1, dsp)

/* MUL - unsigned multiply by eax (0xF7 /4) */
#define MULL_r32(r1)                 x86_encode_r32_rm32(0xF7, 4, r1)
#define MULL_rbpdisp(disp)           x86_encode_r32_rbpdisp32(0xF7,4,disp)
#define MULL_rspdisp(disp)           x86_encode_r32_rspdisp32(0xF7,4,disp)
   463 #define NEGB_r8(r1)                  x86_encode_r32_rm32(0xF6, 3, r1)
   464 #define NEGL_r32(r1)                 x86_encode_r32_rm32(0xF7, 3, r1)
   465 #define NEGL_rbpdisp(r1)             x86_encode_r32_rbspdisp32(0xF7, 3, disp)
   466 #define NEGQ_r64(r1)                 x86_encode_r64_rm64(0xF7, 3, r1)
   468 #define NOTB_r8(r1)                  x86_encode_r32_rm32(0xF6, 2, r1)
   469 #define NOTL_r32(r1)                 x86_encode_r32_rm32(0xF7, 2, r1)
   470 #define NOTL_rbpdisp(r1)             x86_encode_r32_rbspdisp32(0xF7, 2, disp)
   471 #define NOTQ_r64(r1)                 x86_encode_r64_rm64(0xF7, 2, r1)
/* OR (immediate group extension /1) */
#define ORB_imms_r8(imm,r1)          x86_encode_r32_rm32(0x80, 1, r1); OP(imm)
#define ORB_r8_r8(r1,r2)             x86_encode_r32_rm32(0x08, r1, r2)
#define ORL_imms_r32(imm,r1)         x86_encode_imms_rm32(0x83, 0x81, 1, imm, r1)
#define ORL_imms_rbpdisp(imm,disp)   x86_encode_imms_rbpdisp32(0x83,0x81,1,imm,disp)
#define ORL_r32_r32(r1,r2)           x86_encode_r32_rm32(0x09, r1, r2)
#define ORL_r32_rbpdisp(r1,disp)     x86_encode_r32_rbpdisp32(0x09, r1, disp)
#define ORL_rbpdisp_r32(disp,r1)     x86_encode_r32_rbpdisp32(0x0B, r1, disp)
#define ORQ_imms_r64(imm,r1)         x86_encode_imms_rm64(0x83, 0x81, 1, imm, r1)
#define ORQ_r64_r64(r1,r2)           x86_encode_r64_rm64(0x09, r1, r2)

/* Stack push/pop (opcode-embedded register forms) */
#define POP_r32(r1)                  x86_encode_opcode32(0x58, r1)

#define PUSH_imm32(imm)              OP(0x68); OP32(imm)
#define PUSH_r32(r1)                 x86_encode_opcode32(0x50, r1)

/* Rotate instructions. Shift-group opcodes: 0xD3 = by %cl, 0xD1 = by 1,
 * 0xC1 = by imm8; the /n extension selects the operation. */
#define RCLL_cl_r32(r1)              x86_encode_r32_rm32(0xD3,2,r1)
#define RCLL_imm_r32(imm,r1)         if( imm == 1 ) { x86_encode_r32_rm32(0xD1,2,r1); } else { x86_encode_r32_rm32(0xC1,2,r1); OP(imm); }
#define RCLQ_cl_r64(r1)              x86_encode_r64_rm64(0xD3,2,r1)
#define RCLQ_imm_r64(imm,r1)         if( imm == 1 ) { x86_encode_r64_rm64(0xD1,2,r1); } else { x86_encode_r64_rm64(0xC1,2,r1); OP(imm); }
#define RCRL_cl_r32(r1)              x86_encode_r32_rm32(0xD3,3,r1)
#define RCRL_imm_r32(imm,r1)         if( imm == 1 ) { x86_encode_r32_rm32(0xD1,3,r1); } else { x86_encode_r32_rm32(0xC1,3,r1); OP(imm); }
#define RCRQ_cl_r64(r1)              x86_encode_r64_rm64(0xD3,3,r1)
#define RCRQ_imm_r64(imm,r1)         if( imm == 1 ) { x86_encode_r64_rm64(0xD1,3,r1); } else { x86_encode_r64_rm64(0xC1,3,r1); OP(imm); }
#define ROLL_cl_r32(r1)              x86_encode_r32_rm32(0xD3,0,r1)
#define ROLL_imm_r32(imm,r1)         if( imm == 1 ) { x86_encode_r32_rm32(0xD1,0,r1); } else { x86_encode_r32_rm32(0xC1,0,r1); OP(imm); }
#define ROLQ_cl_r64(r1)              x86_encode_r64_rm64(0xD3,0,r1)
#define ROLQ_imm_r64(imm,r1)         if( imm == 1 ) { x86_encode_r64_rm64(0xD1,0,r1); } else { x86_encode_r64_rm64(0xC1,0,r1); OP(imm); }
#define RORL_cl_r32(r1)              x86_encode_r32_rm32(0xD3,1,r1)
#define RORL_imm_r32(imm,r1)         if( imm == 1 ) { x86_encode_r32_rm32(0xD1,1,r1); } else { x86_encode_r32_rm32(0xC1,1,r1); OP(imm); }
#define RORQ_cl_r64(r1)              x86_encode_r64_rm64(0xD3,1,r1)
#define RORQ_imm_r64(imm,r1)         if( imm == 1 ) { x86_encode_r64_rm64(0xD1,1,r1); } else { x86_encode_r64_rm64(0xC1,1,r1); OP(imm); }

/* Shift instructions (same group: SAR /7, SHL /4, SHR /5) */
#define SARL_cl_r32(r1)              x86_encode_r32_rm32(0xD3,7,r1)
#define SARL_imm_r32(imm,r1)         if( imm == 1 ) { x86_encode_r32_rm32(0xD1,7,r1); } else { x86_encode_r32_rm32(0xC1,7,r1); OP(imm); }
#define SARQ_cl_r64(r1)              x86_encode_r64_rm64(0xD3,7,r1)
#define SARQ_imm_r64(imm,r1)         if( imm == 1 ) { x86_encode_r64_rm64(0xD1,7,r1); } else { x86_encode_r64_rm64(0xC1,7,r1); OP(imm); }
#define SHLL_cl_r32(r1)              x86_encode_r32_rm32(0xD3,4,r1)
#define SHLL_imm_r32(imm,r1)         if( imm == 1 ) { x86_encode_r32_rm32(0xD1,4,r1); } else { x86_encode_r32_rm32(0xC1,4,r1); OP(imm); }
#define SHLQ_cl_r64(r1)              x86_encode_r64_rm64(0xD3,4,r1)
#define SHLQ_imm_r64(imm,r1)         if( imm == 1 ) { x86_encode_r64_rm64(0xD1,4,r1); } else { x86_encode_r64_rm64(0xC1,4,r1); OP(imm); }
#define SHRL_cl_r32(r1)              x86_encode_r32_rm32(0xD3,5,r1)
#define SHRL_imm_r32(imm,r1)         if( imm == 1 ) { x86_encode_r32_rm32(0xD1,5,r1); } else { x86_encode_r32_rm32(0xC1,5,r1); OP(imm); }
#define SHRQ_cl_r64(r1)              x86_encode_r64_rm64(0xD3,5,r1)
#define SHRQ_imm_r64(imm,r1)         if( imm == 1 ) { x86_encode_r64_rm64(0xD1,5,r1); } else { x86_encode_r64_rm64(0xC1,5,r1); OP(imm); }

/* SBB - subtract with borrow (immediate group extension /3) */
#define SBBB_imms_r8(imm,r1)         x86_encode_r32_rm32(0x80, 3, r1); OP(imm)
#define SBBB_r8_r8(r1,r2)            x86_encode_r32_rm32(0x18, r1, r2)
#define SBBL_imms_r32(imm,r1)        x86_encode_imms_rm32(0x83, 0x81, 3, imm, r1)
#define SBBL_imms_rbpdisp(imm,disp)  x86_encode_imms_rbpdisp32(0x83,0x81,3,imm,disp)
#define SBBL_r32_r32(r1,r2)          x86_encode_r32_rm32(0x19, r1, r2)
#define SBBL_r32_rbpdisp(r1,disp)    x86_encode_r32_rbpdisp32(0x19, r1, disp)
#define SBBL_rbpdisp_r32(disp,r1)    x86_encode_r32_rbpdisp32(0x1B, r1, disp)
#define SBBQ_imms_r64(imm,r1)        x86_encode_imms_rm64(0x83, 0x81, 3, imm, r1)
#define SBBQ_r64_r64(r1,r2)          x86_encode_r64_rm64(0x19, r1, r2)

/* SETcc - set byte on condition; cc is an X86_COND_* value */
#define SETCCB_cc_r8(cc,r1)          x86_encode_r32_rm32(0x0F90+(cc), 0, r1)
#define SETCCB_cc_rbpdisp(cc,disp)   x86_encode_r32_rbpdisp32(0x0F90+(cc), 0, disp)

/* Set carry / set direction */
#define STC()                        OP(0xF9)
#define STD()                        OP(0xFD)

/* SUB (immediate group extension /5) */
#define SUBB_imms_r8(imm,r1)         x86_encode_r32_rm32(0x80, 5, r1); OP(imm)
#define SUBB_r8_r8(r1,r2)            x86_encode_r32_rm32(0x28, r1, r2)
#define SUBL_imms_r32(imm,r1)        x86_encode_imms_rm32(0x83, 0x81, 5, imm, r1)
#define SUBL_imms_rbpdisp(imm,disp)  x86_encode_imms_rbpdisp32(0x83,0x81,5,imm,disp)
#define SUBL_r32_r32(r1,r2)          x86_encode_r32_rm32(0x29, r1, r2)
#define SUBL_r32_rbpdisp(r1,disp)    x86_encode_r32_rbpdisp32(0x29, r1, disp)
#define SUBL_rbpdisp_r32(disp,r1)    x86_encode_r32_rbpdisp32(0x2B, r1, disp)
#define SUBQ_imms_r64(imm,r1)        x86_encode_imms_rm64(0x83, 0x81, 5, imm, r1)
#define SUBQ_r64_r64(r1,r2)          x86_encode_r64_rm64(0x29, r1, r2)

/* TEST - AND without writeback; note TEST has no imm8-sign-extended form,
 * so the immediate variants always emit a full imm32 */
#define TESTB_imms_r8(imm,r1)        x86_encode_r32_rm32(0xF6, 0, r1); OP(imm)
#define TESTB_r8_r8(r1,r2)           x86_encode_r32_rm32(0x84, r1, r2)
#define TESTL_imms_r32(imm,r1)       x86_encode_r32_rm32(0xF7, 0, r1); OP32(imm)
#define TESTL_imms_rbpdisp(imm,dsp)  x86_encode_r32_rbpdisp32(0xF7, 0, dsp); OP32(imm)
#define TESTL_r32_r32(r1,r2)         x86_encode_r32_rm32(0x85, r1, r2)
#define TESTL_r32_rbpdisp(r1,disp)   x86_encode_r32_rbpdisp32(0x85, r1, disp)
#define TESTL_rbpdisp_r32(disp,r1)   x86_encode_r32_rbpdisp32(0x85, r1, disp) /* Same OP */
#define TESTQ_imms_r64(imm,r1)       x86_encode_r64_rm64(0xF7, 0, r1); OP32(imm)
#define TESTQ_r64_r64(r1,r2)         x86_encode_r64_rm64(0x85, r1, r2)

/* XCHG - exchange */
#define XCHGB_r8_r8(r1,r2)           x86_encode_r32_rm32(0x86, r1, r2)
#define XCHGL_r32_r32(r1,r2)         x86_encode_r32_rm32(0x87, r1, r2)
#define XCHGQ_r64_r64(r1,r2)         x86_encode_r64_rm64(0x87, r1, r2)

/* XOR (immediate group extension /6) */
#define XORB_imms_r8(imm,r1)         x86_encode_r32_rm32(0x80, 6, r1); OP(imm)
#define XORB_r8_r8(r1,r2)            x86_encode_r32_rm32(0x30, r1, r2)
#define XORL_imms_r32(imm,r1)        x86_encode_imms_rm32(0x83, 0x81, 6, imm, r1)
#define XORL_imms_rbpdisp(imm,disp)  x86_encode_imms_rbpdisp32(0x83,0x81,6,imm,disp)
#define XORL_r32_r32(r1,r2)          x86_encode_r32_rm32(0x31, r1, r2)
#define XORL_r32_rbpdisp(r1,disp)    x86_encode_r32_rbpdisp32(0x31, r1, disp)
#define XORL_rbpdisp_r32(disp,r1)    x86_encode_r32_rbpdisp32(0x33, r1, disp)
#define XORQ_imms_r64(imm,r1)         x86_encode_imms_rm64(0x83, 0x81, 6, imm, r1)
#define XORQ_r64_r64(r1,r2)           x86_encode_r64_rm64(0x31, r1, r2)
/* Control flow. Relative forms take displacements measured from the end
 * of the emitted instruction (as the hardware expects); JMP_prerel takes
 * one measured from the start and adjusts for the instruction length. */
#define CALL_rel(rel)                OP(0xE8); OP32(rel)
#define CALL_imm32(ptr)              x86_encode_r32_mem32disp32(0xFF, 2, -1, ptr)  /* indirect through absolute [ptr] */
#define CALL_r32(r1)                 x86_encode_r32_rm32(0xFF, 2, r1)
#define CALL_r32disp(r1,disp)        x86_encode_r32_mem32disp32(0xFF, 2, r1, disp)

#define JCC_cc_rel8(cc,rel)          OP(0x70+(cc)); OP(rel)
#define JCC_cc_rel32(cc,rel)         OP(0x0F); OP(0x80+(cc)); OP32(rel)
#define JCC_cc_rel(cc,rel)           if( IS_INT8(rel) ) { JCC_cc_rel8(cc,(int8_t)rel); } else { JCC_cc_rel32(cc,rel); }

#define JMP_rel8(rel)                OP(0xEB); OP(rel)
#define JMP_rel32(rel)               OP(0xE9); OP32(rel)
#define JMP_rel(rel)                 if( IS_INT8(rel) ) { JMP_rel8((int8_t)rel); } else { JMP_rel32(rel); }
/* Adjusts by the instruction length (2 bytes short form, 5 bytes long form).
 * The cast-then-subtract in the short branch is harmless: the emitted byte
 * is the same mod 256 either way. */
#define JMP_prerel(rel)              if( IS_INT8(((int32_t)rel)-2) ) { JMP_rel8(((int8_t)rel)-2); } else { JMP_rel32(((int32_t)rel)-5); }
#define JMP_r32(r1,disp)             x86_encode_r32_rm32(0xFF, 4, r1)
#define JMP_r32disp(r1,disp)         x86_encode_r32_mem32disp32(0xFF, 4, r1, disp)
#define RET()                        OP(0xC3)
#define RET_imm(imm)                 OP(0xC2); OP16(imm)  /* return and pop imm bytes of arguments */
/* x87 Floating point instructions */
#define FABS_st0()                   OP(0xD9); OP(0xE1)
#define FADDP_st(st)                 OP(0xDE); OP(0xC0+(st))
#define FCHS_st0()                   OP(0xD9); OP(0xE0)
#define FCOMIP_st(st)                OP(0xDF); OP(0xF0+(st))
#define FDIVP_st(st)                 OP(0xDE); OP(0xF8+(st))
#define FILD_r32disp(r32, disp)      x86_encode_r32_mem32disp32(0xDB, 0, r32, disp)
/* D9 EE = FLDZ (push +0.0); named FLD0 for symmetry with FLD1 */
#define FLD0_st0()                   OP(0xD9); OP(0xEE);
#define FLD1_st0()                   OP(0xD9); OP(0xE8);
#define FLDCW_r32disp(r32, disp)     x86_encode_r32_mem32disp32(0xD9, 5, r32, disp)
#define FMULP_st(st)                 OP(0xDE); OP(0xC8+(st))
#define FNSTCW_r32disp(r32, disp)    x86_encode_r32_mem32disp32(0xD9, 7, r32, disp)
/* FFREE st(0) + FINCSTP: pop the stack without storing the value anywhere */
#define FPOP_st()                    OP(0xDD); OP(0xC0); OP(0xD9); OP(0xF7)
#define FSUBP_st(st)                 OP(0xDE); OP(0xE8+(st))
#define FSQRT_st0()                  OP(0xD9); OP(0xFA)
/* Memory forms addressed as [rbp+disp32]: DB=m32int, D9=m32fp, DD=m64fp;
 * reg-field /0 = load (FILD/FLD), /3 = store-and-pop (FISTP/FSTP) */
#define FILD_rbpdisp(disp)           x86_encode_r32_rbpdisp32(0xDB, 0, disp)
#define FLDF_rbpdisp(disp)           x86_encode_r32_rbpdisp32(0xD9, 0, disp)
#define FLDD_rbpdisp(disp)           x86_encode_r32_rbpdisp32(0xDD, 0, disp)
#define FISTP_rbpdisp(disp)          x86_encode_r32_rbpdisp32(0xDB, 3, disp)
#define FSTPF_rbpdisp(disp)          x86_encode_r32_rbpdisp32(0xD9, 3, disp)
#define FSTPD_rbpdisp(disp)          x86_encode_r32_rbpdisp32(0xDD, 3, disp)
   612 /* SSE Packed floating point instructions */
   613 #define ADDPS_rbpdisp_xmm(disp,r1)   x86_encode_r32_rbpdisp32(0x0F58, r1, disp)
   614 #define ADDPS_xmm_xmm(r1,r2)         x86_encode_r32_rm32(0x0F58, r2, r1)
   615 #define ANDPS_rbpdisp_xmm(disp,r1)   x86_encode_r32_rbpdisp32(0x0F54, r1, disp)
   616 #define ANDPS_xmm_xmm(r1,r2)         x86_encode_r32_rm32(0x0F54, r2, r1)
   617 #define ANDNPS_rbpdisp_xmm(disp,r1)  x86_encode_r32_rbpdisp32(0x0F55, r1, disp)
   618 #define ANDNPS_xmm_xmm(r1,r2)        x86_encode_r32_rm32(0x0F55, r2, r1)
   619 #define CMPPS_cc_rbpdisp_xmm(cc,d,r) x86_encode_r32_rbpdisp32(0x0FC2, r, d); OP(cc)
   620 #define CMPPS_cc_xmm_xmm(cc,r1,r2)   x86_encode_r32_rm32(0x0FC2, r2, r1); OP(cc)
   621 #define DIVPS_rbpdisp_xmm(disp,r1)   x86_encode_r32_rbpdisp32(0x0F5E, r1, disp)
   622 #define DIVPS_xmm_xmm(r1,r2)         x86_encode_r32_rm32(0x0F5E, r2, r1)
   623 #define MAXPS_rbpdisp_xmm(disp,r1)   x86_encode_r32_rbpdisp32(0x0F5F, r1, disp)
   624 #define MAXPS_xmm_xmm(r1,r2)         x86_encode_r32_rm32(0x0F5F, r2, r1)
   625 #define MINPS_rbpdisp_xmm(disp,r1)   x86_encode_r32_rbpdisp32(0x0F5D, r1, disp)
   626 #define MINPS_xmm_xmm(r1,r2)         x86_encode_r32_rm32(0x0F5D, r2, r1)
   627 #define MOV_xmm_xmm(r1,r2)           x86_encode_r32_rm32(0x0F28, r2, r1)
   628 #define MOVAPS_rbpdisp_xmm(disp,r1)  x86_encode_r32_rbpdisp32(0x0F28, r1, disp)
   629 #define MOVAPS_xmm_rbpdisp(r1,disp)  x86_encode_r32_rbpdisp32(0x0F29, r1, disp)
   630 #define MOVHLPS_xmm_xmm(r1,r2)       x86_encode_r32_rm32(0x0F12, r2, r1)
   631 #define MOVHPS_rbpdisp_xmm(disp,r1)  x86_encode_r32_rbpdisp32(0x0F16, r1, disp)
   632 #define MOVHPS_xmm_rbpdisp(r1,disp)  x86_encode_r32_rbpdisp32(0x0F17, r1, disp)
   633 #define MOVLHPS_xmm_xmm(r1,r2)       x86_encode_r32_rm32(0x0F16, r2, r1)
   634 #define MOVLPS_rbpdisp_xmm(disp,r1)  x86_encode_r32_rbpdisp32(0x0F12, r1, disp)
   635 #define MOVLPS_xmm_rbpdisp(r1,disp)  x86_encode_r32_rbpdisp32(0x0F13, r1, disp)
   636 #define MOVUPS_rbpdisp_xmm(disp,r1)  x86_encode_r32_rbpdisp32(0x0F10, r1, disp)
   637 #define MOVUPS_xmm_rbpdisp(disp,r1)  x86_encode_r32_rbpdisp32(0x0F11, r1, disp)
   638 #define MULPS_xmm_xmm(r1,r2)         x86_encode_r32_rm32(0x0F59, r2, r1)
   639 #define MULPS_rbpdisp_xmm(disp,r1)   x86_encode_r32_rbpdisp32(0xF59, r1, disp)
   640 #define ORPS_rbpdisp_xmm(disp,r1)    x86_encode_r32_rbpdisp32(0x0F56, r1, disp)
   641 #define ORPS_xmm_xmm(r1,r2)          x86_encode_r32_rm32(0x0F56, r2, r1)
   642 #define RCPPS_rbpdisp_xmm(disp,r1)   x86_encode_r32_rbpdisp32(0xF53, r1, disp)
   643 #define RCPPS_xmm_xmm(r1,r2)         x86_encode_r32_rm32(0x0F53, r2, r1)
   644 #define RSQRTPS_rbpdisp_xmm(disp,r1) x86_encode_r32_rbpdisp32(0x0F52, r1, disp)
   645 #define RSQRTPS_xmm_xmm(r1,r2)       x86_encode_r32_rm32(0x0F52, r2, r1)
   646 #define SHUFPS_rbpdisp_xmm(disp,r1)  x86_encode_r32_rbpdisp32(0x0FC6, r1, disp)
   647 #define SHUFPS_xmm_xmm(r1,r2)        x86_encode_r32_rm32(0x0FC6, r2, r1)
   648 #define SQRTPS_rbpdisp_xmm(disp,r1)  x86_encode_r32_rbpdisp32(0x0F51, r1, disp)
   649 #define SQRTPS_xmm_xmm(r1,r2)        x86_encode_r32_rm32(0x0F51, r2, r1)
   650 #define SUBPS_rbpdisp_xmm(disp,r1)   x86_encode_r32_rbpdisp32(0x0F5C, r1, disp)
   651 #define SUBPS_xmm_xmm(r1,r2)         x86_encode_r32_rm32(0x0F5C, r2, r1)
   652 #define UNPCKHPS_rbpdisp_xmm(dsp,r1) x86_encode_r32_rbpdisp32(0x0F15, r1, disp)
   653 #define UNPCKHPS_xmm_xmm(r1,r2)      x86_encode_r32_rm32(0x0F15, r2, r1)
   654 #define UNPCKLPS_rbpdisp_xmm(dsp,r1) x86_encode_r32_rbpdisp32(0x0F14, r1, disp)
   655 #define UNPCKLPS_xmm_xmm(r1,r2)      x86_encode_r32_rm32(0x0F14, r2, r1)
   656 #define XORPS_rbpdisp_xmm(disp,r1)   x86_encode_r32_rbpdisp32(0x0F57, r1, disp)
   657 #define XORPS_xmm_xmm(r1,r2)         x86_encode_r32_rm32(0x0F57, r2, r1)
   659 /* SSE Scalar floating point instructions */
   660 #define ADDSS_rbpdisp_xmm(disp,r1)   OP(0xF3); x86_encode_r32_rbpdisp32(0x0F58, r1, disp)
   661 #define ADDSS_xmm_xmm(r1,r2)         OP(0xF3); x86_encode_r32_rm32(0x0F58, r2, r1)
   662 #define CMPSS_cc_rbpdisp_xmm(cc,d,r) OP(0xF3); x86_encode_r32_rbpdisp32(0x0FC2, r, d); OP(cc)
   663 #define CMPSS_cc_xmm_xmm(cc,r1,r2)   OP(0xF3); x86_encode_r32_rm32(0x0FC2, r2, r1); OP(cc)
   664 #define COMISS_rbpdisp_xmm(disp,r1)  x86_encode_r32_rbpdisp32(0x0F2F, r1, disp)
   665 #define COMISS_xmm_xmm(r1,r2)        x86_encode_r32_rm32(0x0F2F, r2, r1)
   666 #define DIVSS_rbpdisp_xmm(disp,r1)   OP(0xF3); x86_encode_r32_rbpdisp32(0x0F5E, r1, disp)
   667 #define DIVSS_xmm_xmm(r1,r2)         OP(0xF3); x86_encode_r32_rm32(0x0F5E, r2, r1)
   668 #define MAXSS_rbpdisp_xmm(disp,r1)   OP(0xF3); x86_encode_r32_rbpdisp32(0x0F5F, r1, disp)
   669 #define MAXSS_xmm_xmm(r1,r2)         OP(0xF3); x86_encode_r32_rm32(0x0F5F, r2, r1)
   670 #define MINSS_rbpdisp_xmm(disp,r1)   OP(0xF3); x86_encode_r32_rbpdisp32(0x0F5D, r1, disp)
   671 #define MINSS_xmm_xmm(r1,r2)         OP(0xF3); x86_encode_r32_rm32(0x0F5D, r2, r1)
   672 #define MOVSS_rbpdisp_xmm(disp,r1)   OP(0xF3); x86_encode_r32_rbpdisp32(0x0F10, r1, disp)
   673 #define MOVSS_xmm_rbpdisp(r1,disp)   OP(0xF3); x86_encode_r32_rbpdisp32(0x0F11, r1, disp)
   674 #define MOVSS_xmm_xmm(r1,r2)         OP(0xF3); x86_encode_r32_rm32(0x0F10, r2, r1)
   675 #define MULSS_rbpdisp_xmm(disp,r1)   OP(0xF3); x86_encode_r32_rbpdisp32(0xF59, r1, disp)
   676 #define MULSS_xmm_xmm(r1,r2)         OP(0xF3); x86_encode_r32_rm32(0x0F59, r2, r1)
   677 #define RCPSS_rbpdisp_xmm(disp,r1)   OP(0xF3); x86_encode_r32_rbpdisp32(0xF53, r1, disp)
   678 #define RCPSS_xmm_xmm(r1,r2)         OP(0xF3); x86_encode_r32_rm32(0x0F53, r2, r1)
   679 #define RSQRTSS_rbpdisp_xmm(disp,r1) OP(0xF3); x86_encode_r32_rbpdisp32(0x0F52, r1, disp)
   680 #define RSQRTSS_xmm_xmm(r1,r2)       OP(0xF3); x86_encode_r32_rm32(0x0F52, r2, r1)
   681 #define SQRTSS_rbpdisp_xmm(disp,r1)  OP(0xF3); x86_encode_r32_rbpdisp32(0x0F51, r1, disp)
   682 #define SQRTSS_xmm_xmm(r1,r2)        OP(0xF3); x86_encode_r32_rm32(0x0F51, r2, r1)
   683 #define SUBSS_rbpdisp_xmm(disp,r1)   OP(0xF3); x86_encode_r32_rbpdisp32(0x0F5C, r1, disp)
   684 #define SUBSS_xmm_xmm(r1,r2)         OP(0xF3); x86_encode_r32_rm32(0x0F5C, r2, r1)
   685 #define UCOMISS_rbpdisp_xmm(dsp,r1)  x86_encode_r32_rbpdisp32(0x0F2E, r1, dsp)
   686 #define UCOMISS_xmm_xmm(r1,r2)       x86_encode_r32_rm32(0x0F2E, r2, r1)
   688 /* SSE2 Packed floating point instructions */
   689 #define ADDPD_rbpdisp_xmm(disp,r1)   OP(0x66); x86_encode_r32_rbpdisp32(0x0F58, r1, disp)
   690 #define ADDPD_xmm_xmm(r1,r2)         OP(0x66); x86_encode_r32_rm32(0x0F58, r2, r1)
   691 #define ANDPD_rbpdisp_xmm(disp,r1)   OP(0x66); x86_encode_r32_rbpdisp32(0x0F54, r1, disp)
   692 #define ANDPD_xmm_xmm(r1,r2)         OP(0x66); x86_encode_r32_rm32(0x0F54, r2, r1)
   693 #define ANDNPD_rbpdisp_xmm(disp,r1)  OP(0x66); x86_encode_r32_rbpdisp32(0x0F55, r1, disp)
   694 #define ANDNPD_xmm_xmm(r1,r2)        OP(0x66); x86_encode_r32_rm32(0x0F55, r2, r1)
   695 #define CMPPD_cc_rbpdisp_xmm(cc,d,r) OP(0x66); x86_encode_r32_rbpdisp32(0x0FC2, r, d); OP(cc)
   696 #define CMPPD_cc_xmm_xmm(cc,r1,r2)   OP(0x66); x86_encode_r32_rm32(0x0FC2, r2, r1); OP(cc)
   697 #define CVTPD2PS_rbpdisp_xmm(dsp,r1) OP(0x66); x86_encode_r32_rbpdisp32(0x0F5A, r1, disp)
   698 #define CVTPD2PS_xmm_xmm(r1,r2)      OP(0x66); x86_encode_r32_rm32(0x0F5A, r2, r1)
   699 #define CVTPS2PD_rbpdisp_xmm(dsp,r1) x86_encode_r32_rbpdisp32(0x0F5A, r1, disp)
   700 #define CVTPS2PD_xmm_xmm(r1,r2)      x86_encode_r32_rm32(0x0F5A, r2, r1)
   701 #define DIVPD_rbpdisp_xmm(disp,r1)   OP(0x66); x86_encode_r32_rbpdisp32(0x0F5E, r1, disp)
   702 #define DIVPD_xmm_xmm(r1,r2)         OP(0x66); x86_encode_r32_rm32(0x0F5E, r2, r1)
   703 #define MAXPD_rbpdisp_xmm(disp,r1)   OP(0x66); x86_encode_r32_rbpdisp32(0x0F5F, r1, disp)
   704 #define MAXPD_xmm_xmm(r1,r2)         OP(0x66); x86_encode_r32_rm32(0x0F5F, r2, r1)
   705 #define MINPD_rbpdisp_xmm(disp,r1)   OP(0x66); x86_encode_r32_rbpdisp32(0x0F5D, r1, disp)
   706 #define MINPD_xmm_xmm(r1,r2)         OP(0x66); x86_encode_r32_rm32(0x0F5D, r2, r1)
   707 #define MOVHPD_rbpdisp_xmm(disp,r1)  OP(0x66); x86_encode_r32_rbpdisp32(0x0F16, r1, disp)
   708 #define MOVHPD_xmm_rbpdisp(r1,disp)  OP(0x66); x86_encode_r32_rbpdisp32(0x0F17, r1, disp)
   709 #define MOVLPD_rbpdisp_xmm(disp,r1)  OP(0x66); x86_encode_r32_rbpdisp32(0x0F12, r1, disp)
   710 #define MOVLPD_xmm_rbpdisp(r1,disp)  OP(0x66); x86_encode_r32_rbpdisp32(0x0F13, r1, disp)
   711 #define MULPD_rbpdisp_xmm(disp,r1)   OP(0x66); x86_encode_r32_rbpdisp32(0xF59, r1, disp)
   712 #define MULPD_xmm_xmm(r1,r2)         OP(0x66); x86_encode_r32_rm32(0x0F59, r2, r1)
   713 #define ORPD_rbpdisp_xmm(disp,r1)    OP(0x66); x86_encode_r32_rbpdisp32(0x0F56, r1, disp)
   714 #define ORPD_xmm_xmm(r1,r2)          OP(0x66); x86_encode_r32_rm32(0x0F56, r2, r1)
   715 #define SHUFPD_rbpdisp_xmm(disp,r1)  OP(0x66); x86_encode_r32_rbpdisp32(0x0FC6, r1, disp)
   716 #define SHUFPD_xmm_xmm(r1,r2)        OP(0x66); x86_encode_r32_rm32(0x0FC6, r2, r1)
   717 #define SUBPD_rbpdisp_xmm(disp,r1)   OP(0x66); x86_encode_r32_rbpdisp32(0x0F5C, r1, disp)
   718 #define SUBPD_xmm_xmm(r1,r2)         OP(0x66); x86_encode_r32_rm32(0x0F5C, r2, r1)
   719 #define UNPCKHPD_rbpdisp_xmm(dsp,r1) OP(0x66); x86_encode_r32_rbpdisp32(0x0F15, r1, disp)
   720 #define UNPCKHPD_xmm_xmm(r1,r2)      OP(0x66); x86_encode_r32_rm32(0x0F15, r2, r1)
   721 #define UNPCKLPD_rbpdisp_xmm(dsp,r1) OP(0x66); x86_encode_r32_rbpdisp32(0x0F14, r1, disp)
   722 #define UNPCKLPD_xmm_xmm(r1,r2)      OP(0x66); x86_encode_r32_rm32(0x0F14, r2, r1)
   723 #define XORPD_rbpdisp_xmm(disp,r1)   OP(0x66); x86_encode_r32_rbpdisp32(0x0F57, r1, disp)
   724 #define XORPD_xmm_xmm(r1,r2)         OP(0x66); x86_encode_r32_rm32(0x0F57, r2, r1)
   727 /* SSE2 Scalar floating point instructions */
   728 #define ADDSD_rbpdisp_xmm(disp,r1)   OP(0xF2); x86_encode_r32_rbpdisp32(0x0F58, r1, disp)
   729 #define ADDSD_xmm_xmm(r1,r2)         OP(0xF2); x86_encode_r32_rm32(0x0F58, r2, r1)
   730 #define CMPSD_cc_rbpdisp_xmm(cc,d,r) OP(0xF2); x86_encode_r32_rbpdisp32(0x0FC2, r, d); OP(cc)
   731 #define CMPSD_cc_xmm_xmm(cc,r1,r2)   OP(0xF2); x86_encode_r32_rm32(0x0FC2, r2, r1); OP(cc)
   732 #define COMISD_rbpdisp_xmm(disp,r1)  OP(0x66); x86_encode_r32_rbpdisp32(0x0F2F, r1, disp)
   733 #define COMISD_xmm_xmm(r1,r2)        OP(0x66); x86_encode_r32_rm32(0x0F2F, r2, r1)
   734 #define DIVSD_rbpdisp_xmm(disp,r1)   OP(0xF2); x86_encode_r32_rbpdisp32(0x0F5E, r1, disp)
   735 #define DIVSD_xmm_xmm(r1,r2)         OP(0xF2); x86_encode_r32_rm32(0x0F5E, r2, r1)
   736 #define MAXSD_rbpdisp_xmm(disp,r1)   OP(0xF2); x86_encode_r32_rbpdisp32(0x0F5F, r1, disp)
   737 #define MAXSD_xmm_xmm(r1,r2)         OP(0xF2); x86_encode_r32_rm32(0x0F5F, r2, r1)
   738 #define MINSD_rbpdisp_xmm(disp,r1)   OP(0xF2); x86_encode_r32_rbpdisp32(0x0F5D, r1, disp)
   739 #define MINSD_xmm_xmm(r1,r2)         OP(0xF2); x86_encode_r32_rm32(0x0F5D, r2, r1)
   740 #define MOVSD_rbpdisp_xmm(disp,r1)   OP(0xF2); x86_encode_r32_rbpdisp32(0x0F10, r1, disp)
   741 #define MOVSD_xmm_rbpdisp(r1,disp)   OP(0xF2); x86_encode_r32_rbpdisp32(0x0F11, r1, disp)
   742 #define MOVSD_xmm_xmm(r1,r2)         OP(0xF2); x86_encode_r32_rm32(0x0F10, r2, r1)
   743 #define MULSD_rbpdisp_xmm(disp,r1)   OP(0xF2); x86_encode_r32_rbpdisp32(0xF59, r1, disp)
   744 #define MULSD_xmm_xmm(r1,r2)         OP(0xF2); x86_encode_r32_rm32(0x0F59, r2, r1)
   745 #define SQRTSD_rbpdisp_xmm(disp,r1)  OP(0xF2); x86_encode_r32_rbpdisp32(0x0F51, r1, disp)
   746 #define SQRTSD_xmm_xmm(r1,r2)        OP(0xF2); x86_encode_r32_rm32(0x0F51, r2, r1)
   747 #define SUBSD_rbpdisp_xmm(disp,r1)   OP(0xF2); x86_encode_r32_rbpdisp32(0x0F5C, r1, disp)
   748 #define SUBSD_xmm_xmm(r1,r2)         OP(0xF2); x86_encode_r32_rm32(0x0F5C, r2, r1)
   749 #define UCOMISD_rbpdisp_xmm(dsp,r1)  OP(0x66); x86_encode_r32_rbpdisp32(0x0F2E, r1, dsp)
   750 #define UCOMISD_xmm_xmm(r1,r2)       OP(0x66); x86_encode_r32_rm32(0x0F2E, r2, r1)
/* SSE3 floating point instructions.
 * 0x66 prefix = packed-double variant, 0xF2 = packed-single variant for the
 * ADDSUB/HADD/HSUB group; 0xF3 selects MOVSHDUP/MOVSLDUP. */
#define ADDSUBPD_rbpdisp_xmm(dsp,r1) OP(0x66); x86_encode_r32_rbpdisp32(0x0FD0, r1, dsp)
#define ADDSUBPD_xmm_xmm(r1,r2)      OP(0x66); x86_encode_r32_rm32(0x0FD0, r2, r1)
#define ADDSUBPS_rbpdisp_xmm(dsp,r1) OP(0xF2); x86_encode_r32_rbpdisp32(0x0FD0, r1, dsp)
#define ADDSUBPS_xmm_xmm(r1,r2)      OP(0xF2); x86_encode_r32_rm32(0x0FD0, r2, r1)
#define HADDPD_rbpdisp_xmm(dsp,r1)   OP(0x66); x86_encode_r32_rbpdisp32(0x0F7C, r1, dsp)
#define HADDPD_xmm_xmm(r1,r2)        OP(0x66); x86_encode_r32_rm32(0x0F7C, r2, r1)
#define HADDPS_rbpdisp_xmm(dsp,r1)   OP(0xF2); x86_encode_r32_rbpdisp32(0x0F7C, r1, dsp)
#define HADDPS_xmm_xmm(r1,r2)        OP(0xF2); x86_encode_r32_rm32(0x0F7C, r2, r1)
#define HSUBPD_rbpdisp_xmm(dsp,r1)   OP(0x66); x86_encode_r32_rbpdisp32(0x0F7D, r1, dsp)
#define HSUBPD_xmm_xmm(r1,r2)        OP(0x66); x86_encode_r32_rm32(0x0F7D, r2, r1)
#define HSUBPS_rbpdisp_xmm(dsp,r1)   OP(0xF2); x86_encode_r32_rbpdisp32(0x0F7D, r1, dsp)
#define HSUBPS_xmm_xmm(r1,r2)        OP(0xF2); x86_encode_r32_rm32(0x0F7D, r2, r1)
#define MOVSHDUP_rbpdisp_xmm(dsp,r1) OP(0xF3); x86_encode_r32_rbpdisp32(0x0F16, r1, dsp)
#define MOVSHDUP_xmm_xmm(r1,r2)      OP(0xF3); x86_encode_r32_rm32(0x0F16, r2, r1)
#define MOVSLDUP_rbpdisp_xmm(dsp,r1) OP(0xF3); x86_encode_r32_rbpdisp32(0x0F12, r1, dsp)
#define MOVSLDUP_xmm_xmm(r1,r2)      OP(0xF3); x86_encode_r32_rm32(0x0F12, r2, r1)
.