nasm-cvs Mailing List for The Netwide Assembler (Page 32)

Brought to you by: cyrillos, fbkotler, hpa

nasm-cvs — CVS commit messages

You can subscribe to this list here.

2002	Jan	Feb	Mar	Apr	May (208)	Jun (43)	Jul	Aug (2)	Sep (17)	Oct	Nov (4)	Dec (9)
2003	Jan	Feb (11)	Mar (3)	Apr (2)	May	Jun (3)	Jul (29)	Aug (29)	Sep (48)	Oct	Nov	Dec (5)
2004	Jan (1)	Feb	Mar	Apr	May	Jun	Jul	Aug	Sep	Oct	Nov	Dec (1)
2005	Jan (12)	Feb (1)	Mar (1)	Apr	May (1)	Jun (2)	Jul	Aug	Sep (4)	Oct (3)	Nov (1)	Dec (2)
2006	Jan (1)	Feb (2)	Mar (1)	Apr	May (1)	Jun	Jul	Aug (1)	Sep (2)	Oct (21)	Nov (25)	Dec (16)
2007	Jan (26)	Feb (26)	Mar (18)	Apr (51)	May (45)	Jun (26)	Jul (6)	Aug (85)	Sep (161)	Oct (111)	Nov (83)	Dec (18)
2008	Jan (31)	Feb (27)	Mar	Apr (16)	May (142)	Jun (136)	Jul (51)	Aug (21)	Sep (47)	Oct (428)	Nov (19)	Dec (6)
2009	Jan (11)	Feb (37)	Mar (17)	Apr (15)	May (13)	Jun (61)	Jul (127)	Aug (15)	Sep (22)	Oct (28)	Nov (37)	Dec (10)
2010	Jan (18)	Feb (22)	Mar (10)	Apr (41)	May	Jun (48)	Jul (61)	Aug (54)	Sep (34)	Oct (15)	Nov (49)	Dec (11)
2011	Jan	Feb (24)	Mar (10)	Apr (9)	May	Jun (33)	Jul (41)	Aug (20)	Sep	Oct	Nov	Dec
2012	Jan	Feb (86)	Mar (12)	Apr	May (10)	Jun	Jul (9)	Aug (4)	Sep (11)	Oct (3)	Nov (3)	Dec (10)
2013	Jan (1)	Feb (23)	Mar (15)	Apr (7)	May (20)	Jun (3)	Jul (15)	Aug	Sep (29)	Oct (16)	Nov (69)	Dec (18)
2014	Jan	Feb (8)	Mar	Apr	May (16)	Jun (7)	Jul	Aug (5)	Sep (2)	Oct (4)	Nov (25)	Dec (8)
2015	Jan (6)	Feb (6)	Mar	Apr (1)	May (2)	Jun (1)	Jul (7)	Aug	Sep (2)	Oct (1)	Nov (6)	Dec
2016	Jan (12)	Feb (97)	Mar (57)	Apr (52)	May (33)	Jun (1)	Jul (1)	Aug	Sep	Oct (3)	Nov (3)	Dec
2017	Jan (4)	Feb	Mar (23)	Apr (5)	May	Jun (2)	Jul (3)	Aug (2)	Sep	Oct (6)	Nov (3)	Dec (3)
2018	Jan (4)	Feb (11)	Mar	Apr (1)	May (3)	Jun (6)	Jul	Aug (5)	Sep (5)	Oct (36)	Nov (128)	Dec (18)
2019	Jan	Feb	Mar (1)	Apr (1)	May	Jun	Jul	Aug	Sep	Oct	Nov	Dec
2020	Jan	Feb	Mar	Apr	May (24)	Jun	Jul	Aug	Sep	Oct	Nov	Dec

Flat | Threaded

<< < 1 .. 30 31 32 33 34 .. 154 > >> (Page 32 of 154)

[nasm:avx512] AVX-512: Fix comments

From: nasm-bot f. J. K. S. <jin...@in...> - 2013-09-21 12:30:30

Commit-ID:  6d16d2836da3ad4aaad2b48b5879f24f2581876f
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=6d16d2836da3ad4aaad2b48b5879f24f2581876f
Author:     Jin Kyu Song <jin...@in...>
AuthorDate: Wed, 28 Aug 2013 19:15:24 -0700
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Thu, 29 Aug 2013 10:03:09 +0400

AVX-512: Fix comments

Fixed or purged some old comments and added a comment for a previous patch.

Signed-off-by: Jin Kyu Song <jin...@in...>
Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 nasm.h           | 3 ++-
 regs.dat         | 2 +-
 test/gas2nasm.py | 1 -
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/nasm.h b/nasm.h
index 8d61748..5ae9d54 100644
--- a/nasm.h
+++ b/nasm.h
@@ -689,12 +689,13 @@ typedef struct insn { /* an instruction itself */
     uint8_t         evex_p[3];              /* EVEX.P0: [RXB,R',00,mm], P1: [W,vvvv,1,pp] */
                                             /* EVEX.P2: [z,L'L,b,V',aaa] */
     enum ttypes     evex_tuple;             /* Tuple type for compressed Disp8*N */
-    int             evex_rm;                /* static rounding mode for AVX3 (EVEX) */
+    int             evex_rm;                /* static rounding mode for AVX512 (EVEX) */
     int8_t          evex_brerop;            /* BR/ER/SAE operand position */
 } insn;
 
 enum geninfo { GI_SWITCH };
 
+/* Instruction flags type: IF_* flags are defined in insns.h */
 typedef uint64_t iflags_t;
 
 /*
diff --git a/regs.dat b/regs.dat
index 1e083d0..fb112e6 100644
--- a/regs.dat
+++ b/regs.dat
@@ -123,7 +123,7 @@ xmm1-31	XMMREG		xmmreg		1
 ymm0	YMM0		ymmreg		0
 ymm1-31	YMMREG		ymmreg		1
 
-# AVX3 registers
+# AVX512 registers
 zmm0	ZMM0		zmmreg		0
 zmm1-31	ZMMREG		zmmreg		1
 
diff --git a/test/gas2nasm.py b/test/gas2nasm.py
index a00af92..d0b8579 100755
--- a/test/gas2nasm.py
+++ b/test/gas2nasm.py
@@ -89,7 +89,6 @@ def write_rawbytes(data, options):
 if __name__ == "__main__":
     options = setup()
     recs = read(options)
-    print "AVX3.1 instructions"
 
     write_rawbytes(recs, options)

[nasm:avx512] AVX-512: Add a feature to generate a raw bytecode file

From: nasm-bot f. J. K. S. <jin...@in...> - 2013-09-21 12:30:29

Commit-ID:  fe0ee08586f9cb31e8bc52200818a5bbb9d4c149
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=fe0ee08586f9cb31e8bc52200818a5bbb9d4c149
Author:     Jin Kyu Song <jin...@in...>
AuthorDate: Fri, 23 Aug 2013 18:40:49 -0700
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Wed, 28 Aug 2013 14:27:25 +0400

AVX-512: Add a feature to generate a raw bytecode file

From a gas testsuite file, a text file containing raw bytecodes
is useful when verifying the output of NASM.

Signed-off-by: Jin Kyu Song <jin...@in...>
Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 test/gas2nasm.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/test/gas2nasm.py b/test/gas2nasm.py
index de16745..a00af92 100755
--- a/test/gas2nasm.py
+++ b/test/gas2nasm.py
@@ -21,6 +21,9 @@ def setup():
     parser.add_option('-b', dest='bits', action='store',
             default="",
             help='Bits for output ASM file.')
+    parser.add_option('-r', dest='raw_output', action='store',
+            default="",
+            help='Name for raw output bytes in text')
     (options, args) =  parser.parse_args()
     return options
 
@@ -77,11 +80,19 @@ def write(data, options):
                 outstr = outstrfmt % tuple(insn)
                 out.write(outstr)
 
+def write_rawbytes(data, options):
+    if options.raw_output:
+        with open(options.raw_output, 'wb') as out:
+            for insn in data:
+                out.write(insn[0] + '\n')
+
 if __name__ == "__main__":
     options = setup()
     recs = read(options)
     print "AVX3.1 instructions"
 
+    write_rawbytes(recs, options)
+
     recs = commas(recs)
 
     write(recs, options)

[nasm:avx512] AVX-512: Add EVEX encoding and new instructions

From: nasm-bot f. J. K. S. <jin...@in...> - 2013-09-21 12:30:27

Commit-ID:  cc1dc9de53137e864bde06573556723149239f29
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=cc1dc9de53137e864bde06573556723149239f29
Author:     Jin Kyu Song <jin...@in...>
AuthorDate: Thu, 15 Aug 2013 19:01:25 -0700
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Fri, 16 Aug 2013 09:06:15 +0400

AVX-512: Add EVEX encoding and new instructions

EVEX encoding support includes 32 vector regs (XMM/YMM/ZMM),
opmask, broadcasting, embedded rounding mode,
suppress all exceptions, compressed displacement.

Signed-off-by: Jin Kyu Song <jin...@in...>
Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 assemble.c | 326 ++++++++++++++++++++++++++++++++++++++------
 disasm.c   |   6 +
 insns.dat  | 448 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 insns.h    |  10 +-
 insns.pl   | 140 +++++++++++++++++--
 nasm.h     |  54 +++++++-
 opflags.h  |   6 +-
 parser.c   |   6 +
 regs.dat   |   4 +-
 9 files changed, 925 insertions(+), 75 deletions(-)

diff --git a/assemble.c b/assemble.c
index b119f86..6054d4a 100644
--- a/assemble.c
+++ b/assemble.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
  *
- *   Copyright 1996-2012 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2013 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -67,6 +67,35 @@
  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  *                 field equal to digit b.
+ *
+ * \240..\243    - this instruction uses EVEX rather than REX or VEX/XOP, with the
+ *                 V field taken from operand 0..3.
+ * \250          - this instruction uses EVEX rather than REX or VEX/XOP, with the
+ *                 V field set to 1111b.
+ * EVEX prefixes are followed by the sequence:
+ * \cm\wlp\tup    where cm is:
+ *                  cc 000 0mm
+ *                  c = 2 for EVEX and m is the legacy escape (0f, 0f38, 0f3a)
+ *                and wlp is:
+ *                  00 wwl lpp
+ *                  [l0]  ll = 0 (.128, .lz)
+ *                  [l1]  ll = 1 (.256)
+ *                  [l2]  ll = 2 (.512)
+ *                  [lig] ll = 3 for EVEX.L'L don't care (always assembled as 0)
+ *
+ *                  [w0]  ww = 0 for W = 0
+ *                  [w1]  ww = 1 for W = 1
+ *                  [wig] ww = 2 for W don't care (always assembled as 0)
+ *                  [ww]  ww = 3 for W used as REX.W
+ *
+ *                  [p0]  pp = 0 for no prefix
+ *                  [66]  pp = 1 for legacy prefix 66
+ *                  [f3]  pp = 2
+ *                  [f2]  pp = 3
+ *
+ *                tup is tuple type for Disp8*N from %tuple_codes in insns.pl
+ *                    (compressed displacement encoding)
+ *
  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  *                 V field taken from operand 0..3.
@@ -76,9 +105,9 @@
  * VEX/XOP prefixes are followed by the sequence:
  * \tmm\wlp        where mm is the M field; and wlp is:
  *                 00 wwl lpp
- *		   [l0]  ll = 0 for L = 0 (.128, .lz)
- *		   [l1]  ll = 1 for L = 1 (.256)
- *		   [lig] ll = 2 for L don't care (always assembled as 0)
+ *                 [l0]  ll = 0 for L = 0 (.128, .lz)
+ *                 [l1]  ll = 1 for L = 1 (.256)
+ *                 [lig] ll = 2 for L don't care (always assembled as 0)
  *
  *                 [w0]  ww = 0 for W = 0
  *                 [w1 ] ww = 1 for W = 1
@@ -136,6 +165,7 @@
  *                 used for conditional jump over longer jump
  * \374          - this instruction takes an XMM VSIB memory EA
  * \375          - this instruction takes an YMM VSIB memory EA
+ * \376          - this instruction takes an ZMM VSIB memory EA
  */
 
 #include "compiler.h"
@@ -174,6 +204,7 @@ typedef struct {
     int bytes;                    /* # of bytes of offset needed */
     int size;                     /* lazy - this is sib+bytes+1 */
     uint8_t modrm, sib, rex, rip; /* the bytes themselves */
+    int8_t disp8;                  /* compressed displacement for EVEX */
 } ea;
 
 #define GEN_SIB(scale, index, base)                 \
@@ -200,9 +231,10 @@ static opflags_t regflag(const operand *);
 static int32_t regval(const operand *);
 static int rexflags(int, opflags_t, int);
 static int op_rexflags(const operand *, int);
+static int op_evexflags(const operand *, int, uint8_t);
 static void add_asp(insn *, int);
 
-static enum ea_type process_ea(operand *, ea *, int, int, int, opflags_t);
+static enum ea_type process_ea(operand *, ea *, int, int, opflags_t, insn *);
 
 static int has_prefix(insn * ins, enum prefix_pos pos, int prefix)
 {
@@ -820,6 +852,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 
     ins->rex = 0;               /* Ensure REX is reset */
     eat = EA_SCALAR;            /* Expect a scalar EA */
+    memset(ins->evex_p, 0, 3);  /* Ensure EVEX is reset */
 
     if (ins->prefixes[PPS_OSIZE] == P_O64)
         ins->rex |= REX_W;
@@ -910,6 +943,23 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
             length++;
             break;
 
+        case4(0240):
+            ins->rex |= REX_EV;
+            ins->vexreg = regval(opx);
+            ins->evex_p[2] |= op_evexflags(opx, EVEX_P2VP, 2); /* High-16 NDS */
+            ins->vex_cm = *codes++;
+            ins->vex_wlp = *codes++;
+            ins->evex_tuple = (*codes++ - 0300);
+            break;
+
+        case 0250:
+            ins->rex |= REX_EV;
+            ins->vexreg = 0;
+            ins->vex_cm = *codes++;
+            ins->vex_wlp = *codes++;
+            ins->evex_tuple = (*codes++ - 0300);
+            break;
+
         case4(0254):
             length += 4;
             break;
@@ -1076,6 +1126,10 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
             eat = EA_YMMVSIB;
             break;
 
+        case 0376:
+            eat = EA_ZMMVSIB;
+            break;
+
         case4(0100):
         case4(0110):
         case4(0120):
@@ -1093,6 +1147,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
                 int rfield;
                 opflags_t rflags;
                 struct operand *opy = &ins->oprs[op2];
+                struct operand *oplast;
 
                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
 
@@ -1100,12 +1155,30 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
                     /* pick rfield from operand b (opx) */
                     rflags = regflag(opx);
                     rfield = nasm_regvals[opx->basereg];
+                    /* find the last SIMD operand where ER decorator resides */
+                    oplast = &ins->oprs[op1 > op2 ? op1 : op2];
                 } else {
                     rflags = 0;
                     rfield = c & 7;
+                    oplast = opy;
                 }
-                if (process_ea(opy, &ea_data, bits,ins->addr_size,
-                               rfield, rflags) != eat) {
+
+                if (oplast->decoflags & ER) {
+                    /* set EVEX.RC (rounding control) and b */
+                    ins->evex_p[2] |= (((ins->evex_rm - BRC_RN) << 5) & EVEX_P2LL) |
+                                      EVEX_P2B;
+                } else {
+                    /* set EVEX.L'L (vector length) */
+                    ins->evex_p[2] |= ((ins->vex_wlp << (5 - 2)) & EVEX_P2LL);
+                    if ((oplast->decoflags & SAE) ||
+                        (opy->decoflags & BRDCAST_MASK)) {
+                        /* set EVEX.b */
+                        ins->evex_p[2] |= EVEX_P2B;
+                    }
+                }
+
+                if (process_ea(opy, &ea_data, bits,
+                               rfield, rflags, ins) != eat) {
                     errfunc(ERR_NONFATAL, "invalid effective address");
                     return -1;
                 } else {
@@ -1132,11 +1205,11 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
         ins->rex &= ~REX_P;        /* Don't force REX prefix due to high reg */
     }
 
-    if (ins->rex & REX_V) {
+    if (ins->rex & (REX_V | REX_EV)) {
         int bad32 = REX_R|REX_W|REX_X|REX_B;
 
         if (ins->rex & REX_H) {
-            errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
+            errfunc(ERR_NONFATAL, "cannot use high register in AVX instruction");
             return -1;
         }
         switch (ins->vex_wlp & 060) {
@@ -1157,7 +1230,9 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
             return -1;
         }
-        if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)))
+        if (ins->rex & REX_EV)
+            length += 4;
+        else if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)))
             length += 3;
         else
             length += 2;
@@ -1194,7 +1269,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 static inline unsigned int emit_rex(insn *ins, int32_t segment, int64_t offset, int bits)
 {
     if (bits == 64) {
-        if ((ins->rex & REX_REAL) && !(ins->rex & REX_V)) {
+        if ((ins->rex & REX_REAL) && !(ins->rex & (REX_V | REX_EV))) {
             ins->rex = (ins->rex & REX_REAL) | REX_P;
             out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
             ins->rex = 0;
@@ -1431,6 +1506,25 @@ static void gencode(int32_t segment, int64_t offset, int bits,
             offset += 4;
             break;
 
+        case4(0240):
+        case 0250:
+            codes += 3;
+            ins->evex_p[2] |= op_evexflags(&ins->oprs[0],
+                                           EVEX_P2Z | EVEX_P2AAA, 2);
+            ins->evex_p[2] ^= EVEX_P2VP;        /* 1's complement */
+            bytes[0] = 0x62;
+            /* EVEX.X can be set by either REX or EVEX for different reasons */
+            bytes[1] = (~(((ins->rex & 7) << 5) |
+                          (ins->evex_p[0] & (EVEX_P0X | EVEX_P0RP))) & 0xf0) |
+                        (ins->vex_cm & 3);
+            bytes[2] = ((ins->rex & REX_W) << (7 - 3)) |
+                       ((~ins->vexreg & 15) << 3) |
+                       (1 << 2) | (ins->vex_wlp & 3);
+            bytes[3] = ins->evex_p[2];
+            out(offset, segment, &bytes, OUT_RAWDATA, 4, NO_SEG, NO_SEG);
+            offset += 4;
+            break;
+
         case4(0260):
         case 0270:
             codes += 2;
@@ -1631,6 +1725,10 @@ static void gencode(int32_t segment, int64_t offset, int bits,
             eat = EA_YMMVSIB;
             break;
 
+        case 0376:
+            eat = EA_ZMMVSIB;
+            break;
+
         case4(0100):
         case4(0110):
         case4(0120):
@@ -1661,8 +1759,8 @@ static void gencode(int32_t segment, int64_t offset, int bits,
                     rfield = c & 7;
                 }
 
-                if (process_ea(opy, &ea_data, bits, ins->addr_size,
-                               rfield, rflags) != eat)
+                if (process_ea(opy, &ea_data, bits,
+                               rfield, rflags, ins) != eat)
                     errfunc(ERR_NONFATAL, "invalid effective address");
 
                 p = bytes;
@@ -1687,7 +1785,8 @@ static void gencode(int32_t segment, int64_t offset, int bits,
                 case 2:
                 case 4:
                 case 8:
-                    data = opy->offset;
+                    /* use compressed displacement, if available */
+                    data = ea_data.disp8 ? ea_data.disp8 : opy->offset;
                     s += ea_data.bytes;
                     if (ea_data.rip) {
                         if (opy->segment == segment) {
@@ -1702,9 +1801,9 @@ static void gencode(int32_t segment, int64_t offset, int bits,
                                 insn_end - offset, opy->segment, opy->wrt);
                         }
                     } else {
-                        if (overflow_general(opy->offset, ins->addr_size >> 3) ||
-                            signed_bits(opy->offset, ins->addr_size) !=
-                            signed_bits(opy->offset, ea_data.bytes * 8))
+                        if (overflow_general(data, ins->addr_size >> 3) ||
+                            signed_bits(data, ins->addr_size) !=
+                            signed_bits(data, ea_data.bytes * 8))
                             warn_overflow(ERR_PASS2, ea_data.bytes);
 
                         out(offset, segment, &data, OUT_ADDRESS,
@@ -1774,6 +1873,40 @@ static int rexflags(int val, opflags_t flags, int mask)
     return rex & mask;
 }
 
+static int evexflags(int val, decoflags_t deco,
+                     int mask, uint8_t byte)
+{
+    int evex = 0;
+
+    switch(byte) {
+    case 0:
+        if (val >= 16)
+            evex |= (EVEX_P0RP | EVEX_P0X);
+        break;
+    case 2:
+        if (val >= 16)
+            evex |= EVEX_P2VP;
+        if (deco & Z)
+            evex |= EVEX_P2Z;
+        if (deco & OPMASK_MASK)
+            evex |= deco & EVEX_P2AAA;
+        break;
+    }
+    return evex & mask;
+}
+
+static int op_evexflags(const operand * o, int mask, uint8_t byte)
+{
+    int val;
+
+    if (!is_register(o->basereg))
+        errfunc(ERR_PANIC, "invalid operand passed to op_evexflags()");
+
+    val = nasm_regvals[o->basereg];
+
+    return evexflags(val, o->decoflags, mask, byte);
+}
+
 static enum match_result find_match(const struct itemplate **tempp,
                                     insn *instruction,
                                     int32_t segment, int64_t offset, int bits)
@@ -1908,6 +2041,9 @@ static enum match_result matches(const struct itemplate *itemp,
         asize = BITS256;
         break;
     case IF_SZ:
+        asize = BITS512;
+        break;
+    case IF_SIZE:
         switch (bits) {
         case 16:
             asize = BITS16;
@@ -1961,10 +2097,12 @@ static enum match_result matches(const struct itemplate *itemp,
      */
     for (i = 0; i < itemp->operands; i++) {
         opflags_t type = instruction->oprs[i].type;
+        decoflags_t deco = instruction->oprs[i].decoflags;
         if (!(type & SIZE_MASK))
             type |= size[i];
 
-        if (itemp->opd[i] & ~type & ~SIZE_MASK) {
+        if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
+            (itemp->deco[i] & deco) != deco) {
             return MERR_INVALOP;
         } else if ((itemp->opd[i] & SIZE_MASK) &&
                    (itemp->opd[i] & SIZE_MASK) != (type & SIZE_MASK)) {
@@ -2036,16 +2174,116 @@ static enum match_result matches(const struct itemplate *itemp,
     return MOK_GOOD;
 }
 
+/*
+ * Check if offset is a multiple of N with corresponding tuple type
+ * if Disp8*N is available, compressed displacement is stored in compdisp
+ */
+static bool is_disp8n(operand *input, insn *ins, int8_t *compdisp)
+{
+    const uint8_t fv_n[2][2][VLMAX] = {{{16, 32, 64}, {4, 4, 4}},
+                                       {{16, 32, 64}, {8, 8, 8}}};
+    const uint8_t hv_n[2][VLMAX]    =  {{8, 16, 32}, {4, 4, 4}};
+    const uint8_t dup_n[VLMAX]      =   {8, 32, 64};
+
+    bool evex_b           = input->decoflags & BRDCAST_MASK;
+    enum ttypes   tuple   = ins->evex_tuple;
+    /* vex_wlp composed as [wwllpp] */
+    enum vectlens vectlen = (ins->vex_wlp & 0x0c) >> 2;
+    /* wig(=2) is treated as w0(=0) */
+    bool evex_w           = (ins->vex_wlp & 0x10) >> 4;
+    int32_t off           = input->offset;
+    uint8_t n = 0;
+    int32_t disp8;
+
+    switch(tuple) {
+    case FV:
+        n = fv_n[evex_w][evex_b][vectlen];
+        break;
+    case HV:
+        n = hv_n[evex_b][vectlen];
+        break;
+
+    case FVM:
+        /* 16, 32, 64 for VL 128, 256, 512 respectively*/
+        n = 1 << (vectlen + 4);
+        break;
+    case T1S8:  /* N = 1 */
+    case T1S16: /* N = 2 */
+        n = tuple - T1S8 + 1;
+        break;
+    case T1S:
+        /* N = 4 for 32bit, 8 for 64bit */
+        n = evex_w ? 8 : 4;
+        break;
+    case T1F32:
+    case T1F64:
+        /* N = 4 for 32bit, 8 for 64bit */
+        n = (tuple == T1F32 ? 4 : 8);
+        break;
+    case T2:
+    case T4:
+    case T8:
+        if (vectlen + 7 <= (evex_w + 5) + (tuple - T2 + 1))
+            n = 0;
+        else
+            n = 1 << (tuple - T2 + evex_w + 4);
+        break;
+    case HVM:
+    case QVM:
+    case OVM:
+        n = 1 << (OVM - tuple + vectlen + 1);
+        break;
+    case M128:
+        n = 16;
+        break;
+    case DUP:
+        n = dup_n[vectlen];
+        break;
+
+    default:
+        break;
+    }
+
+    if (n && !(off & (n - 1))) {
+        disp8 = off / n;
+        /* if it fits in Disp8 */
+        if (disp8 >= -128 && disp8 <= 127) {
+            *compdisp = disp8;
+            return true;
+        }
+    }
+
+    *compdisp = 0;
+    return false;
+}
+
+/*
+ * Check if ModR/M.mod should/can be 01.
+ * - EAF_BYTEOFFS is set
+ * - offset can fit in a byte when EVEX is not used
+ * - offset can be compressed when EVEX is used
+ */
+#define IS_MOD_01()     (input->eaflags & EAF_BYTEOFFS ||       \
+                         (o >= -128 && o <= 127 &&              \
+                          seg == NO_SEG && !forw_ref &&         \
+                          !(input->eaflags & EAF_WORDOFFS) &&   \
+                          !(ins->rex & REX_EV)) ||              \
+                         (ins->rex & REX_EV &&                  \
+                          is_disp8n(input, ins, &output->disp8)))
+
 static enum ea_type process_ea(operand *input, ea *output, int bits,
-                               int addrbits, int rfield, opflags_t rflags)
+                               int rfield, opflags_t rflags, insn *ins)
 {
     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
+    int addrbits = ins->addr_size;
 
     output->type    = EA_SCALAR;
     output->rip     = false;
 
     /* REX flags for the rfield operand */
     output->rex     |= rexflags(rfield, rflags, REX_R | REX_P | REX_W | REX_H);
+    /* EVEX.R' flag for the REG operand */
+    ins->evex_p[0]  |= evexflags(rfield, 0, EVEX_P0RP, 0);
 
     if (is_class(REGISTER, input->type)) {
         /*
@@ -2054,10 +2292,17 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
         if (!is_register(input->basereg))
             goto err;
 
-        if (!is_class(REG_EA, regflag(input)))
+        if (!is_reg_class(REG_EA, input->basereg))
             goto err;
 
+        /* broadcasting is not available with a direct register operand. */
+        if (input->decoflags & BRDCAST_MASK) {
+            nasm_error(ERR_NONFATAL, "Broadcasting not allowed from a register");
+            goto err;
+        }
+
         output->rex         |= op_rexflags(input, REX_B | REX_P | REX_W | REX_H);
+        ins->evex_p[0]      |= op_evexflags(input, EVEX_P0X, 0);
         output->sib_present = false;    /* no SIB necessary */
         output->bytes       = 0;        /* no offset necessary either */
         output->modrm       = GEN_MODRM(3, rfield, nasm_regvals[input->basereg]);
@@ -2065,6 +2310,14 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
         /*
          * It's a memory reference.
          */
+
+        /* Embedded rounding or SAE is not available with a mem ref operand. */
+        if (input->decoflags & (ER | SAE)) {
+            nasm_error(ERR_NONFATAL,
+                       "Embedded rounding is available only with reg-reg op.");
+            return -1;
+        }
+
         if (input->basereg == -1 &&
             (input->indexreg == -1 || input->scale == 0)) {
             /*
@@ -2125,7 +2378,7 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
             }
 
             /* if either one are a vector register... */
-            if ((ix|bx) & (XMMREG|YMMREG) & ~REG_EA) {
+            if ((ix|bx) & (XMMREG|YMMREG|ZMMREG) & ~REG_EA) {
                 opflags_t sok = BITS32 | BITS64;
                 int32_t o = input->offset;
                 int mod, scale, index, base;
@@ -2134,7 +2387,7 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
                  * For a vector SIB, one has to be a vector and the other,
                  * if present, a GPR.  The vector must be the index operand.
                  */
-                if (it == -1 || (bx & (XMMREG|YMMREG) & ~REG_EA)) {
+                if (it == -1 || (bx & (XMMREG|YMMREG|ZMMREG) & ~REG_EA)) {
                     if (s == 0)
                         s = 1;
                     else if (s != 1)
@@ -2165,11 +2418,13 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
                     (addrbits == 64 && !(sok & BITS64)))
                     goto err;
 
-                output->type = (ix & YMMREG & ~REG_EA)
-                    ? EA_YMMVSIB : EA_XMMVSIB;
+                output->type = ((ix & ZMMREG & ~REG_EA) ? EA_ZMMVSIB
+                                : ((ix & YMMREG & ~REG_EA)
+                                ? EA_YMMVSIB : EA_XMMVSIB));
 
-                output->rex |= rexflags(it, ix, REX_X);
-                output->rex |= rexflags(bt, bx, REX_B);
+                output->rex    |= rexflags(it, ix, REX_X);
+                output->rex    |= rexflags(bt, bx, REX_B);
+                ins->evex_p[2] |= evexflags(it, 0, EVEX_P2VP, 2);
 
                 index = it & 7; /* it is known to be != -1 */
 
@@ -2199,10 +2454,7 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
                         seg == NO_SEG && !forw_ref &&
                         !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
                         mod = 0;
-                    else if (input->eaflags & EAF_BYTEOFFS ||
-                             (o >= -128 && o <= 127 &&
-                              seg == NO_SEG && !forw_ref &&
-                              !(input->eaflags & EAF_WORDOFFS)))
+                    else if (IS_MOD_01())
                         mod = 1;
                     else
                         mod = 2;
@@ -2293,10 +2545,7 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
                             seg == NO_SEG && !forw_ref &&
                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
                             mod = 0;
-                        else if (input->eaflags & EAF_BYTEOFFS ||
-                                 (o >= -128 && o <= 127 &&
-                                  seg == NO_SEG && !forw_ref &&
-                                  !(input->eaflags & EAF_WORDOFFS)))
+                        else if (IS_MOD_01())
                             mod = 1;
                         else
                             mod = 2;
@@ -2340,10 +2589,7 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
                             seg == NO_SEG && !forw_ref &&
                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
                             mod = 0;
-                        else if (input->eaflags & EAF_BYTEOFFS ||
-                                 (o >= -128 && o <= 127 &&
-                                  seg == NO_SEG && !forw_ref &&
-                                  !(input->eaflags & EAF_WORDOFFS)))
+                        else if (IS_MOD_01())
                             mod = 1;
                         else
                             mod = 2;
@@ -2428,9 +2674,7 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
                     mod = 0;
-                else if (input->eaflags & EAF_BYTEOFFS ||
-                         (o >= -128 && o <= 127 && seg == NO_SEG &&
-                          !forw_ref && !(input->eaflags & EAF_WORDOFFS)))
+                else if (IS_MOD_01())
                     mod = 1;
                 else
                     mod = 2;
diff --git a/disasm.c b/disasm.c
index 97bf27e..9d2e1b1 100644
--- a/disasm.c
+++ b/disasm.c
@@ -328,6 +328,8 @@ static uint8_t *do_ea(uint8_t *data, int modrm, int asize,
 		op->indexreg = nasm_rd_xmmreg[index | ((rex & REX_X) ? 8 : 0)];
 	    else if (type == EA_YMMVSIB)
 		op->indexreg = nasm_rd_ymmreg[index | ((rex & REX_X) ? 8 : 0)];
+	    else if (type == EA_ZMMVSIB)
+		op->indexreg = nasm_rd_zmmreg[index | ((rex & REX_X) ? 8 : 0)];
 	    else if (index == 4 && !(rex & REX_X))
 		op->indexreg = -1; /* ESP/RSP cannot be an index */
             else if (a64)
@@ -868,6 +870,10 @@ static int matches(const struct itemplate *t, uint8_t *data,
             eat = EA_YMMVSIB;
             break;
 
+        case 0376:
+            eat = EA_ZMMVSIB;
+            break;
+
 	default:
 	    return false;	/* Unknown code */
 	}
diff --git a/insns.dat b/insns.dat
index 0b55b68..320280a 100644
--- a/insns.dat
+++ b/insns.dat
@@ -1064,16 +1064,16 @@ PUSH		reg_ds				[-:	1e]					8086,NOLONG
 PUSH		reg_fs				[-:	0f a0]					386
 PUSH		reg_gs				[-:	0f a8]					386
 PUSH		imm8				[i:	6a ib,s]					186
-PUSH		sbyteword16			[i:	o16 6a ib,s]				186,AR0,SZ,ND
-PUSH		imm16				[i:	o16 68 iw]				186,AR0,SZ
-PUSH		sbytedword32			[i:	o32 6a ib,s]				386,NOLONG,AR0,SZ,ND
-PUSH		imm32				[i:	o32 68 id]				386,NOLONG,AR0,SZ
+PUSH		sbyteword16			[i:	o16 6a ib,s]				186,AR0,SIZE,ND
+PUSH		imm16				[i:	o16 68 iw]				186,AR0,SIZE
+PUSH		sbytedword32			[i:	o32 6a ib,s]				386,NOLONG,AR0,SIZE,ND
+PUSH		imm32				[i:	o32 68 id]				386,NOLONG,AR0,SIZE
 PUSH		sbytedword32			[i:	o32 6a ib,s]				386,NOLONG,SD,ND
 PUSH		imm32				[i:	o32 68 id]				386,NOLONG,SD
-PUSH		sbytedword64			[i:	o64nw 6a ib,s]				X64,AR0,SZ,ND
-PUSH		imm64				[i:	o64nw 68 id,s]				X64,AR0,SZ
-PUSH		sbytedword32			[i:	o64nw 6a ib,s]				X64,AR0,SZ,ND
-PUSH		imm32				[i:	o64nw 68 id,s]				X64,AR0,SZ
+PUSH		sbytedword64			[i:	o64nw 6a ib,s]				X64,AR0,SIZE,ND
+PUSH		imm64				[i:	o64nw 68 id,s]				X64,AR0,SIZE
+PUSH		sbytedword32			[i:	o64nw 6a ib,s]				X64,AR0,SIZE,ND
+PUSH		imm32				[i:	o64nw 68 id,s]				X64,AR0,SIZE
 PUSHA		void				[	odf 60]					186,NOLONG
 PUSHAD		void				[	o32 60]					386,NOLONG
 PUSHAW		void				[	o16 60]					186,NOLONG
@@ -3457,7 +3457,437 @@ TZMSK		reg32,rm32			[vm:	xop.ndd.lz.m9.w0 01 /4]			FUTURE,TBM
 TZMSK		reg64,rm64			[vm:	xop.ndd.lz.m9.w1 01 /4]			LONG,FUTURE,TBM
 T1MSKC		reg32,rm32			[vm:	xop.ndd.lz.m9.w0 01 /7]			FUTURE,TBM
 T1MSKC		reg64,rm64			[vm:	xop.ndd.lz.m9.w1 01 /7]			LONG,FUTURE,TBM
-+
+
+;# Intel AVX512 instructions
+;
+; based on pub number 319433-015 dated July 2013
+;
+VADDPD           zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:         evex.nds.512.66.0f.w1 58 /r ]  AVX512,FUTURE
+VADDPS           zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:            evex.nds.512.0f.w0 58 /r ]  AVX512,FUTURE
+VADDSD           xmmreg|mask|z,xmmreg,xmmrm64|er               [rvm:t1s:        evex.nds.lig.f2.0f.w1 58 /r ]  AVX512,FUTURE
+VADDSS           xmmreg|mask|z,xmmreg,xmmrm32|er               [rvm:t1s:        evex.nds.lig.f3.0f.w0 58 /r ]  AVX512,FUTURE
+VALIGND          zmmreg|mask|z,zmmreg,zmmrm512|b32,imm8        [rvmi:fv:   evex.nds.512.66.0f3a.w0 03 /r ib ]  AVX512,FUTURE
+VALIGNQ          zmmreg|mask|z,zmmreg,zmmrm512|b64,imm8        [rvmi:fv:   evex.nds.512.66.0f3a.w1 03 /r ib ]  AVX512,FUTURE
+VBLENDMPD        zmmreg|mask|z,zmmreg,zmmrm512|b64             [rvm:fv:       evex.nds.512.66.0f38.w1 65 /r ]  AVX512,FUTURE
+VBLENDMPS        zmmreg|mask|z,zmmreg,zmmrm512|b32             [rvm:fv:       evex.nds.512.66.0f38.w0 65 /r ]  AVX512,FUTURE
+VBROADCASTF32X4  zmmreg|mask|z,mem128                          [rm:t4:            evex.512.66.0f38.w0 1a /r ]  AVX512,FUTURE
+VBROADCASTF64X4  zmmreg|mask|z,mem256                          [rm:t4:            evex.512.66.0f38.w1 1b /r ]  AVX512,FUTURE
+VBROADCASTI32X4  zmmreg|mask|z,mem128                          [rm:t4:            evex.512.66.0f38.w0 5a /r ]  AVX512,FUTURE
+VBROADCASTI64X4  zmmreg|mask|z,mem256                          [rm:t4:            evex.512.66.0f38.w1 5b /r ]  AVX512,FUTURE
+VBROADCASTSD     zmmreg|mask|z,mem64                           [rm:t1s:           evex.512.66.0f38.w1 19 /r ]  AVX512,FUTURE
+VBROADCASTSD     zmmreg|mask|z,xmmreg                          [rm:               evex.512.66.0f38.w1 19 /r ]  AVX512,FUTURE
+VBROADCASTSS     zmmreg|mask|z,mem32                           [rm:t1s:           evex.512.66.0f38.w0 18 /r ]  AVX512,FUTURE
+VBROADCASTSS     zmmreg|mask|z,xmmreg                          [rm:               evex.512.66.0f38.w0 18 /r ]  AVX512,FUTURE
+VCMPPD           opmaskreg|mask,zmmreg,zmmrm512|b64|sae,imm8   [rvmi:fv:     evex.nds.512.66.0f.w1 c2 /r ib ]  AVX512,FUTURE
+VCMPPS           opmaskreg|mask,zmmreg,zmmrm512|b32|sae,imm8   [rvmi:fv:        evex.nds.512.0f.w0 c2 /r ib ]  AVX512,FUTURE
+VCMPSD           opmaskreg|mask,xmmreg,xmmrm64|sae,imm8        [rvmi:t1s:    evex.nds.lig.f2.0f.w1 c2 /r ib ]  AVX512,FUTURE
+VCMPSS           opmaskreg|mask,xmmreg,xmmrm32|sae,imm8        [rvmi:t1s:    evex.nds.lig.f3.0f.w0 c2 /r ib ]  AVX512,FUTURE
+VCOMISD          xmmreg,xmmrm64|sae                            [rm:t1s:             evex.lig.66.0f.w1 2f /r ]  AVX512,FUTURE
+VCOMISS          xmmreg,xmmrm32|sae                            [rm:t1s:                evex.lig.0f.w0 2f /r ]  AVX512,FUTURE
+VCOMPRESSPD      mem512|mask,zmmreg                            [mr:t1s:           evex.512.66.0f38.w1 8a /r ]  AVX512,FUTURE
+VCOMPRESSPD      zmmreg|mask|z,zmmreg                          [mr:               evex.512.66.0f38.w1 8a /r ]  AVX512,FUTURE
+VCOMPRESSPS      mem512|mask,zmmreg                            [mr:t1s:           evex.512.66.0f38.w0 8a /r ]  AVX512,FUTURE
+VCOMPRESSPS      zmmreg|mask|z,zmmreg                          [mr:               evex.512.66.0f38.w0 8a /r ]  AVX512,FUTURE
+VCVTDQ2PD        zmmreg|mask|z,ymmrm256|b32|er                 [rm:hv:              evex.512.f3.0f.w0 e6 /r ]  AVX512,FUTURE
+VCVTDQ2PS        zmmreg|mask|z,zmmrm512|b32|er                 [rm:fv:                 evex.512.0f.w0 5b /r ]  AVX512,FUTURE
+VCVTPD2DQ        ymmreg|mask|z,zmmrm512|b64|er                 [rm:fv:              evex.512.f2.0f.w1 e6 /r ]  AVX512,FUTURE
+VCVTPD2PS        ymmreg|mask|z,zmmrm512|b64|er                 [rm:fv:              evex.512.66.0f.w1 5a /r ]  AVX512,FUTURE
+VCVTPD2UDQ       ymmreg|mask|z,zmmrm512|b64|er                 [rm:fv:                 evex.512.0f.w1 79 /r ]  AVX512,FUTURE
+VCVTPH2PS        zmmreg|mask|z,ymmrm256|sae                    [rm:hvm:           evex.512.66.0f38.w0 13 /r ]  AVX512,FUTURE
+VCVTPS2DQ        zmmreg|mask|z,zmmrm512|b32|er                 [rm:fv:              evex.512.66.0f.w0 5b /r ]  AVX512,FUTURE
+VCVTPS2PD        zmmreg|mask|z,ymmrm256|b32|sae                [rm:hv:                 evex.512.0f.w0 5a /r ]  AVX512,FUTURE
+VCVTPS2PH        mem256|mask,zmmreg|sae,imm8                   [mri:hvm:       evex.512.66.0f3a.w0 1d /r ib ]  AVX512,FUTURE
+VCVTPS2PH        ymmreg|mask|z,zmmreg|sae,imm8                 [mri:hvm:       evex.512.66.0f3a.w0 1d /r ib ]  AVX512,FUTURE
+VCVTPS2UDQ       zmmreg|mask|z,zmmrm512|b32|er                 [rm:fv:                 evex.512.0f.w0 79 /r ]  AVX512,FUTURE
+VCVTSD2SI        reg32,xmmrm64|er                              [rm:t1f64:           evex.lig.f2.0f.w0 2d /r ]  AVX512,FUTURE
+VCVTSD2SI        reg64,xmmrm64|er                              [rm:t1f64:           evex.lig.f2.0f.w1 2d /r ]  AVX512,FUTURE
+VCVTSD2SS        xmmreg|mask|z,xmmreg,xmmrm64|er               [rvm:t1s:        evex.nds.lig.f2.0f.w1 5a /r ]  AVX512,FUTURE
+VCVTSD2USI       reg32,xmmrm64|er                              [rm:t1f64:           evex.lig.f2.0f.w0 79 /r ]  AVX512,FUTURE
+VCVTSD2USI       reg64,xmmrm64|er                              [rm:t1f64:           evex.lig.f2.0f.w1 79 /r ]  AVX512,FUTURE
+VCVTSI2SD        xmmreg,xmmreg,rm32|er                         [rvm:t1s:        evex.nds.lig.f2.0f.w0 2a /r ]  AVX512,FUTURE
+VCVTSI2SD        xmmreg,xmmreg,rm64|er                         [rvm:t1s:        evex.nds.lig.f2.0f.w1 2a /r ]  AVX512,FUTURE
+VCVTSI2SS        xmmreg,xmmreg,rm32|er                         [rvm:t1s:        evex.nds.lig.f3.0f.w0 2a /r ]  AVX512,FUTURE
+VCVTSI2SS        xmmreg,xmmreg,rm64|er                         [rvm:t1s:        evex.nds.lig.f3.0f.w1 2a /r ]  AVX512,FUTURE
+VCVTSS2SD        xmmreg|mask|z,xmmreg,xmmrm32|sae              [rvm:t1s:        evex.nds.lig.f3.0f.w0 5a /r ]  AVX512,FUTURE
+VCVTSS2SI        reg32,xmmrm32|er                              [rm:t1f32:           evex.lig.f3.0f.w0 2d /r ]  AVX512,FUTURE
+VCVTSS2SI        reg64,xmmrm32|er                              [rm:t1f32:           evex.lig.f3.0f.w1 2d /r ]  AVX512,FUTURE
+VCVTSS2USI       reg32,xmmrm32|er                              [rm:t1f32:           evex.lig.f3.0f.w0 79 /r ]  AVX512,FUTURE
+VCVTSS2USI       reg64,xmmrm32|er                              [rm:t1f32:           evex.lig.f3.0f.w1 79 /r ]  AVX512,FUTURE
+VCVTTPD2DQ       ymmreg|mask|z,zmmrm512|b64|sae                [rm:fv:              evex.512.66.0f.w1 e6 /r ]  AVX512,FUTURE
+VCVTTPD2UDQ      ymmreg|mask|z,zmmrm512|b64|sae                [rm:fv:                 evex.512.0f.w1 78 /r ]  AVX512,FUTURE
+VCVTTPS2DQ       zmmreg|mask|z,zmmrm512|b32|sae                [rm:fv:              evex.512.f3.0f.w0 5b /r ]  AVX512,FUTURE
+VCVTTPS2UDQ      zmmreg|mask|z,zmmrm512|b32|sae                [rm:fv:                 evex.512.0f.w0 78 /r ]  AVX512,FUTURE
+VCVTTSD2SI       reg32,xmmrm64|sae                             [rm:t1f64:           evex.lig.f2.0f.w0 2c /r ]  AVX512,FUTURE
+VCVTTSD2SI       reg64,xmmrm64|sae                             [rm:t1f64:           evex.lig.f2.0f.w1 2c /r ]  AVX512,FUTURE
+VCVTTSD2USI      reg32,xmmrm64|sae                             [rm:t1f64:           evex.lig.f2.0f.w0 78 /r ]  AVX512,FUTURE
+VCVTTSD2USI      reg64,xmmrm64|sae                             [rm:t1f64:           evex.lig.f2.0f.w1 78 /r ]  AVX512,FUTURE
+VCVTTSS2SI       reg32,xmmrm32|sae                             [rm:t1f32:           evex.lig.f3.0f.w0 2c /r ]  AVX512,FUTURE
+VCVTTSS2SI       reg64,xmmrm32|sae                             [rm:t1f32:           evex.lig.f3.0f.w1 2c /r ]  AVX512,FUTURE
+VCVTTSS2USI      reg32,xmmrm32|sae                             [rm:t1f32:           evex.lig.f3.0f.w0 78 /r ]  AVX512,FUTURE
+VCVTTSS2USI      reg64,xmmrm32|sae                             [rm:t1f32:           evex.lig.f3.0f.w1 78 /r ]  AVX512,FUTURE
+VCVTUDQ2PD       zmmreg|mask|z,ymmrm256|b32|er                 [rm:hv:              evex.512.f3.0f.w0 7a /r ]  AVX512,FUTURE
+VCVTUDQ2PS       zmmreg|mask|z,zmmrm512|b32|er                 [rm:fv:              evex.512.f2.0f.w0 7a /r ]  AVX512,FUTURE
+VCVTUSI2SD       xmmreg,xmmreg,rm32|er                         [rvm:t1s:        evex.nds.lig.f2.0f.w0 7b /r ]  AVX512,FUTURE
+VCVTUSI2SD       xmmreg,xmmreg,rm64|er                         [rvm:t1s:        evex.nds.lig.f2.0f.w1 7b /r ]  AVX512,FUTURE
+VCVTUSI2SS       xmmreg,xmmreg,rm32|er                         [rvm:t1s:        evex.nds.lig.f3.0f.w0 7b /r ]  AVX512,FUTURE
+VCVTUSI2SS       xmmreg,xmmreg,rm64|er                         [rvm:t1s:        evex.nds.lig.f3.0f.w1 7b /r ]  AVX512,FUTURE
+VDIVPD           zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:         evex.nds.512.66.0f.w1 5e /r ]  AVX512,FUTURE
+VDIVPS           zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:            evex.nds.512.0f.w0 5e /r ]  AVX512,FUTURE
+VDIVSD           xmmreg|mask|z,xmmreg,xmmrm64|er               [rvm:t1s:        evex.nds.lig.f2.0f.w1 5e /r ]  AVX512,FUTURE
+VDIVSS           xmmreg|mask|z,xmmreg,xmmrm32|er               [rvm:t1s:        evex.nds.lig.f3.0f.w0 5e /r ]  AVX512,FUTURE
+VEXPANDPD        zmmreg|mask|z,mem512                          [rm:t1s:           evex.512.66.0f38.w1 88 /r ]  AVX512,FUTURE
+VEXPANDPD        zmmreg|mask|z,zmmreg                          [rm:t1s:           evex.512.66.0f38.w1 88 /r ]  AVX512,FUTURE
+VEXPANDPS        zmmreg|mask|z,mem512                          [rm:t1s:           evex.512.66.0f38.w0 88 /r ]  AVX512,FUTURE
+VEXPANDPS        zmmreg|mask|z,zmmreg                          [rm:t1s:           evex.512.66.0f38.w0 88 /r ]  AVX512,FUTURE
+VEXTRACTF32X4    mem128|mask,zmmreg,imm8                       [mri:t4:        evex.512.66.0f3a.w0 19 /r ib ]  AVX512,FUTURE
+VEXTRACTF32X4    xmmreg|mask|z,zmmreg,imm8                     [mri:t4:        evex.512.66.0f3a.w0 19 /r ib ]  AVX512,FUTURE
+VEXTRACTF64X4    mem256|mask,zmmreg,imm8                       [mri:t4:        evex.512.66.0f3a.w1 1b /r ib ]  AVX512,FUTURE
+VEXTRACTF64X4    ymmreg|mask|z,zmmreg,imm8                     [mri:           evex.512.66.0f3a.w1 1b /r ib ]  AVX512,FUTURE
+VEXTRACTI32X4    mem128|mask,zmmreg,imm8                       [mri:t4:        evex.512.66.0f3a.w0 39 /r ib ]  AVX512,FUTURE
+VEXTRACTI32X4    xmmreg|mask|z,zmmreg,imm8                     [mri:           evex.512.66.0f3a.w0 39 /r ib ]  AVX512,FUTURE
+VEXTRACTI64X4    mem256|mask,zmmreg,imm8                       [mri:t4:        evex.512.66.0f3a.w1 3b /r ib ]  AVX512,FUTURE
+VEXTRACTI64X4    ymmreg|mask|z,zmmreg,imm8                     [mri:           evex.512.66.0f3a.w1 3b /r ib ]  AVX512,FUTURE
+VEXTRACTPS       rm32,xmmreg,imm8                              [mri:t1s:      evex.128.66.0f3a.wig 17 /r ib ]  AVX512,FUTURE
+VFIXUPIMMPD      zmmreg|mask|z,zmmreg,zmmrm512|b64|sae,imm8    [rvmi:fv:   evex.nds.512.66.0f3a.w1 54 /r ib ]  AVX512,FUTURE
+VFIXUPIMMPS      zmmreg|mask|z,zmmreg,zmmrm512|b32|sae,imm8    [rvmi:fv:   evex.nds.512.66.0f3a.w0 54 /r ib ]  AVX512,FUTURE
+VFIXUPIMMSD      xmmreg|mask|z,xmmreg,xmmrm64|sae,imm8         [rvmi:t1s:  evex.nds.lig.66.0f3a.w1 55 /r ib ]  AVX512,FUTURE
+VFIXUPIMMSS      xmmreg|mask|z,xmmreg,xmmrm32|sae,imm8         [rvmi:t1s:  evex.nds.lig.66.0f3a.w0 55 /r ib ]  AVX512,FUTURE
+VFMADD132PD      zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:       evex.nds.512.66.0f38.w1 98 /r ]  AVX512,FUTURE
+VFMADD132PS      zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:       evex.nds.512.66.0f38.w0 98 /r ]  AVX512,FUTURE
+VFMADD132SD      xmmreg|mask|z,xmmreg,xmmrm64|er               [rvm:t1s:      evex.nds.lig.66.0f38.w1 99 /r ]  AVX512,FUTURE
+VFMADD132SS      xmmreg|mask|z,xmmreg,xmmrm32|er               [rvm:t1s:      evex.nds.lig.66.0f38.w0 99 /r ]  AVX512,FUTURE
+VFMADD213PD      zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:       evex.nds.512.66.0f38.w1 a8 /r ]  AVX512,FUTURE
+VFMADD213PS      zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:       evex.nds.512.66.0f38.w0 a8 /r ]  AVX512,FUTURE
+VFMADD213SD      xmmreg|mask|z,xmmreg,xmmrm64|er               [rvm:t1s:      evex.nds.lig.66.0f38.w1 a9 /r ]  AVX512,FUTURE
+VFMADD213SS      xmmreg|mask|z,xmmreg,xmmrm32|er               [rvm:t1s:      evex.nds.lig.66.0f38.w0 a9 /r ]  AVX512,FUTURE
+VFMADD231PD      zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:       evex.nds.512.66.0f38.w1 b8 /r ]  AVX512,FUTURE
+VFMADD231PS      zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:       evex.nds.512.66.0f38.w0 b8 /r ]  AVX512,FUTURE
+VFMADD231SD      xmmreg|mask|z,xmmreg,xmmrm64|er               [rvm:t1s:      evex.nds.lig.66.0f38.w1 b9 /r ]  AVX512,FUTURE
+VFMADD231SS      xmmreg|mask|z,xmmreg,xmmrm32|er               [rvm:t1s:      evex.nds.lig.66.0f38.w0 b9 /r ]  AVX512,FUTURE
+VFMADDSUB132PD   zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:       evex.nds.512.66.0f38.w1 96 /r ]  AVX512,FUTURE
+VFMADDSUB132PS   zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:       evex.nds.512.66.0f38.w0 96 /r ]  AVX512,FUTURE
+VFMADDSUB213PD   zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:       evex.nds.512.66.0f38.w1 a6 /r ]  AVX512,FUTURE
+VFMADDSUB213PS   zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:       evex.nds.512.66.0f38.w0 a6 /r ]  AVX512,FUTURE
+VFMADDSUB231PD   zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:       evex.nds.512.66.0f38.w1 b6 /r ]  AVX512,FUTURE
+VFMADDSUB231PS   zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:       evex.nds.512.66.0f38.w0 b6 /r ]  AVX512,FUTURE
+VFMSUB132PD      zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:       evex.nds.512.66.0f38.w1 9a /r ]  AVX512,FUTURE
+VFMSUB132PS      zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:       evex.nds.512.66.0f38.w0 9a /r ]  AVX512,FUTURE
+VFMSUB132SD      xmmreg|mask|z,xmmreg,xmmrm64|er               [rvm:t1s:      evex.nds.lig.66.0f38.w1 9b /r ]  AVX512,FUTURE
+VFMSUB132SS      xmmreg|mask|z,xmmreg,xmmrm32|er               [rvm:t1s:      evex.nds.lig.66.0f38.w0 9b /r ]  AVX512,FUTURE
+VFMSUB213PD      zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:       evex.nds.512.66.0f38.w1 aa /r ]  AVX512,FUTURE
+VFMSUB213PS      zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:       evex.nds.512.66.0f38.w0 aa /r ]  AVX512,FUTURE
+VFMSUB213SD      xmmreg|mask|z,xmmreg,xmmrm64|er               [rvm:t1s:      evex.nds.lig.66.0f38.w1 ab /r ]  AVX512,FUTURE
+VFMSUB213SS      xmmreg|mask|z,xmmreg,xmmrm32|er               [rvm:t1s:      evex.nds.lig.66.0f38.w0 ab /r ]  AVX512,FUTURE
+VFMSUB231PD      zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:       evex.nds.512.66.0f38.w1 ba /r ]  AVX512,FUTURE
+VFMSUB231PS      zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:       evex.nds.512.66.0f38.w0 ba /r ]  AVX512,FUTURE
+VFMSUB231SD      xmmreg|mask|z,xmmreg,xmmrm64|er               [rvm:t1s:      evex.nds.lig.66.0f38.w1 bb /r ]  AVX512,FUTURE
+VFMSUB231SS      xmmreg|mask|z,xmmreg,xmmrm32|er               [rvm:t1s:      evex.nds.lig.66.0f38.w0 bb /r ]  AVX512,FUTURE
+VFMSUBADD132PD   zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:       evex.nds.512.66.0f38.w1 97 /r ]  AVX512,FUTURE
+VFMSUBADD132PS   zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:       evex.nds.512.66.0f38.w0 97 /r ]  AVX512,FUTURE
+VFMSUBADD213PD   zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:       evex.nds.512.66.0f38.w1 a7 /r ]  AVX512,FUTURE
+VFMSUBADD213PS   zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:       evex.nds.512.66.0f38.w0 a7 /r ]  AVX512,FUTURE
+VFMSUBADD231PD   zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:       evex.nds.512.66.0f38.w1 b7 /r ]  AVX512,FUTURE
+VFMSUBADD231PS   zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:       evex.nds.512.66.0f38.w0 b7 /r ]  AVX512,FUTURE
+VFNMADD132PD     zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:       evex.nds.512.66.0f38.w1 9c /r ]  AVX512,FUTURE
+VFNMADD132PS     zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:       evex.nds.512.66.0f38.w0 9c /r ]  AVX512,FUTURE
+VFNMADD132SD     xmmreg|mask|z,xmmreg,xmmrm64|er               [rvm:t1s:      evex.nds.lig.66.0f38.w1 9d /r ]  AVX512,FUTURE
+VFNMADD132SS     xmmreg|mask|z,xmmreg,xmmrm32|er               [rvm:t1s:      evex.nds.lig.66.0f38.w0 9d /r ]  AVX512,FUTURE
+VFNMADD213PD     zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:       evex.nds.512.66.0f38.w1 ac /r ]  AVX512,FUTURE
+VFNMADD213PS     zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:       evex.nds.512.66.0f38.w0 ac /r ]  AVX512,FUTURE
+VFNMADD213SD     xmmreg|mask|z,xmmreg,xmmrm64|er               [rvm:t1s:      evex.nds.lig.66.0f38.w1 ad /r ]  AVX512,FUTURE
+VFNMADD213SS     xmmreg|mask|z,xmmreg,xmmrm32|er               [rvm:t1s:      evex.nds.lig.66.0f38.w0 ad /r ]  AVX512,FUTURE
+VFNMADD231PD     zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:       evex.nds.512.66.0f38.w1 bc /r ]  AVX512,FUTURE
+VFNMADD231PS     zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:       evex.nds.512.66.0f38.w0 bc /r ]  AVX512,FUTURE
+VFNMADD231SD     xmmreg|mask|z,xmmreg,xmmrm64|er               [rvm:t1s:      evex.nds.lig.66.0f38.w1 bd /r ]  AVX512,FUTURE
+VFNMADD231SS     xmmreg|mask|z,xmmreg,xmmrm32|er               [rvm:t1s:      evex.nds.lig.66.0f38.w0 bd /r ]  AVX512,FUTURE
+VFNMSUB132PD     zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:       evex.nds.512.66.0f38.w1 9e /r ]  AVX512,FUTURE
+VFNMSUB132PS     zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:       evex.nds.512.66.0f38.w0 9e /r ]  AVX512,FUTURE
+VFNMSUB132SD     xmmreg|mask|z,xmmreg,xmmrm64|er               [rvm:t1s:      evex.nds.lig.66.0f38.w1 9f /r ]  AVX512,FUTURE
+VFNMSUB132SS     xmmreg|mask|z,xmmreg,xmmrm32|er               [rvm:t1s:      evex.nds.lig.66.0f38.w0 9f /r ]  AVX512,FUTURE
+VFNMSUB213PD     zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:       evex.nds.512.66.0f38.w1 ae /r ]  AVX512,FUTURE
+VFNMSUB213PS     zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:       evex.nds.512.66.0f38.w0 ae /r ]  AVX512,FUTURE
+VFNMSUB213SD     xmmreg|mask|z,xmmreg,xmmrm64|er               [rvm:t1s:      evex.nds.lig.66.0f38.w1 af /r ]  AVX512,FUTURE
+VFNMSUB213SS     xmmreg|mask|z,xmmreg,xmmrm32|er               [rvm:t1s:      evex.nds.lig.66.0f38.w0 af /r ]  AVX512,FUTURE
+VFNMSUB231PD     zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:       evex.nds.512.66.0f38.w1 be /r ]  AVX512,FUTURE
+VFNMSUB231PS     zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:       evex.nds.512.66.0f38.w0 be /r ]  AVX512,FUTURE
+VFNMSUB231SD     xmmreg|mask|z,xmmreg,xmmrm64|er               [rvm:t1s:      evex.nds.lig.66.0f38.w1 bf /r ]  AVX512,FUTURE
+VFNMSUB231SS     xmmreg|mask|z,xmmreg,xmmrm32|er               [rvm:t1s:      evex.nds.lig.66.0f38.w0 bf /r ]  AVX512,FUTURE
+VGATHERDPD       zmmreg|mask,ymem64                            [rm:t1s:     vsiby evex.512.66.0f38.w1 92 /r ]  AVX512,FUTURE
+VGATHERDPS       zmmreg|mask,zmem32                            [rm:t1s:     vsibz evex.512.66.0f38.w0 92 /r ]  AVX512,FUTURE
+VGATHERQPD       zmmreg|mask,zmem64                            [rm:t1s:     vsibz evex.512.66.0f38.w1 93 /r ]  AVX512,FUTURE
+VGATHERQPS       ymmreg|mask,zmem32                            [rm:t1s:     vsibz evex.512.66.0f38.w0 93 /r ]  AVX512,FUTURE
+VGETEXPPD        zmmreg|mask|z,zmmrm512|b64|sae                [rm:fv:            evex.512.66.0f38.w1 42 /r ]  AVX512,FUTURE
+VGETEXPPS        zmmreg|mask|z,zmmrm512|b32|sae                [rm:fv:            evex.512.66.0f38.w0 42 /r ]  AVX512,FUTURE
+VGETEXPSD        xmmreg|mask|z,xmmreg,xmmrm64|sae              [rvm:t1s:      evex.nds.lig.66.0f38.w1 43 /r ]  AVX512,FUTURE
+VGETEXPSS        xmmreg|mask|z,xmmreg,xmmrm32|sae              [rvm:t1s:      evex.nds.lig.66.0f38.w0 43 /r ]  AVX512,FUTURE
+VGETMANTPD       zmmreg|mask|z,zmmrm512|b64|sae,imm8           [rmi:fv:        evex.512.66.0f3a.w1 26 /r ib ]  AVX512,FUTURE
+VGETMANTPS       zmmreg|mask|z,zmmrm512|b32|sae,imm8           [rmi:fv:        evex.512.66.0f3a.w0 26 /r ib ]  AVX512,FUTURE
+VGETMANTSD       xmmreg|mask|z,xmmreg,xmmrm64|sae,imm8         [rvmi:t1s:  evex.nds.lig.66.0f3a.w1 27 /r ib ]  AVX512,FUTURE
+VGETMANTSS       xmmreg|mask|z,xmmreg,xmmrm32|sae,imm8         [rvmi:t1s:  evex.nds.lig.66.0f3a.w0 27 /r ib ]  AVX512,FUTURE
+VINSERTF32X4     zmmreg|mask|z,zmmreg,xmmrm128,imm8            [rvmi:t4:   evex.nds.512.66.0f3a.w0 18 /r ib ]  AVX512,FUTURE
+VINSERTF64X4     zmmreg|mask|z,zmmreg,ymmrm256,imm8            [rvmi:t4:   evex.nds.512.66.0f3a.w1 1a /r ib ]  AVX512,FUTURE
+VINSERTI32X4     zmmreg|mask|z,zmmreg,xmmrm128,imm8            [rvmi:t4:   evex.nds.512.66.0f3a.w0 38 /r ib ]  AVX512,FUTURE
+VINSERTI64X4     zmmreg|mask|z,zmmreg,ymmrm256,imm8            [rvmi:t4:   evex.nds.512.66.0f3a.w1 3a /r ib ]  AVX512,FUTURE
+VINSERTPS        xmmreg,xmmreg,xmmrm32,imm8                    [rvmi:t1s:  evex.nds.128.66.0f3a.w0 21 /r ib ]  AVX512,FUTURE
+VMAXPD           zmmreg|mask|z,zmmreg,zmmrm512|b64|sae         [rvm:fv:         evex.nds.512.66.0f.w1 5f /r ]  AVX512,FUTURE
+VMAXPS           zmmreg|mask|z,zmmreg,zmmrm512|b32|sae         [rvm:fv:            evex.nds.512.0f.w0 5f /r ]  AVX512,FUTURE
+VMAXSD           xmmreg|mask|z,xmmreg,xmmrm64|sae              [rvm:t1s:        evex.nds.lig.f2.0f.w1 5f /r ]  AVX512,FUTURE
+VMAXSS           xmmreg|mask|z,xmmreg,xmmrm32|sae              [rvm:t1s:        evex.nds.lig.f3.0f.w0 5f /r ]  AVX512,FUTURE
+VMINPD           zmmreg|mask|z,zmmreg,zmmrm512|b64|sae         [rvm:fv:         evex.nds.512.66.0f.w1 5d /r ]  AVX512,FUTURE
+VMINPS           zmmreg|mask|z,zmmreg,zmmrm512|b32|sae         [rvm:fv:            evex.nds.512.0f.w0 5d /r ]  AVX512,FUTURE
+VMINSD           xmmreg|mask|z,xmmreg,xmmrm64|sae              [rvm:t1s:        evex.nds.lig.f2.0f.w1 5d /r ]  AVX512,FUTURE
+VMINSS           xmmreg|mask|z,xmmreg,xmmrm32|sae              [rvm:t1s:        evex.nds.lig.f3.0f.w0 5d /r ]  AVX512,FUTURE
+VMOVAPD          mem512|mask,zmmreg                            [mr:fvm:             evex.512.66.0f.w1 29 /r ]  AVX512,FUTURE
+VMOVAPD          zmmreg|mask|z,zmmreg                          [mr:                 evex.512.66.0f.w1 29 /r ]  AVX512,FUTURE
+VMOVAPD          zmmreg|mask|z,zmmrm512                        [rm:fvm:             evex.512.66.0f.w1 28 /r ]  AVX512,FUTURE
+VMOVAPS          mem512|mask,zmmreg                            [mr:fvm:                evex.512.0f.w0 29 /r ]  AVX512,FUTURE
+VMOVAPS          zmmreg|mask|z,zmmreg                          [mr:                    evex.512.0f.w0 29 /r ]  AVX512,FUTURE
+VMOVAPS          zmmreg|mask|z,zmmrm512                        [rm:fvm:                evex.512.0f.w0 28 /r ]  AVX512,FUTURE
+VMOVD            rm32,xmmreg                                   [mr:t1s:             evex.128.66.0f.w0 7e /r ]  AVX512,FUTURE
+VMOVD            xmmreg,rm32                                   [rm:t1s:             evex.128.66.0f.w0 6e /r ]  AVX512,FUTURE
+VMOVDDUP         zmmreg|mask|z,zmmrm512                        [rm:dup:             evex.512.f2.0f.w1 12 /r ]  AVX512,FUTURE
+VMOVDQA32        mem512|mask,zmmreg                            [mr:fvm:             evex.512.66.0f.w0 7f /r ]  AVX512,FUTURE
+VMOVDQA32        zmmreg|mask|z,zmmreg                          [mr:                 evex.512.66.0f.w0 7f /r ]  AVX512,FUTURE
+VMOVDQA32        zmmreg|mask|z,zmmrm512                        [rm:fvm:             evex.512.66.0f.w0 6f /r ]  AVX512,FUTURE
+VMOVDQA64        mem512|mask,zmmreg                            [mr:fvm:             evex.512.66.0f.w1 7f /r ]  AVX512,FUTURE
+VMOVDQA64        zmmreg|mask|z,zmmreg                          [mr:                 evex.512.66.0f.w1 7f /r ]  AVX512,FUTURE
+VMOVDQA64        zmmreg|mask|z,zmmrm512                        [rm:fvm:             evex.512.66.0f.w1 6f /r ]  AVX512,FUTURE
+VMOVDQU32        mem512|mask,zmmreg                            [mr:fvm:             evex.512.f3.0f.w0 7f /r ]  AVX512,FUTURE
+VMOVDQU32        zmmreg|mask|z,zmmreg                          [mr:                 evex.512.f3.0f.w0 7f /r ]  AVX512,FUTURE
+VMOVDQU32        zmmreg|mask|z,zmmrm512                        [rm:fvm:             evex.512.f3.0f.w0 6f /r ]  AVX512,FUTURE
+VMOVDQU64        mem512|mask,zmmreg                            [mr:fvm:             evex.512.f3.0f.w1 7f /r ]  AVX512,FUTURE
+VMOVDQU64        zmmreg|mask|z,zmmreg                          [mr:                 evex.512.f3.0f.w1 7f /r ]  AVX512,FUTURE
+VMOVDQU64        zmmreg|mask|z,zmmrm512                        [rm:fvm:             evex.512.f3.0f.w1 6f /r ]  AVX512,FUTURE
+VMOVHLPS         xmmreg,xmmreg,xmmreg                          [rvm:               evex.nds.128.0f.w0 12 /r ]  AVX512,FUTURE
+VMOVHPD          mem64,xmmreg                                  [mr:t1s:             evex.128.66.0f.w1 17 /r ]  AVX512,FUTURE
+VMOVHPD          xmmreg,xmmreg,mem64                           [rvm:t1s:        evex.nds.128.66.0f.w1 16 /r ]  AVX512,FUTURE
+VMOVHPS          mem64,xmmreg                                  [mr:t2:                 evex.128.0f.w0 17 /r ]  AVX512,FUTURE
+VMOVHPS          xmmreg,xmmreg,mem64                           [rvm:t2:            evex.nds.128.0f.w0 16 /r ]  AVX512,FUTURE
+VMOVLHPS         xmmreg,xmmreg,xmmreg                          [rvm:               evex.nds.128.0f.w0 16 /r ]  AVX512,FUTURE
+VMOVLPD          mem64,xmmreg                                  [mr:t1s:             evex.128.66.0f.w1 13 /r ]  AVX512,FUTURE
+VMOVLPD          xmmreg,xmmreg,mem64                           [rvm:t1s:        evex.nds.128.66.0f.w1 12 /r ]  AVX512,FUTURE
+VMOVLPS          mem64,xmmreg                                  [mr:t2:                 evex.128.0f.w0 13 /r ]  AVX512,FUTURE
+VMOVLPS          xmmreg,xmmreg,mem64                           [rvm:t2:            evex.nds.128.0f.w0 12 /r ]  AVX512,FUTURE
+VMOVNTDQ         mem512,zmmreg                                 [mr:fvm:             evex.512.66.0f.w0 e7 /r ]  AVX512,FUTURE
+VMOVNTDQA        zmmreg,mem512                                 [rm:fvm:           evex.512.66.0f38.w0 2a /r ]  AVX512,FUTURE
+VMOVNTPD         mem512,zmmreg                                 [mr:fvm:             evex.512.66.0f.w1 2b /r ]  AVX512,FUTURE
+VMOVNTPS         mem512,zmmreg                                 [mr:fvm:                evex.512.0f.w0 2b /r ]  AVX512,FUTURE
+VMOVQ            rm64,xmmreg                                   [mr:t1s:             evex.128.66.0f.w1 7e /r ]  AVX512,FUTURE
+VMOVQ            xmmreg,rm64                                   [rm:t1s:             evex.128.66.0f.w1 6e /r ]  AVX512,FUTURE
+VMOVQ            xmmreg,xmmrm64                                [rm:t1s:             evex.128.f3.0f.w1 7e /r ]  AVX512,FUTURE
+VMOVQ            xmmrm64,xmmreg                                [mr:t1s:             evex.128.66.0f.w1 d6 /r ]  AVX512,FUTURE
+VMOVSD           mem64|mask,xmmreg                             [mr:t1s:             evex.lig.f2.0f.w1 11 /r ]  AVX512,FUTURE
+VMOVSD           xmmreg|mask|z,mem64                           [rm:t1s:             evex.lig.f2.0f.w1 10 /r ]  AVX512,FUTURE
+VMOVSD           xmmreg|mask|z,xmmreg,xmmreg                   [mvr:            evex.nds.lig.f2.0f.w1 11 /r ]  AVX512,FUTURE
+VMOVSD           xmmreg|mask|z,xmmreg,xmmreg                   [rvm:            evex.nds.lig.f2.0f.w1 10 /r ]  AVX512,FUTURE
+VMOVSHDUP        zmmreg|mask|z,zmmrm512                        [rm:fvm:             evex.512.f3.0f.w0 16 /r ]  AVX512,FUTURE
+VMOVSLDUP        zmmreg|mask|z,zmmrm512                        [rm:fvm:             evex.512.f3.0f.w0 12 /r ]  AVX512,FUTURE
+VMOVSS           mem32|mask,xmmreg                             [mr:t1s:             evex.lig.f3.0f.w0 11 /r ]  AVX512,FUTURE
+VMOVSS           xmmreg|mask|z,mem32                           [rm:t1s:             evex.lig.f3.0f.w0 10 /r ]  AVX512,FUTURE
+VMOVSS           xmmreg|mask|z,xmmreg,xmmreg                   [mvr:            evex.nds.lig.f3.0f.w0 11 /r ]  AVX512,FUTURE
+VMOVSS           xmmreg|mask|z,xmmreg,xmmreg                   [rvm:            evex.nds.lig.f3.0f.w0 10 /r ]  AVX512,FUTURE
+VMOVUPD          mem512|mask,zmmreg                            [mr:fvm:             evex.512.66.0f.w1 11 /r ]  AVX512,FUTURE
+VMOVUPD          zmmreg|mask|z,zmmreg                          [mr:                 evex.512.66.0f.w1 11 /r ]  AVX512,FUTURE
+VMOVUPD          zmmreg|mask|z,zmmrm512                        [rm:fvm:             evex.512.66.0f.w1 10 /r ]  AVX512,FUTURE
+VMOVUPS          mem512|mask,zmmreg                            [mr:fvm:                evex.512.0f.w0 11 /r ]  AVX512,FUTURE
+VMOVUPS          zmmreg|mask|z,zmmreg                          [mr:                    evex.512.0f.w0 11 /r ]  AVX512,FUTURE
+VMOVUPS          zmmreg|mask|z,zmmrm512                        [rm:fvm:                evex.512.0f.w0 10 /r ]  AVX512,FUTURE
+VMULPD           zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:         evex.nds.512.66.0f.w1 59 /r ]  AVX512,FUTURE
+VMULPS           zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:            evex.nds.512.0f.w0 59 /r ]  AVX512,FUTURE
+VMULSD           xmmreg|mask|z,xmmreg,xmmrm64|er               [rvm:t1s:        evex.nds.lig.f2.0f.w1 59 /r ]  AVX512,FUTURE
+VMULSS           xmmreg|mask|z,xmmreg,xmmrm32|er               [rvm:t1s:        evex.nds.lig.f3.0f.w0 59 /r ]  AVX512,FUTURE
+VPABSD           zmmreg|mask|z,zmmrm512|b32                    [rm:fv:            evex.512.66.0f38.w0 1e /r ]  AVX512,FUTURE
+VPABSQ           zmmreg|mask|z,zmmrm512|b64                    [rm:fv:            evex.512.66.0f38.w1 1f /r ]  AVX512,FUTURE
+VPADDD           zmmreg|mask|z,zmmreg,zmmrm512|b32             [rvm:fv:         evex.nds.512.66.0f.w0 fe /r ]  AVX512,FUTURE
+VPADDQ           zmmreg|mask|z,zmmreg,zmmrm512|b64             [rvm:fv:         evex.nds.512.66.0f.w1 d4 /r ]  AVX512,FUTURE
+VPANDD           zmmreg|mask|z,zmmreg,zmmrm512|b32             [rvm:fv:         evex.nds.512.66.0f.w0 db /r ]  AVX512,FUTURE
+VPANDND          zmmreg|mask|z,zmmreg,zmmrm512|b32             [rvm:fv:         evex.nds.512.66.0f.w0 df /r ]  AVX512,FUTURE
+VPANDNQ          zmmreg|mask|z,zmmreg,zmmrm512|b64             [rvm:fv:         evex.nds.512.66.0f.w1 df /r ]  AVX512,FUTURE
+VPANDQ           zmmreg|mask|z,zmmreg,zmmrm512|b64             [rvm:fv:         evex.nds.512.66.0f.w1 db /r ]  AVX512,FUTURE
+VPBLENDMD        zmmreg|mask|z,zmmreg,zmmrm512|b32             [rvm:fv:       evex.nds.512.66.0f38.w0 64 /r ]  AVX512,FUTURE
+VPBLENDMQ        zmmreg|mask|z,zmmreg,zmmrm512|b64             [rvm:fv:       evex.nds.512.66.0f38.w1 64 /r ]  AVX512,FUTURE
+VPBROADCASTD     zmmreg|mask|z,mem32                           [rm:t1s:           evex.512.66.0f38.w0 58 /r ]  AVX512,FUTURE
+VPBROADCASTD     zmmreg|mask|z,reg32                           [rm:               evex.512.66.0f38.w0 7c /r ]  AVX512,FUTURE
+VPBROADCASTD     zmmreg|mask|z,xmmreg                          [rm:               evex.512.66.0f38.w0 58 /r ]  AVX512,FUTURE
+VPBROADCASTQ     zmmreg|mask|z,mem64                           [rm:t1s:           evex.512.66.0f38.w1 59 /r ]  AVX512,FUTURE
+VPBROADCASTQ     zmmreg|mask|z,reg64                           [rm:               evex.512.66.0f38.w1 7c /r ]  AVX512,FUTURE
+VPBROADCASTQ     zmmreg|mask|z,xmmreg                          [rm:               evex.512.66.0f38.w1 59 /r ]  AVX512,FUTURE
+VPCMPD           opmaskreg|mask,zmmreg,zmmrm512|b32,imm8       [rvmi:fv:   evex.nds.512.66.0f3a.w0 1f /r ib ]  AVX512,FUTURE
+VPCMPEQD         opmaskreg|mask,zmmreg,zmmrm512|b32            [rvm:fv:         evex.nds.512.66.0f.w0 76 /r ]  AVX512,FUTURE
+VPCMPEQQ         opmaskreg|mask,zmmreg,zmmrm512|b64            [rvm:fv:       evex.nds.512.66.0f38.w1 29 /r ]  AVX512,FUTURE
+VPCMPGTD         opmaskreg|mask,zmmreg,zmmrm512|b32            [rvm:fv:         evex.nds.512.66.0f.w0 66 /r ]  AVX512,FUTURE
+VPCMPGTQ         opmaskreg|mask,zmmreg,zmmrm512|b64            [rvm:fv:       evex.nds.512.66.0f38.w1 37 /r ]  AVX512,FUTURE
+VPCMPQ           opmaskreg|mask,zmmreg,zmmrm512|b64,imm8       [rvmi:fv:   evex.nds.512.66.0f3a.w1 1f /r ib ]  AVX512,FUTURE
+VPCMPUD          opmaskreg|mask,zmmreg,zmmrm512|b32,imm8       [rvmi:fv:   evex.nds.512.66.0f3a.w0 1e /r ib ]  AVX512,FUTURE
+VPCMPUQ          opmaskreg|mask,zmmreg,zmmrm512|b64,imm8       [rvmi:fv:   evex.nds.512.66.0f3a.w1 1e /r ib ]  AVX512,FUTURE
+VPCOMPRESSD      mem512|mask,zmmreg                            [mr:t1s:           evex.512.66.0f38.w0 8b /r ]  AVX512,FUTURE
+VPCOMPRESSD      zmmreg|mask|z,zmmreg                          [mr:               evex.512.66.0f38.w0 8b /r ]  AVX512,FUTURE
+VPCOMPRESSQ      mem512|mask,zmmreg                            [mr:t1s:           evex.512.66.0f38.w1 8b /r ]  AVX512,FUTURE
+VPCOMPRESSQ      zmmreg|mask|z,zmmreg                          [mr:               evex.512.66.0f38.w1 8b /r ]  AVX512,FUTURE
+VPERMD           zmmreg|mask|z,zmmreg,zmmrm512|b32             [rvm:fv:       evex.nds.512.66.0f38.w0 36 /r ]  AVX512,FUTURE
+VPERMI2D         zmmreg|mask|z,zmmreg,zmmrm512|b32             [rvm:fv:       evex.nds.512.66.0f38.w0 76 /r ]  AVX512,FUTURE
+VPERMI2PD        zmmreg|mask|z,zmmreg,zmmrm512|b64             [rvm:fv:       evex.nds.512.66.0f38.w1 77 /r ]  AVX512,FUTURE
+VPERMI2PS        zmmreg|mask|z,zmmreg,zmmrm512|b32             [rvm:fv:       evex.nds.512.66.0f38.w0 77 /r ]  AVX512,FUTURE
+VPERMI2Q         zmmreg|mask|z,zmmreg,zmmrm512|b64             [rvm:fv:       evex.nds.512.66.0f38.w1 76 /r ]  AVX512,FUTURE
+VPERMILPD        zmmreg|mask|z,zmmreg,zmmrm512|b64             [rvm:fv:       evex.nds.512.66.0f38.w1 0d /r ]  AVX512,FUTURE
+VPERM...
 
[truncated message content]

[nasm:avx512] AVX-512: Fix match function to check the range of registers

From: nasm-bot f. J. K. S. <jin...@in...> - 2013-09-21 12:30:25

Commit-ID:  66c61926b1fa8d22773bb43014d75d54ef43bf38
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=66c61926b1fa8d22773bb43014d75d54ef43bf38
Author:     Jin Kyu Song <jin...@in...>
AuthorDate: Mon, 26 Aug 2013 20:28:43 -0700
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Wed, 28 Aug 2013 09:37:31 +0400

AVX-512: Fix match function to check the range of registers

The high-16 registers of XMM and YMM need to be encoded with EVEX, not VEX.
Even if all the operand types match a VEX instruction format,
the instruction should be encoded with EVEX instead.

Signed-off-by: Jin Kyu Song <jin...@in...>
Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 assemble.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/assemble.c b/assemble.c
index c22075d..b0d4571 100644
--- a/assemble.c
+++ b/assemble.c
@@ -191,6 +191,7 @@ enum match_result {
     MERR_BADCPU,
     MERR_BADMODE,
     MERR_BADHLE,
+    MERR_ENCMISMATCH,
     /*
      * Matching success; the conditional ones first
      */
@@ -1233,6 +1234,10 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
         if (bits != 64 && ((ins->rex & bad32) || ins->vexreg > 7)) {
             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
             return -1;
+        } else if (!(ins->rex & REX_EV) &&
+                   ((ins->vexreg > 15) || (ins->evex_p[0] & 0xf0))) {
+            errfunc(ERR_NONFATAL, "invalid high-16 register in non-AVX-512");
+            return -1;
         }
         if (ins->rex & REX_EV)
             length += 4;
@@ -2147,6 +2152,9 @@ static enum match_result matches(const struct itemplate *itemp,
                  */
                 opsizemissing = true;
             }
+        } else if (instruction->oprs[i].basereg >= 16 &&
+                   (itemp->flags & IF_INSMASK) != IF_AVX512) {
+            return MERR_ENCMISMATCH;
         }
     }

[nasm:avx512] AVX-512: Change the data type for instruction flags

From: nasm-bot f. J. K. S. <jin...@in...> - 2013-09-21 12:30:24

Commit-ID:  9bb987d8e0330429afba42015b1fc7c7ca0d1b16
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=9bb987d8e0330429afba42015b1fc7c7ca0d1b16
Author:     Jin Kyu Song <jin...@in...>
AuthorDate: Mon, 26 Aug 2013 20:28:42 -0700
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Wed, 28 Aug 2013 09:37:21 +0400

AVX-512: Change the data type for instruction flags

Increased the size of the data type for instruction flags from 32 bits to 64 bits.
A new type (iflags_t) is also defined for better maintainability.

A bigger data type is needed because more instruction set types are coming
but there was not enough space for them. Since they are not bit masks,
only one instruction set is allowed for each instruction.

Signed-off-by: Jin Kyu Song <jin...@in...>
Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 assemble.c |  6 +++---
 assemble.h |  4 ++--
 disasm.c   |  4 ++--
 disasm.h   |  2 +-
 insns.dat  | 46 +++++++++++++++++++++++-----------------------
 insns.h    | 53 ++++++++++++++++++++++++++++-------------------------
 insns.pl   | 15 +++++++++++++++
 nasm.c     |  8 ++++----
 nasm.h     |  2 ++
 ndisasm.c  |  2 +-
 10 files changed, 81 insertions(+), 61 deletions(-)

diff --git a/assemble.c b/assemble.c
index baae15f..c22075d 100644
--- a/assemble.c
+++ b/assemble.c
@@ -213,7 +213,7 @@ typedef struct {
 #define GEN_MODRM(mod, reg, rm)                     \
         (((mod) << 6) | (((reg) & 7) << 3) | ((rm) & 7))
 
-static uint32_t cpu;            /* cpu level received from nasm.c */
+static iflags_t cpu;            /* cpu level received from nasm.c */
 static efunc errfunc;
 static struct ofmt *outfmt;
 static ListGen *list;
@@ -377,7 +377,7 @@ static bool jmp_match(int32_t segment, int64_t offset, int bits,
     return (isize >= -128 && isize <= 127); /* is it byte size? */
 }
 
-int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
+int64_t assemble(int32_t segment, int64_t offset, int bits, iflags_t cp,
                  insn * instruction, struct ofmt *output, efunc error,
                  ListGen * listgen)
 {
@@ -680,7 +680,7 @@ int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
     return 0;
 }
 
-int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
+int64_t insn_size(int32_t segment, int64_t offset, int bits, iflags_t cp,
                   insn * instruction, efunc error)
 {
     const struct itemplate *temp;
diff --git a/assemble.h b/assemble.h
index e5e5015..1197d59 100644
--- a/assemble.h
+++ b/assemble.h
@@ -38,9 +38,9 @@
 #ifndef NASM_ASSEMBLE_H
 #define NASM_ASSEMBLE_H
 
-int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
+int64_t insn_size(int32_t segment, int64_t offset, int bits, iflags_t cp,
                insn * instruction, efunc error);
-int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
+int64_t assemble(int32_t segment, int64_t offset, int bits, iflags_t cp,
               insn * instruction, struct ofmt *output, efunc error,
               ListGen * listgen);
 
diff --git a/disasm.c b/disasm.c
index cc55d2c..9a5f9ad 100644
--- a/disasm.c
+++ b/disasm.c
@@ -944,7 +944,7 @@ static const char * const condition_name[16] = {
 };
 
 int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
-            int32_t offset, int autosync, uint32_t prefer)
+            int32_t offset, int autosync, iflags_t prefer)
 {
     const struct itemplate * const *p, * const *best_p;
     const struct disasm_index *ix;
@@ -955,7 +955,7 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
     uint8_t *origdata;
     int works;
     insn tmp_ins, ins;
-    uint32_t goodness, best;
+    iflags_t goodness, best;
     int best_pref;
     struct prefix_info prefix;
     bool end_prefix;
diff --git a/disasm.h b/disasm.h
index 3edbfd5..70a9a7b 100644
--- a/disasm.h
+++ b/disasm.h
@@ -41,7 +41,7 @@
 #define INSN_MAX 32             /* one instruction can't be longer than this */
 
 int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
-            int32_t offset, int autosync, uint32_t prefer);
+            int32_t offset, int autosync, iflags_t prefer);
 int32_t eatbyte(uint8_t *data, char *output, int outbufsize, int segsize);
 
 #endif
diff --git a/insns.dat b/insns.dat
index 7a0ec60..772a3e9 100644
--- a/insns.dat
+++ b/insns.dat
@@ -1514,8 +1514,8 @@ CMPPS		xmmreg,xmmreg,imm		[rmi:	np 0f c2 /r ib,u]			KATMAI,SSE,SB,AR2
 CMPSS		xmmreg,mem,imm			[rmi:	f3 0f c2 /r ib,u]			KATMAI,SSE,SB,AR2
 CMPSS		xmmreg,xmmreg,imm		[rmi:	f3 0f c2 /r ib,u]			KATMAI,SSE,SB,AR2
 COMISS		xmmreg,xmmrm32			[rm:	np 0f 2f /r]				KATMAI,SSE
-CVTPI2PS	xmmreg,mmxrm64			[rm:	np 0f 2a /r]				KATMAI,SSE,MMX
-CVTPS2PI	mmxreg,xmmrm64			[rm:	np 0f 2d /r]				KATMAI,SSE,MMX
+CVTPI2PS	xmmreg,mmxrm64			[rm:	np 0f 2a /r]				KATMAI,SSE
+CVTPS2PI	mmxreg,xmmrm64			[rm:	np 0f 2d /r]				KATMAI,SSE
 CVTSI2SS	xmmreg,mem			[rm:	f3 0f 2a /r]				KATMAI,SSE,SD,AR1,ND
 CVTSI2SS	xmmreg,rm32			[rm:	f3 0f 2a /r]				KATMAI,SSE,SD,AR1
 CVTSI2SS	xmmreg,rm64			[rm:	o64 f3 0f 2a /r]			X64,SSE,SQ,AR1
@@ -1523,7 +1523,7 @@ CVTSS2SI	reg32,xmmreg			[rm:	f3 0f 2d /r]				KATMAI,SSE,SD,AR1
 CVTSS2SI	reg32,mem			[rm:	f3 0f 2d /r]				KATMAI,SSE,SD,AR1
 CVTSS2SI	reg64,xmmreg			[rm:	o64 f3 0f 2d /r]			X64,SSE,SD,AR1
 CVTSS2SI	reg64,mem			[rm:	o64 f3 0f 2d /r]			X64,SSE,SD,AR1
-CVTTPS2PI	mmxreg,xmmrm			[rm:	np 0f 2c /r]				KATMAI,SSE,MMX,SQ
+CVTTPS2PI	mmxreg,xmmrm			[rm:	np 0f 2c /r]				KATMAI,SSE,SQ
 CVTTSS2SI	reg32,xmmrm			[rm:	f3 0f 2c /r]				KATMAI,SSE,SD,AR1
 CVTTSS2SI	reg64,xmmrm			[rm:	o64 f3 0f 2c /r]			X64,SSE,SD,AR1
 DIVPS		xmmreg,xmmrm128			[rm:	np 0f 5e /r]				KATMAI,SSE
@@ -1568,10 +1568,10 @@ UNPCKLPS	xmmreg,xmmrm128			[rm:	np 0f 14 /r]				KATMAI,SSE
 XORPS		xmmreg,xmmrm128			[rm:	np 0f 57 /r]				KATMAI,SSE
 
 ;# Introduced in Deschutes but necessary for SSE support
-FXRSTOR		mem				[m:	np 0f ae /1]				P6,SSE,FPU
-FXRSTOR64	mem				[m:	o64 np 0f ae /1]			X64,SSE,FPU
-FXSAVE		mem				[m:	np 0f ae /0]				P6,SSE,FPU
-FXSAVE64	mem				[m:	o64 np 0f ae /0]			X64,SSE,FPU
+FXRSTOR		mem				[m:	np 0f ae /1]				P6,SSE
+FXRSTOR64	mem				[m:	o64 np 0f ae /1]			X64,SSE
+FXSAVE		mem				[m:	np 0f ae /0]				P6,SSE
+FXSAVE64	mem				[m:	o64 np 0f ae /0]			X64,SSE
 
 ;# XSAVE group (AVX and extended state)
 ; Introduced in late Penryn ... we really need to clean up the handling
@@ -1863,37 +1863,37 @@ INVVPID		reg32,mem			[rm: 66 0f 38 81 /r]				VMX,SO,NOLONG
 INVVPID		reg64,mem			[rm: o64nw 66 0f 38 81 /r]			VMX,SO,LONG
 
 ;# Tejas New Instructions (SSSE3)
-PABSB		mmxreg,mmxrm			[rm:	np 0f 38 1c /r]				SSSE3,MMX,SQ
+PABSB		mmxreg,mmxrm			[rm:	np 0f 38 1c /r]				SSSE3,SQ
 PABSB		xmmreg,xmmrm			[rm:	66 0f 38 1c /r]				SSSE3
-PABSW		mmxreg,mmxrm			[rm:	np 0f 38 1d /r]				SSSE3,MMX,SQ
+PABSW		mmxreg,mmxrm			[rm:	np 0f 38 1d /r]				SSSE3,SQ
 PABSW		xmmreg,xmmrm			[rm:	66 0f 38 1d /r]				SSSE3
-PABSD		mmxreg,mmxrm			[rm:	np 0f 38 1e /r]				SSSE3,MMX,SQ
+PABSD		mmxreg,mmxrm			[rm:	np 0f 38 1e /r]				SSSE3,SQ
 PABSD		xmmreg,xmmrm			[rm:	66 0f 38 1e /r]				SSSE3
-PALIGNR		mmxreg,mmxrm,imm		[rmi:	np 0f 3a 0f /r ib,u]			SSSE3,MMX,SQ
+PALIGNR		mmxreg,mmxrm,imm		[rmi:	np 0f 3a 0f /r ib,u]			SSSE3,SQ
 PALIGNR		xmmreg,xmmrm,imm		[rmi:	66 0f 3a 0f /r ib,u]			SSSE3
-PHADDW		mmxreg,mmxrm			[rm:	np 0f 38 01 /r]				SSSE3,MMX,SQ
+PHADDW		mmxreg,mmxrm			[rm:	np 0f 38 01 /r]				SSSE3,SQ
 PHADDW		xmmreg,xmmrm			[rm:	66 0f 38 01 /r]				SSSE3
-PHADDD		mmxreg,mmxrm			[rm:	np 0f 38 02 /r]				SSSE3,MMX,SQ
+PHADDD		mmxreg,mmxrm			[rm:	np 0f 38 02 /r]				SSSE3,SQ
 PHADDD		xmmreg,xmmrm			[rm:	66 0f 38 02 /r]				SSSE3
-PHADDSW		mmxreg,mmxrm			[rm:	np 0f 38 03 /r]				SSSE3,MMX,SQ
+PHADDSW		mmxreg,mmxrm			[rm:	np 0f 38 03 /r]				SSSE3,SQ
 PHADDSW		xmmreg,xmmrm			[rm:	66 0f 38 03 /r]				SSSE3
-PHSUBW		mmxreg,mmxrm			[rm:	np 0f 38 05 /r]				SSSE3,MMX,SQ
+PHSUBW		mmxreg,mmxrm			[rm:	np 0f 38 05 /r]				SSSE3,SQ
 PHSUBW		xmmreg,xmmrm			[rm:	66 0f 38 05 /r]				SSSE3
-PHSUBD		mmxreg,mmxrm			[rm:	np 0f 38 06 /r]				SSSE3,MMX,SQ
+PHSUBD		mmxreg,mmxrm			[rm:	np 0f 38 06 /r]				SSSE3,SQ
 PHSUBD		xmmreg,xmmrm			[rm:	66 0f 38 06 /r]				SSSE3
-PHSUBSW		mmxreg,mmxrm			[rm:	np 0f 38 07 /r]				SSSE3,MMX,SQ
+PHSUBSW		mmxreg,mmxrm			[rm:	np 0f 38 07 /r]				SSSE3,SQ
 PHSUBSW		xmmreg,xmmrm			[rm:	66 0f 38 07 /r]				SSSE3
-PMADDUBSW	mmxreg,mmxrm			[rm:	np 0f 38 04 /r]				SSSE3,MMX,SQ
+PMADDUBSW	mmxreg,mmxrm			[rm:	np 0f 38 04 /r]				SSSE3,SQ
 PMADDUBSW	xmmreg,xmmrm			[rm:	66 0f 38 04 /r]				SSSE3
-PMULHRSW	mmxreg,mmxrm			[rm:	np 0f 38 0b /r]				SSSE3,MMX,SQ
+PMULHRSW	mmxreg,mmxrm			[rm:	np 0f 38 0b /r]				SSSE3,SQ
 PMULHRSW	xmmreg,xmmrm			[rm:	66 0f 38 0b /r]				SSSE3
-PSHUFB		mmxreg,mmxrm			[rm:	np 0f 38 00 /r]				SSSE3,MMX,SQ
+PSHUFB		mmxreg,mmxrm			[rm:	np 0f 38 00 /r]				SSSE3,SQ
 PSHUFB		xmmreg,xmmrm			[rm:	66 0f 38 00 /r]				SSSE3
-PSIGNB		mmxreg,mmxrm			[rm:	np 0f 38 08 /r]				SSSE3,MMX,SQ
+PSIGNB		mmxreg,mmxrm			[rm:	np 0f 38 08 /r]				SSSE3,SQ
 PSIGNB		xmmreg,xmmrm			[rm:	66 0f 38 08 /r]				SSSE3
-PSIGNW		mmxreg,mmxrm			[rm:	np 0f 38 09 /r]				SSSE3,MMX,SQ
+PSIGNW		mmxreg,mmxrm			[rm:	np 0f 38 09 /r]				SSSE3,SQ
 PSIGNW		xmmreg,xmmrm			[rm:	66 0f 38 09 /r]				SSSE3
-PSIGND		mmxreg,mmxrm			[rm:	np 0f 38 0a /r]				SSSE3,MMX,SQ
+PSIGND		mmxreg,mmxrm			[rm:	np 0f 38 0a /r]				SSSE3,SQ
 PSIGND		xmmreg,xmmrm			[rm:	66 0f 38 0a /r]				SSSE3
 
 ;# AMD SSE4A
diff --git a/insns.h b/insns.h
index 58a4cd7..ad795e2 100644
--- a/insns.h
+++ b/insns.h
@@ -19,7 +19,7 @@ struct itemplate {
     opflags_t       opd[MAX_OPERANDS];  /* bit flags for operand types */
     decoflags_t     deco[MAX_OPERANDS]; /* bit flags for operand decorators */
     const uint8_t   *code;              /* the code it assembles to */
-    uint32_t        flags;              /* some flags */
+    iflags_t        flags;              /* some flags */
 };
 
 /* Disassembler table structure */
@@ -72,6 +72,8 @@ extern const uint8_t nasm_bytecodes[];
  * (The default state if neither IF_SM nor IF_SM2 is specified is
  * that any operand with unspecified size in the template is
  * required to have unspecified size in the instruction too...)
+ *
+ * iflags_t is defined to store these flags.
  */
 
 #define IF_SM           0x00000001UL    /* size match */
@@ -103,33 +105,34 @@ extern const uint8_t nasm_bytecodes[];
 #define IF_LONG         0x00001000UL    /* long mode instruction */
 #define IF_NOHLE	0x00002000UL    /* HLE prefixes forbidden */
 /* These flags are currently not used for anything - intended for insn set */
-#define IF_UNDOC        0x00000000UL    /* it's an undocumented instruction */
-#define IF_FPU          0x00000000UL    /* it's an FPU instruction */
-#define IF_MMX          0x00000000UL    /* it's an MMX instruction */
-#define IF_3DNOW        0x00000000UL    /* it's a 3DNow! instruction */
-#define IF_SSE          0x00000000UL    /* it's a SSE (KNI, MMX2) instruction */
-#define IF_SSE2         0x00000000UL    /* it's a SSE2 instruction */
-#define IF_SSE3         0x00000000UL    /* it's a SSE3 (PNI) instruction */
-#define IF_VMX          0x00000000UL    /* it's a VMX instruction */
-#define IF_SSSE3        0x00000000UL    /* it's an SSSE3 instruction */
-#define IF_SSE4A        0x00000000UL    /* AMD SSE4a */
-#define IF_SSE41        0x00000000UL    /* it's an SSE4.1 instruction */
-#define IF_SSE42        0x00000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
-#define IF_SSE5         0x00000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
-#define IF_AVX          0x00000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
-#define IF_AVX2         0x00000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
-#define IF_AVX512       0x00000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
-#define IF_FMA          0x00000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
-#define IF_BMI1         0x00000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
-#define IF_BMI2         0x00000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
-#define IF_TBM          0x00000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
-#define IF_HLE          0x00000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
-#define IF_RTM          0x00000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
-#define IF_INVPCID      0x00000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
+#define IF_UNDOC        0x8000000000UL    /* it's an undocumented instruction */
+#define IF_HLE          0x4000000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
+#define IF_FPU          0x0100000000UL    /* it's an FPU instruction */
+#define IF_MMX          0x0200000000UL    /* it's an MMX instruction */
+#define IF_3DNOW        0x0300000000UL    /* it's a 3DNow! instruction */
+#define IF_SSE          0x0400000000UL    /* it's a SSE (KNI, MMX2) instruction */
+#define IF_SSE2         0x0500000000UL    /* it's a SSE2 instruction */
+#define IF_SSE3         0x0600000000UL    /* it's a SSE3 (PNI) instruction */
+#define IF_VMX          0x0700000000UL    /* it's a VMX instruction */
+#define IF_SSSE3        0x0800000000UL    /* it's an SSSE3 instruction */
+#define IF_SSE4A        0x0900000000UL    /* AMD SSE4a */
+#define IF_SSE41        0x0A00000000UL    /* it's an SSE4.1 instruction */
+#define IF_SSE42        0x0B00000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
+#define IF_SSE5         0x0C00000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
+#define IF_AVX          0x0D00000000UL    /* it's an AVX     (128b) instruction */
+#define IF_AVX2         0x0E00000000UL    /* it's an AVX2    (256b) instruction */
+#define IF_AVX512       0x0F00000000UL    /* it's an AVX-512 (512b) instruction */
+#define IF_FMA          0x1000000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
+#define IF_BMI1         0x1100000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
+#define IF_BMI2         0x1200000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
+#define IF_TBM          0x1300000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
+#define IF_RTM          0x1400000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
+#define IF_INVPCID      0x1500000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
+#define IF_INSMASK      0xFF00000000UL    /* the mask for instruction set types */
 #define IF_PMASK        0xFF000000UL    /* the mask for processor types */
 #define IF_PLEVEL       0x0F000000UL    /* the mask for processor instr. level */
                                         /* also the highest possible processor */
-#define IF_PFMASK       0xF01FF800UL    /* the mask for disassembly "prefer" */
+#define IF_PFMASK       0xFFF0000000UL    /* the mask for disassembly "prefer" */
 #define IF_8086         0x00000000UL    /* 8086 instruction */
 #define IF_186          0x01000000UL    /* 186+ instruction */
 #define IF_286          0x02000000UL    /* 286+ instruction */
diff --git a/insns.pl b/insns.pl
index eb99f6b..60f7dd3 100755
--- a/insns.pl
+++ b/insns.pl
@@ -427,6 +427,10 @@ sub format_insn($$$$$) {
     my $num, $nd = 0;
     my @bytecode;
     my $op, @ops, $opp, @opx, @oppx, @decos, @opevex;
+    my @iflags = (  "FPU", "MMX", "3DNOW", "SSE", "SSE2",
+                    "SSE3", "VMX", "SSSE3", "SSE4A", "SSE41",
+                    "SSE42", "SSE5", "AVX", "AVX2", "AVX512",
+                    "FMA", "BMI1", "BMI2", "TBM", "RTM", "INVPCID");
 
     return (undef, undef) if $operands eq "ignore";
 
@@ -476,6 +480,17 @@ sub format_insn($$$$$) {
     }
     $decorators =~ tr/a-z/A-Z/;
 
+    # check if two different insn set types are set
+    $cnt = 0;
+    foreach $fla (split(/,/, $flags)) {
+        if ($fla ~~ @iflags) {
+            $cnt++;
+            if ($cnt >= 2) {
+                die "Too many insn set flags in $flags\n";
+            }
+        }
+    }
+
     # format the flags
     $flags =~ s/,/|IF_/g;
     $flags =~ s/(\|IF_ND|IF_ND\|)//, $nd = 1 if $flags =~ /IF_ND/;
diff --git a/nasm.c b/nasm.c
index 126f271..3a0c050 100644
--- a/nasm.c
+++ b/nasm.c
@@ -74,7 +74,7 @@ struct forwrefinfo {            /* info held on forward refs. */
 };
 
 static int get_bits(char *value);
-static uint32_t get_cpu(char *cpu_str);
+static iflags_t get_cpu(char *cpu_str);
 static void parse_cmdline(int, char **);
 static void assemble_file(char *, StrList **);
 static void nasm_verror_gnu(int severity, const char *fmt, va_list args);
@@ -106,8 +106,8 @@ static FILE *error_file;        /* Where to write error messages */
 FILE *ofile = NULL;
 int optimizing = MAX_OPTIMIZE; /* number of optimization passes to take */
 static int sb, cmd_sb = 16;    /* by default */
-static uint32_t cmd_cpu = IF_PLEVEL;       /* highest level by default */
-static uint32_t cpu = IF_PLEVEL;   /* passed to insn_size & assemble.c */
+static iflags_t cmd_cpu = IF_PLEVEL;       /* highest level by default */
+static iflags_t cpu = IF_PLEVEL;   /* passed to insn_size & assemble.c */
 int64_t global_offset_changed;      /* referenced in labels.c */
 int64_t prev_offset_changed;
 int32_t stall_count;
@@ -2006,7 +2006,7 @@ static void usage(void)
     fputs("type `nasm -h' for help\n", error_file);
 }
 
-static uint32_t get_cpu(char *value)
+static iflags_t get_cpu(char *value)
 {
     if (!strcmp(value, "8086"))
         return IF_8086;
diff --git a/nasm.h b/nasm.h
index fc5a18d..72986ee 100644
--- a/nasm.h
+++ b/nasm.h
@@ -694,6 +694,8 @@ typedef struct insn { /* an instruction itself */
 
 enum geninfo { GI_SWITCH };
 
+typedef uint64_t iflags_t;
+
 /*
  * The data structure defining an output format driver, and the
  * interfaces to the functions therein.
diff --git a/ndisasm.c b/ndisasm.c
index 710d1f0..638299f 100644
--- a/ndisasm.c
+++ b/ndisasm.c
@@ -88,7 +88,7 @@ int main(int argc, char **argv)
     bool autosync = false;
     int bits = 16, b;
     bool eof = false;
-    uint32_t prefer = 0;
+    iflags_t prefer = 0;
     bool rn_error;
     int32_t offset;
     FILE *fp;

[nasm:avx512] AVX-512: Fix a bug in calculating Disp8*N value

From: nasm-bot f. J. K. S. <jin...@in...> - 2013-09-21 12:30:23

Commit-ID:  d2d9c3ee3807791cb0240ea8b141643bc6d9b9a7
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=d2d9c3ee3807791cb0240ea8b141643bc6d9b9a7
Author:     Jin Kyu Song <jin...@in...>
AuthorDate: Mon, 26 Aug 2013 20:28:41 -0700
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Wed, 28 Aug 2013 09:37:14 +0400

AVX-512: Fix a bug in calculating Disp8*N value

Fixed a bug that derived an incorrect N value for tuple types of
T2, T4, T8.

Signed-off-by: Jin Kyu Song <jin...@in...>
Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 assemble.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/assemble.c b/assemble.c
index 313ff8a..baae15f 100644
--- a/assemble.c
+++ b/assemble.c
@@ -2257,7 +2257,7 @@ static bool is_disp8n(operand *input, insn *ins, int8_t *compdisp)
         if (vectlen + 7 <= (evex_w + 5) + (tuple - T2 + 1))
             n = 0;
         else
-            n = 1 << (tuple - T2 + evex_w + 4);
+            n = 1 << (tuple - T2 + evex_w + 3);
         break;
     case HVM:
     case QVM:

[nasm:avx512] AVX-512: Moved {er} decorator position next to the last SIMD op

From: nasm-bot f. J. K. S. <jin...@in...> - 2013-09-21 12:30:22

Commit-ID:  c62bc20a0b6f8441e75616bdbcc300a510eb12f8
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=c62bc20a0b6f8441e75616bdbcc300a510eb12f8
Author:     Jin Kyu Song <jin...@in...>
AuthorDate: Mon, 26 Aug 2013 20:28:38 -0700
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Wed, 28 Aug 2013 09:35:47 +0400

AVX-512: Moved {er} decorator position next to the last SIMD op

This follows the current syntax used in gas, even though
it does not conform to the SDM.
According to the SDM, {er} should follow the last GPR op, not the SIMD op.
e.g. SDM : VCVTSI2SD xmm1, xmm2, r/m64{er}
    NASM : VCVTSI2SD xmm1, xmm2{er}, r/m64

Signed-off-by: Jin Kyu Song <jin...@in...>
Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 insns.dat | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/insns.dat b/insns.dat
index 320280a..7a0ec60 100644
--- a/insns.dat
+++ b/insns.dat
@@ -3504,10 +3504,10 @@ VCVTSD2SI        reg64,xmmrm64|er                              [rm:t1f64:
 VCVTSD2SS        xmmreg|mask|z,xmmreg,xmmrm64|er               [rvm:t1s:        evex.nds.lig.f2.0f.w1 5a /r ]  AVX512,FUTURE
 VCVTSD2USI       reg32,xmmrm64|er                              [rm:t1f64:           evex.lig.f2.0f.w0 79 /r ]  AVX512,FUTURE
 VCVTSD2USI       reg64,xmmrm64|er                              [rm:t1f64:           evex.lig.f2.0f.w1 79 /r ]  AVX512,FUTURE
-VCVTSI2SD        xmmreg,xmmreg,rm32|er                         [rvm:t1s:        evex.nds.lig.f2.0f.w0 2a /r ]  AVX512,FUTURE
-VCVTSI2SD        xmmreg,xmmreg,rm64|er                         [rvm:t1s:        evex.nds.lig.f2.0f.w1 2a /r ]  AVX512,FUTURE
-VCVTSI2SS        xmmreg,xmmreg,rm32|er                         [rvm:t1s:        evex.nds.lig.f3.0f.w0 2a /r ]  AVX512,FUTURE
-VCVTSI2SS        xmmreg,xmmreg,rm64|er                         [rvm:t1s:        evex.nds.lig.f3.0f.w1 2a /r ]  AVX512,FUTURE
+VCVTSI2SD        xmmreg,xmmreg|er,rm32                         [rvm:t1s:        evex.nds.lig.f2.0f.w0 2a /r ]  AVX512,FUTURE
+VCVTSI2SD        xmmreg,xmmreg|er,rm64                         [rvm:t1s:        evex.nds.lig.f2.0f.w1 2a /r ]  AVX512,FUTURE
+VCVTSI2SS        xmmreg,xmmreg|er,rm32                         [rvm:t1s:        evex.nds.lig.f3.0f.w0 2a /r ]  AVX512,FUTURE
+VCVTSI2SS        xmmreg,xmmreg|er,rm64                         [rvm:t1s:        evex.nds.lig.f3.0f.w1 2a /r ]  AVX512,FUTURE
 VCVTSS2SD        xmmreg|mask|z,xmmreg,xmmrm32|sae              [rvm:t1s:        evex.nds.lig.f3.0f.w0 5a /r ]  AVX512,FUTURE
 VCVTSS2SI        reg32,xmmrm32|er                              [rm:t1f32:           evex.lig.f3.0f.w0 2d /r ]  AVX512,FUTURE
 VCVTSS2SI        reg64,xmmrm32|er                              [rm:t1f32:           evex.lig.f3.0f.w1 2d /r ]  AVX512,FUTURE
@@ -3527,10 +3527,10 @@ VCVTTSS2USI      reg32,xmmrm32|sae                             [rm:t1f32:
 VCVTTSS2USI      reg64,xmmrm32|sae                             [rm:t1f32:           evex.lig.f3.0f.w1 78 /r ]  AVX512,FUTURE
 VCVTUDQ2PD       zmmreg|mask|z,ymmrm256|b32|er                 [rm:hv:              evex.512.f3.0f.w0 7a /r ]  AVX512,FUTURE
 VCVTUDQ2PS       zmmreg|mask|z,zmmrm512|b32|er                 [rm:fv:              evex.512.f2.0f.w0 7a /r ]  AVX512,FUTURE
-VCVTUSI2SD       xmmreg,xmmreg,rm32|er                         [rvm:t1s:        evex.nds.lig.f2.0f.w0 7b /r ]  AVX512,FUTURE
-VCVTUSI2SD       xmmreg,xmmreg,rm64|er                         [rvm:t1s:        evex.nds.lig.f2.0f.w1 7b /r ]  AVX512,FUTURE
-VCVTUSI2SS       xmmreg,xmmreg,rm32|er                         [rvm:t1s:        evex.nds.lig.f3.0f.w0 7b /r ]  AVX512,FUTURE
-VCVTUSI2SS       xmmreg,xmmreg,rm64|er                         [rvm:t1s:        evex.nds.lig.f3.0f.w1 7b /r ]  AVX512,FUTURE
+VCVTUSI2SD       xmmreg,xmmreg|er,rm32                         [rvm:t1s:        evex.nds.lig.f2.0f.w0 7b /r ]  AVX512,FUTURE
+VCVTUSI2SD       xmmreg,xmmreg|er,rm64                         [rvm:t1s:        evex.nds.lig.f2.0f.w1 7b /r ]  AVX512,FUTURE
+VCVTUSI2SS       xmmreg,xmmreg|er,rm32                         [rvm:t1s:        evex.nds.lig.f3.0f.w0 7b /r ]  AVX512,FUTURE
+VCVTUSI2SS       xmmreg,xmmreg|er,rm64                         [rvm:t1s:        evex.nds.lig.f3.0f.w1 7b /r ]  AVX512,FUTURE
 VDIVPD           zmmreg|mask|z,zmmreg,zmmrm512|b64|er          [rvm:fv:         evex.nds.512.66.0f.w1 5e /r ]  AVX512,FUTURE
 VDIVPS           zmmreg|mask|z,zmmreg,zmmrm512|b32|er          [rvm:fv:            evex.nds.512.0f.w0 5e /r ]  AVX512,FUTURE
 VDIVSD           xmmreg|mask|z,xmmreg,xmmrm64|er               [rvm:t1s:        evex.nds.lig.f2.0f.w1 5e /r ]  AVX512,FUTURE
@@ -3548,6 +3548,7 @@ VEXTRACTI32X4    xmmreg|mask|z,zmmreg,imm8                     [mri:           e
 VEXTRACTI64X4    mem256|mask,zmmreg,imm8                       [mri:t4:        evex.512.66.0f3a.w1 3b /r ib ]  AVX512,FUTURE
 VEXTRACTI64X4    ymmreg|mask|z,zmmreg,imm8                     [mri:           evex.512.66.0f3a.w1 3b /r ib ]  AVX512,FUTURE
 VEXTRACTPS       rm32,xmmreg,imm8                              [mri:t1s:      evex.128.66.0f3a.wig 17 /r ib ]  AVX512,FUTURE
+VEXTRACTPS       rm64,xmmreg,imm8                              [mri:t1s:       evex.128.66.0f3a.w1 17 /r ib ]  AVX512,FUTURE
 VFIXUPIMMPD      zmmreg|mask|z,zmmreg,zmmrm512|b64|sae,imm8    [rvmi:fv:   evex.nds.512.66.0f3a.w1 54 /r ib ]  AVX512,FUTURE
 VFIXUPIMMPS      zmmreg|mask|z,zmmreg,zmmrm512|b32|sae,imm8    [rvmi:fv:   evex.nds.512.66.0f3a.w0 54 /r ib ]  AVX512,FUTURE
 VFIXUPIMMSD      xmmreg|mask|z,xmmreg,xmmrm64|sae,imm8         [rvmi:t1s:  evex.nds.lig.66.0f3a.w1 55 /r ib ]  AVX512,FUTURE

[nasm:avx512] AVX-512: Find the correct position of the last SIMD op

From: nasm-bot f. J. K. S. <jin...@in...> - 2013-09-21 12:30:21

Commit-ID:  4a6570616aa1fadf1544c0c099c4bf22683f367f
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=4a6570616aa1fadf1544c0c099c4bf22683f367f
Author:     Jin Kyu Song <jin...@in...>
AuthorDate: Mon, 26 Aug 2013 20:28:39 -0700
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Wed, 28 Aug 2013 09:35:54 +0400

AVX-512: Find the correct position of the last SIMD op

Since the embedded rounding mode follows the last SIMD op,
GPR ops should be skipped when finding the last SIMD op.

Signed-off-by: Jin Kyu Song <jin...@in...>
Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 assemble.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/assemble.c b/assemble.c
index 4f0cd9c..313ff8a 100644
--- a/assemble.c
+++ b/assemble.c
@@ -1159,6 +1159,8 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
                     rfield = nasm_regvals[opx->basereg];
                     /* find the last SIMD operand where ER decorator resides */
                     oplast = &ins->oprs[op1 > op2 ? op1 : op2];
+                    while (oplast && is_class(REG_CLASS_GPR, oplast->type))
+                        oplast--;
                 } else {
                     rflags = 0;
                     rfield = c & 7;

[nasm:avx512] AVX-512: Add ZWORD keyword

From: nasm-bot f. J. K. S. <jin...@in...> - 2013-09-21 12:30:21

Commit-ID:  d4760c19b55ad7cda97c66e5caa29b405fa539a4
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=d4760c19b55ad7cda97c66e5caa29b405fa539a4
Author:     Jin Kyu Song <jin...@in...>
AuthorDate: Wed, 21 Aug 2013 19:29:11 -0700
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Thu, 22 Aug 2013 19:37:44 +0400

AVX-512: Add ZWORD keyword

ZWORD (512 bits) keyword is added

Signed-off-by: Jin Kyu Song <jin...@in...>
Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 assemble.c | 2 ++
 disasm.c   | 3 +++
 nasm.h     | 1 +
 parser.c   | 5 +++++
 tokens.dat | 1 +
 5 files changed, 12 insertions(+)

diff --git a/assemble.c b/assemble.c
index 83971f6..4f0cd9c 100644
--- a/assemble.c
+++ b/assemble.c
@@ -265,6 +265,8 @@ static const char *size_name(int size)
         return "oword";
     case 32:
         return "yword";
+    case 64:
+        return "zword";
     default:
         return "???";
     }
diff --git a/disasm.c b/disasm.c
index 9d2e1b1..cc55d2c 100644
--- a/disasm.c
+++ b/disasm.c
@@ -1303,6 +1303,9 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
             if (t & BITS256)
                 slen +=
                     snprintf(output + slen, outbufsize - slen, "yword ");
+            if (t & BITS512)
+                slen +=
+                    snprintf(output + slen, outbufsize - slen, "zword ");
             if (t & FAR)
                 slen += snprintf(output + slen, outbufsize - slen, "far ");
             if (t & NEAR)
diff --git a/nasm.h b/nasm.h
index e46b5ca..fc5a18d 100644
--- a/nasm.h
+++ b/nasm.h
@@ -1011,6 +1011,7 @@ enum special_tokens {
     S_TWORD,
     S_WORD,
     S_YWORD,
+    S_ZWORD,
     SPECIAL_ENUM_LIMIT
 };
 
diff --git a/parser.c b/parser.c
index 4b3f059..ccbce49 100644
--- a/parser.c
+++ b/parser.c
@@ -660,6 +660,11 @@ is_expression:
                     result->oprs[operand].type |= BITS256;
                 setsize = 1;
                 break;
+            case S_ZWORD:
+                if (!setsize)
+                    result->oprs[operand].type |= BITS512;
+                setsize = 1;
+                break;
             case S_TO:
                 result->oprs[operand].type |= TO;
                 break;
diff --git a/tokens.dat b/tokens.dat
index 1a00e3d..d12b296 100644
--- a/tokens.dat
+++ b/tokens.dat
@@ -72,6 +72,7 @@ to
 tword
 word
 yword
+zword
 
 % TOKEN_FLOAT, 0, 0, 0
 __infinity__

[nasm:avx512] AVX-512: Reword comment about opmask decorators

From: nasm-bot f. J. K. S. <jin...@in...> - 2013-09-21 12:30:21

Commit-ID:  f9a71e0c3800092bb1db592de6870e4fe9e83444
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=f9a71e0c3800092bb1db592de6870e4fe9e83444
Author:     Jin Kyu Song <jin...@in...>
AuthorDate: Wed, 21 Aug 2013 19:29:09 -0700
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Thu, 22 Aug 2013 19:37:32 +0400

AVX-512: Reword comment about opmask decorators

Previous comment was not so clear.

Signed-off-by: Jin Kyu Song <jin...@in...>
Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 parser.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parser.c b/parser.c
index 5571c6f..4b3f059 100644
--- a/parser.c
+++ b/parser.c
@@ -196,7 +196,7 @@ static void process_size_override(insn *result, int operand)
 /*
  * when two or more decorators follow a register operand,
  * consecutive decorators are parsed here.
- * the order of decorators does not matter.
+ * opmask and zeroing decorators can be placed in any order.
  * e.g. zmm1 {k2}{z} or zmm2 {z,k3}
  * decorator(s) are placed at the end of an operand.
  */

[nasm:avx512] AVX-512: Fix parser to handle opmask decorator correctly

From: nasm-bot f. J. K. S. <jin...@in...> - 2013-09-21 12:30:20

Commit-ID:  50ab1522e95b508c0c61ac000aaece8469088b5b
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=50ab1522e95b508c0c61ac000aaece8469088b5b
Author:     Jin Kyu Song <jin...@in...>
AuthorDate: Wed, 21 Aug 2013 19:29:12 -0700
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Thu, 22 Aug 2013 19:37:49 +0400

AVX-512: Fix parser to handle opmask decorator correctly

When a memory reference operand is a destination, it can have
an opmask decorator as well.

Signed-off-by: Jin Kyu Song <jin...@in...>
Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 parser.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/parser.c b/parser.c
index ccbce49..585abe2 100644
--- a/parser.c
+++ b/parser.c
@@ -758,17 +758,20 @@ is_expression:
                 recover = true;
             } else {            /* we got the required ] */
                 i = stdscan(NULL, &tokval);
-                if (i == TOKEN_DECORATOR) {
+                if ((i == TOKEN_DECORATOR) || (i == TOKEN_OPMASK)) {
                     /*
-                     * according to AVX512 spec, only broacast decorator is
-                     * expected for memory reference operands
+                     * according to AVX512 spec, broacast or opmask decorator
+                     * is expected for memory reference operands
                      */
                     if (tokval.t_flag & TFLAG_BRDCAST) {
                         brace_flags |= GEN_BRDCAST(0);
                         i = stdscan(NULL, &tokval);
+                    } else if (i == TOKEN_OPMASK) {
+                        brace_flags |= VAL_OPMASK(nasm_regvals[tokval.t_integer]);
+                        i = stdscan(NULL, &tokval);
                     } else {
-                        nasm_error(ERR_NONFATAL, "broadcast decorator"
-                                   "expected inside braces");
+                        nasm_error(ERR_NONFATAL, "broadcast or opmask "
+                                   "decorator expected inside braces");
                         recover = true;
                     }
                 }

[nasm:avx512] AVX-512: Add support for parsing braces

From: nasm-bot f. J. K. S. <jin...@in...> - 2013-09-21 12:30:20

Commit-ID:  72018a2b4326d5a647b8879ba8124300b68ca212
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=72018a2b4326d5a647b8879ba8124300b68ca212
Author:     Jin Kyu Song <jin...@in...>
AuthorDate: Mon, 5 Aug 2013 20:46:18 -0700
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Tue, 6 Aug 2013 09:37:52 +0400

AVX-512: Add support for parsing braces

AVX-512 introduced new syntax using braces for decorators.
Opmask, broadcast, and rounding control use this new syntax.

http://software.intel.com/sites/default/files/319433-015.pdf

Signed-off-by: Jin Kyu Song <jin...@in...>
Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 eval.c     |   4 +++
 nasm.h     | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 opflags.h  |  21 ++++++++----
 parser.c   |  94 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 regs.dat   |  15 ++++++++-
 regs.pl    |   2 +-
 stdscan.c  |  94 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 tables.h   |   3 +-
 tokens.dat |  28 ++++++++++++----
 tokhash.pl |  17 +++++++---
 10 files changed, 358 insertions(+), 28 deletions(-)

diff --git a/eval.c b/eval.c
index 0035088..c57ff04 100644
--- a/eval.c
+++ b/eval.c
@@ -869,6 +869,7 @@ static expr *expr6(int critical)
     case TOKEN_INSN:            /* Opcodes that occur here are really labels */
     case TOKEN_HERE:
     case TOKEN_BASE:
+    case TOKEN_DECORATOR:
         begintemp();
         switch (i) {
         case TOKEN_NUM:
@@ -938,6 +939,9 @@ static expr *expr6(int critical)
             if (label_seg != NO_SEG)
                 addtotemp(EXPR_SEGBASE + label_seg, 1L);
             break;
+        case TOKEN_DECORATOR:
+            addtotemp(EXPR_RDSAE, tokval->t_integer);
+            break;
         }
         i = scan(scpriv, tokval);
         return finishtemp();
diff --git a/nasm.h b/nasm.h
index 7802d9b..fb6c6e9 100644
--- a/nasm.h
+++ b/nasm.h
@@ -226,6 +226,8 @@ enum token_type { /* token types, other than chars */
     TOKEN_FLOATIZE,     /* __floatX__ */
     TOKEN_STRFUNC,      /* __utf16*__, __utf32*__ */
     TOKEN_IFUNC,        /* __ilog2*__ */
+    TOKEN_DECORATOR,    /* decorators such as {...} */
+    TOKEN_OPMASK,       /* translated token for opmask registers */
 };
 
 enum floatize {
@@ -272,6 +274,7 @@ struct tokenval {
     int64_t             t_integer;
     int64_t             t_inttwo;
     enum token_type     t_type;
+    int8_t              t_flag;
 };
 typedef int (*scanner)(void *private_data, struct tokenval *tv);
 
@@ -352,11 +355,14 @@ typedef expr *(*evalfunc)(scanner sc, void *scprivate,
 /*
  * Special values for expr->type.
  * These come after EXPR_REG_END as defined in regs.h.
+ * Expr types : 0 ~ EXPR_REG_END, EXPR_UNKNOWN, EXPR_...., EXPR_RDSAE,
+ *              EXPR_SEGBASE ~ EXPR_SEGBASE + SEG_ABS, ...
  */
 #define EXPR_UNKNOWN    (EXPR_REG_END+1) /* forward references */
 #define EXPR_SIMPLE     (EXPR_REG_END+2)
 #define EXPR_WRT        (EXPR_REG_END+3)
-#define EXPR_SEGBASE    (EXPR_REG_END+4)
+#define EXPR_RDSAE      (EXPR_REG_END+4)
+#define EXPR_SEGBASE    (EXPR_REG_END+5)
 
 /*
  * Linked list of strings
@@ -466,6 +472,14 @@ enum ccode { /* condition code names */
     C_none = -1
 };
 
+/*
+ * token flags
+ */
+#define TFLAG_BRC       (1 << 0)    /* valid only with braces. {1to8}, {rd-sae}, ...*/
+#define TFLAG_BRC_OPT   (1 << 1)    /* may or may not have braces. opmasks {k1} */
+#define TFLAG_BRC_ANY   (TFLAG_BRC | TFLAG_BRC_OPT)
+#define TFLAG_BRDCAST   (1 << 2)    /* broadcasting decorator */
+
 static inline uint8_t get_cond_opcode(enum ccode c)
 {
     static const uint8_t ccode_opcodes[] = {
@@ -563,6 +577,7 @@ typedef struct operand { /* operand to an instruction */
     int32_t         wrt;        /* segment base it's relative to */
     int             eaflags;    /* special EA flags */
     int             opflags;    /* see OPFLAG_* defines below */
+    decoflags_t     decoflags;  /* decorator flags such as {...} */
 } operand;
 
 #define OPFLAG_FORWARD      1   /* operand is a forward reference */
@@ -627,6 +642,7 @@ typedef struct insn { /* an instruction itself */
     int             vexreg;                 /* Register encoded in VEX prefix */
     int             vex_cm;                 /* Class and M field for VEX prefix */
     int             vex_wlp;                /* W, P and L information for VEX prefix */
+    int             evex_rm;                /* static rounding mode for AVX3 (EVEX) */
 } insn;
 
 enum geninfo { GI_SWITCH };
@@ -951,6 +967,96 @@ enum special_tokens {
     SPECIAL_ENUM_LIMIT
 };
 
+enum decorator_tokens {
+    DECORATOR_ENUM_START    = SPECIAL_ENUM_LIMIT,
+    BRC_1TO8                = DECORATOR_ENUM_START,
+    BRC_1TO16,
+    BRC_RN,
+    BRC_RU,
+    BRC_RD,
+    BRC_RZ,
+    BRC_SAE,
+    BRC_Z,
+    DECORATOR_ENUM_LIMIT
+};
+
+/*
+ * AVX512 Decorator (decoflags_t) bits distribution (counted from 0)
+ *  3         2         1
+ * 10987654321098765432109876543210
+ *                |
+ *                | word boundary
+ * ............................1111 opmask
+ * ...........................1.... zeroing / merging
+ * ..........................1..... broadcast
+ * .........................1...... static rounding
+ * ........................1....... SAE
+ */
+
+/*
+ * Opmask register number
+ * identical to EVEX.aaa
+ *
+ * Bits: 0 - 3
+ */
+#define OPMASK_SHIFT            (0)
+#define OPMASK_BITS             (4)
+#define OPMASK_MASK             OP_GENMASK(OPMASK_BITS, OPMASK_SHIFT)
+#define GEN_OPMASK(bit)         OP_GENBIT(bit, OPMASK_SHIFT)
+#define VAL_OPMASK(val)         OP_GENVAL(val, OPMASK_BITS, OPMASK_SHIFT)
+
+/*
+ * zeroing / merging control available
+ * matching to EVEX.z
+ *
+ * Bits: 4
+ */
+#define Z_SHIFT                 (4)
+#define Z_BITS                  (1)
+#define Z_MASK                  OP_GENMASK(Z_BITS, Z_SHIFT)
+#define GEN_Z(bit)              OP_GENBIT(bit, Z_SHIFT)
+#define VAL_Z(val)              OP_GENVAL(val, Z_BITS, Z_SHIFT)
+
+/*
+ * broadcast - Whether this operand can be broadcasted
+ *
+ * Bits: 5
+ */
+#define BRDCAST_SHIFT           (5)
+#define BRDCAST_BITS            (1)
+#define BRDCAST_MASK            OP_GENMASK(BRDCAST_BITS, BRDCAST_SHIFT)
+#define GEN_BRDCAST(bit)        OP_GENBIT(bit, BRDCAST_SHIFT)
+#define VAL_BRDCAST(val)        OP_GENVAL(val, BRDCAST_BITS, BRDCAST_SHIFT)
+
+/*
+ * Whether this instruction can have a static rounding mode.
+ * It goes with the last simd operand because the static rounding mode
+ * decorator is located between the last simd operand and imm8 (if any).
+ *
+ * Bits: 6
+ */
+#define STATICRND_SHIFT         (6)
+#define STATICRND_BITS          (1)
+#define STATICRND_MASK          OP_GENMASK(STATICRND_BITS, STATICRND_SHIFT)
+#define GEN_STATICRND(bit)      OP_GENBIT(bit, STATICRND_SHIFT)
+
+/*
+ * SAE(Suppress all exception) available
+ *
+ * Bits: 7
+ */
+#define SAE_SHIFT               (7)
+#define SAE_BITS                (1)
+#define SAE_MASK                OP_GENMASK(SAE_BITS, SAE_SHIFT)
+#define GEN_SAE(bit)            OP_GENBIT(bit, SAE_SHIFT)
+
+#define MASK                    OPMASK_MASK             /* Opmask (k1 ~ 7) can be used */
+#define Z                       Z_MASK
+#define B32                     BRDCAST_MASK            /* {1to16} : load+op instruction can broadcast when it is reg-reg operation */
+#define B64                     BRDCAST_MASK            /* {1to8}  : There are two definitions just for conforming to SDM */
+#define ER                      STATICRND_MASK          /* ER(Embedded Rounding) == Static rounding mode */
+#define SAE                     SAE_MASK                /* SAE(Suppress All Exception) */
+
 /*
  * Global modes
  */
diff --git a/opflags.h b/opflags.h
index 41fce3d..ed7f8ee 100644
--- a/opflags.h
+++ b/opflags.h
@@ -39,6 +39,7 @@
 #define NASM_OPFLAGS_H
 
 #include "compiler.h"
+#include "tables.h"     /* for opflags_t and nasm_reg_flags[] */
 
 /*
  * Here we define the operand types. These are implemented as bit
@@ -53,10 +54,9 @@
  * if and only if "operand" belongs to class type "class".
  */
 
-typedef uint64_t opflags_t;
-
 #define OP_GENMASK(bits, shift)         (((UINT64_C(1) << (bits)) - 1) << (shift))
 #define OP_GENBIT(bit, shift)           (UINT64_C(1) << ((shift) + (bit)))
+#define OP_GENVAL(val, bits, shift)     (((val) & ((UINT64_C(1) << (bits)) - 1)) << (shift))
 
 /*
  * Type of operand: memory reference, register, etc.
@@ -162,11 +162,14 @@ typedef uint64_t opflags_t;
 #define REG_CLASS_RM_MMX        GEN_REG_CLASS(4)
 #define REG_CLASS_RM_XMM        GEN_REG_CLASS(5)
 #define REG_CLASS_RM_YMM        GEN_REG_CLASS(6)
+#define REG_CLASS_RM_ZMM        GEN_REG_CLASS(7)
+#define REG_CLASS_OPMASK        GEN_REG_CLASS(8)
 
-#define is_class(class, op)     (!((opflags_t)(class) & ~(opflags_t)(op)))
+#define is_class(class, op)         (!((opflags_t)(class) & ~(opflags_t)(op)))
+#define is_reg_class(class, reg)    is_class((class), nasm_reg_flags[(reg)])
 
-#define IS_SREG(op)             is_class(REG_SREG, nasm_reg_flags[(op)])
-#define IS_FSGS(op)             is_class(REG_FSGS, nasm_reg_flags[(op)])
+#define IS_SREG(op)                 is_reg_class(REG_SREG, (op))
+#define IS_FSGS(op)                 is_reg_class(REG_FSGS, (op))
 
 /* Register classes */
 #define REG_EA                  (                                               REGMEM | REGISTER)      /* 'normal' reg, qualifies as EA */
@@ -186,6 +189,12 @@ typedef uint64_t opflags_t;
 #define RM_YMM                  (                  REG_CLASS_RM_YMM           | REGMEM)                 /* YMM (AVX) operand */
 #define YMMREG                  (                  REG_CLASS_RM_YMM           | REGMEM | REGISTER)      /* YMM (AVX) register */
 #define YMM0                    (GEN_SUBCLASS(1) | REG_CLASS_RM_YMM           | REGMEM | REGISTER)      /* YMM register zero */
+#define RM_ZMM                  (                  REG_CLASS_RM_ZMM           | REGMEM)                 /* ZMM (AVX512) operand */
+#define ZMMREG                  (                  REG_CLASS_RM_ZMM           | REGMEM | REGISTER)      /* ZMM (AVX512) register */
+#define ZMM0                    (GEN_SUBCLASS(1) | REG_CLASS_RM_ZMM           | REGMEM | REGISTER)      /* ZMM register zero */
+#define RM_OPMASK               (                  REG_CLASS_OPMASK           | REGMEM)                 /* Opmask operand */
+#define OPMASKREG               (                  REG_CLASS_OPMASK           | REGMEM | REGISTER)      /* Opmask register */
+#define OPMASK0                 (GEN_SUBCLASS(1) | REG_CLASS_OPMASK           | REGMEM | REGISTER)      /* Opmask register zero (k0) */
 #define REG_CDT                 (                  REG_CLASS_CDT    | BITS32           | REGISTER)      /* CRn, DRn and TRn */
 #define REG_CREG                (GEN_SUBCLASS(1) | REG_CLASS_CDT    | BITS32           | REGISTER)      /* CRn */
 #define REG_DREG                (GEN_SUBCLASS(2) | REG_CLASS_CDT    | BITS32           | REGISTER)      /* DRn */
@@ -232,7 +241,7 @@ typedef uint64_t opflags_t;
 #define YMEM                    (GEN_SUBCLASS(4) | MEMORY)      /* 256-bit vector SIB */
 
 /* memory which matches any type of r/m operand */
-#define MEMORY_ANY              (MEMORY | RM_GPR | RM_MMX | RM_XMM | RM_YMM)
+#define MEMORY_ANY              (MEMORY | RM_GPR | RM_MMX | RM_XMM | RM_YMM | RM_ZMM)
 
 /* special immediate values */
 #define UNITY                   (GEN_SUBCLASS(0) | IMMEDIATE)   /* operand equals 1 */
diff --git a/parser.c b/parser.c
index afc422a..f7139f3 100644
--- a/parser.c
+++ b/parser.c
@@ -193,6 +193,51 @@ static void process_size_override(insn *result, int operand)
     }
 }
 
+/*
+ * when two or more decorators follow a register operand,
+ * consecutive decorators are parsed here.
+ * the order of decorators does not matter.
+ * e.g. zmm1 {k2}{z} or zmm2 {z,k3}
+ * decorator(s) are placed at the end of an operand.
+ */
+static bool parse_braces(decoflags_t *decoflags)
+{
+    int i;
+    bool recover = false;
+
+    i = tokval.t_type;
+    do {
+        if (i == TOKEN_OPMASK) {
+            if (*decoflags & OPMASK_MASK) {
+                nasm_error(ERR_NONFATAL, "opmask k%lu is already set",
+                           *decoflags & OPMASK_MASK);
+                *decoflags &= ~OPMASK_MASK;
+            }
+            *decoflags |= VAL_OPMASK(nasm_regvals[tokval.t_integer]);
+        } else if (i == TOKEN_DECORATOR) {
+            switch (tokval.t_integer) {
+            case BRC_Z:
+                /*
+                 * according to AVX512 spec, only zeroing/merging decorator
+                 * is supported with opmask
+                 */
+                *decoflags |= GEN_Z(0);
+                break;
+            }
+        } else if (i == ',' || i == TOKEN_EOS){
+            break;
+        } else {
+            nasm_error(ERR_NONFATAL, "only a series of valid decorators"
+                                     " expected");
+            recover = true;
+            break;
+        }
+        i = stdscan(NULL, &tokval);
+    } while(1);
+
+    return recover;
+}
+
 insn *parse_line(int pass, char *buffer, insn *result, ldfunc ldef)
 {
     bool insn_is_label = false;
@@ -557,10 +602,12 @@ is_expression:
         int mref;               /* is this going to be a memory ref? */
         int bracket;            /* is it a [] mref, or a & mref? */
         int setsize = 0;
+        decoflags_t brace_flags = 0;    /* flags for decorators in braces */
 
         result->oprs[operand].disp_size = 0;    /* have to zero this whatever */
         result->oprs[operand].eaflags   = 0;    /* and this */
         result->oprs[operand].opflags   = 0;
+        result->oprs[operand].decoflags = 0;
 
         i = stdscan(NULL, &tokval);
         if (i == TOKEN_EOS)
@@ -702,17 +749,37 @@ is_expression:
                 recover = true;
             } else {            /* we got the required ] */
                 i = stdscan(NULL, &tokval);
+                if (i == TOKEN_DECORATOR) {
+                    /*
+                     * according to AVX512 spec, only broacast decorator is
+                     * expected for memory reference operands
+                     */
+                    if (tokval.t_flag & TFLAG_BRDCAST) {
+                        brace_flags |= GEN_BRDCAST(0);
+                        i = stdscan(NULL, &tokval);
+                    } else {
+                        nasm_error(ERR_NONFATAL, "broadcast decorator"
+                                   "expected inside braces");
+                        recover = true;
+                    }
+                }
+
                 if (i != 0 && i != ',') {
                     nasm_error(ERR_NONFATAL, "comma or end of line expected");
                     recover = true;
                 }
             }
         } else {                /* immediate operand */
-            if (i != 0 && i != ',' && i != ':') {
-                nasm_error(ERR_NONFATAL, "comma, colon or end of line expected");
+            if (i != 0 && i != ',' && i != ':' &&
+                i != TOKEN_DECORATOR && i != TOKEN_OPMASK) {
+                nasm_error(ERR_NONFATAL, "comma, colon, decorator or end of "
+                                         "line expected after operand");
                 recover = true;
             } else if (i == ':') {
                 result->oprs[operand].type |= COLON;
+            } else if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
+                /* parse opmask (and zeroing) after an operand */
+                recover = parse_braces(&brace_flags);
             }
         }
         if (recover) {
@@ -856,6 +923,7 @@ is_expression:
             result->oprs[operand].indexreg = i;
             result->oprs[operand].scale = s;
             result->oprs[operand].offset = o;
+            result->oprs[operand].decoflags |= brace_flags;
         } else {                /* it's not a memory reference */
             if (is_just_unknown(value)) {       /* it's immediate but unknown */
                 result->oprs[operand].type      |= IMMEDIATE;
@@ -891,6 +959,27 @@ is_expression:
                             result->oprs[operand].type |= SDWORD;
                     }
                 }
+            } else if(value->type == EXPR_RDSAE) {
+                /*
+                 * it's not an operand but a rounding or SAE decorator.
+                 * put the decorator information in the (opflag_t) type field
+                 * of previous operand.
+                 */
+                operand --;
+                switch (value->value) {
+                case BRC_RN:
+                case BRC_RU:
+                case BRC_RD:
+                case BRC_RZ:
+                case BRC_SAE:
+                    result->oprs[operand].decoflags  |=
+                                        (value->value == BRC_SAE ? SAE : ER);
+                    result->evex_rm = value->value;
+                    break;
+                default:
+                    nasm_error(ERR_NONFATAL, "invalid decorator");
+                    break;
+                }
             } else {            /* it's a register */
                 opflags_t rs;
 
@@ -923,6 +1012,7 @@ is_expression:
                 result->oprs[operand].type      &= TO;
                 result->oprs[operand].type      |= REGISTER;
                 result->oprs[operand].type      |= nasm_reg_flags[value->type];
+                result->oprs[operand].decoflags |= brace_flags;
                 result->oprs[operand].basereg   = value->type;
 
                 if (rs && (result->oprs[operand].type & SIZE_MASK) != rs)
diff --git a/regs.dat b/regs.dat
index 57cef6a..742b69d 100644
--- a/regs.dat
+++ b/regs.dat
@@ -36,12 +36,17 @@
 #
 # The columns are:
 #
-# register name, assembler class, disassembler class(es), x86 register number
+# register name, assembler class, disassembler class(es), x86 register number[, token flag]
 #
 # If the register name ends in two numbers separated by a dash, then it is
 # repeated as many times as indicated, and the register number is
 # updated with it.
 #
+# If 'token flag' is present, this value will be assigned to tokflag field in
+# 'struct tokendata tokendata[]' table. Token flag can be used for specifying
+# special usage of corresponding register. E.g. opmask registers can be either
+# enclosed by curly braces or standalone operand depending on the usage.
+#
 
 # General-purpose registers
 al	REG_AL		reg8,reg8_rex	0
@@ -117,3 +122,11 @@ xmm1-15	XMMREG		xmmreg		1
 # AVX registers
 ymm0	YMM0		ymmreg		0
 ymm1-15	YMMREG		ymmreg		1
+
+# AVX3 registers
+zmm0	ZMM0		zmmreg		0
+zmm1-31	ZMMREG		zmmreg		1
+
+# Opmask registers
+k0		OPMASK0		opmaskreg	0
+k1-7	OPMASKREG	opmaskreg	1   TFLAG_BRC_OPT
diff --git a/regs.pl b/regs.pl
index 82c4829..52e5ca3 100755
--- a/regs.pl
+++ b/regs.pl
@@ -48,7 +48,7 @@ sub process_line($) {
     my($line) = @_;
     my @v;
 
-    if ( $line !~ /^\s*(\S+)\s*(\S+)\s*(\S+)\s*([0-9]+)$/i ) {
+    if ( $line !~ /^\s*(\S+)\s*(\S+)\s*(\S+)\s*([0-9]+)\s*(\S*)/i ) {
 	die "regs.dat:$nline: invalid input\n";
     }
     $reg      = $1;
diff --git a/stdscan.c b/stdscan.c
index b7d8000..b5e389d 100644
--- a/stdscan.c
+++ b/stdscan.c
@@ -53,6 +53,8 @@
 static char *stdscan_bufptr = NULL;
 static char **stdscan_tempstorage = NULL;
 static int stdscan_tempsize = 0, stdscan_templen = 0;
+static int brace = 0;               /* nested brace counter */
+static bool brace_opened = false;   /* if brace is just opened */
 #define STDSCAN_TEMP_DELTA 256
 
 void stdscan_set(char *str)
@@ -105,6 +107,40 @@ static char *stdscan_copy(char *p, int len)
     return text;
 }
 
+/*
+ * a token is enclosed with braces. proper token type will be assigned
+ * accordingly with the token flag.
+ * a closing brace is treated as an ending character of corresponding token.
+ */
+static int stdscan_handle_brace(struct tokenval *tv)
+{
+    if (!(tv->t_flag & TFLAG_BRC_ANY)) {
+        /* invalid token is put inside braces */
+        nasm_error(ERR_NONFATAL,
+                    "%s is not a valid decorator with braces", tv->t_charptr);
+        tv->t_type = TOKEN_INVALID;
+    } else if (tv->t_flag & TFLAG_BRC_OPT) {
+        if (is_reg_class(OPMASKREG, tv->t_integer)) {
+            /* within braces, opmask register is now used as a mask */
+            tv->t_type = TOKEN_OPMASK;
+        }
+    }
+
+    stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
+
+    if (stdscan_bufptr[0] == '}') {
+        stdscan_bufptr ++;      /* skip the closing brace */
+        brace --;
+    } else if (stdscan_bufptr[0] != ',') {
+        /* treat {foo,bar} as {foo}{bar}
+         * by regarding ',' as a mere separator between decorators
+         */
+        nasm_error(ERR_NONFATAL, "closing brace expected");
+        tv->t_type = TOKEN_INVALID;
+    }
+    return tv->t_type;
+}
+
 int stdscan(void *private_data, struct tokenval *tv)
 {
     char ourcopy[MAX_KEYWORD + 1], *r, *s;
@@ -112,14 +148,22 @@ int stdscan(void *private_data, struct tokenval *tv)
     (void)private_data;         /* Don't warn that this parameter is unused */
 
     stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
-    if (!*stdscan_bufptr)
+    if (!*stdscan_bufptr) {
+        /* nested brace shouldn't affect following lines */
+        brace = 0;
         return tv->t_type = TOKEN_EOS;
+    }
 
     /* we have a token; either an id, a number or a char */
     if (isidstart(*stdscan_bufptr) ||
-        (*stdscan_bufptr == '$' && isidstart(stdscan_bufptr[1]))) {
+        (*stdscan_bufptr == '$' && isidstart(stdscan_bufptr[1])) ||
+        (brace && isidchar(*stdscan_bufptr))) {     /* because of {1to8} */
         /* now we've got an identifier */
         bool is_sym = false;
+        int token_type;
+
+        /* opening brace is followed by any letter */
+        brace_opened = false;
 
         if (*stdscan_bufptr == '$') {
             is_sym = true;
@@ -128,7 +172,8 @@ int stdscan(void *private_data, struct tokenval *tv)
 
         r = stdscan_bufptr++;
         /* read the entire buffer to advance the buffer pointer but... */
-        while (isidchar(*stdscan_bufptr))
+        /* {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae} contain '-' in tokens. */
+        while (isidchar(*stdscan_bufptr) || (brace && *stdscan_bufptr == '-'))
             stdscan_bufptr++;
 
         /* ... copy only up to IDLEN_MAX-1 characters */
@@ -143,7 +188,19 @@ int stdscan(void *private_data, struct tokenval *tv)
         *r = '\0';
         /* right, so we have an identifier sitting in temp storage. now,
          * is it actually a register or instruction name, or what? */
-        return nasm_token_hash(ourcopy, tv);
+        token_type = nasm_token_hash(ourcopy, tv);
+
+        if (likely(!brace)) {
+            if (likely(!(tv->t_flag & TFLAG_BRC))) {
+                /* most of the tokens fall into this case */
+                return token_type;
+            } else {
+                return tv->t_type = TOKEN_ID;
+            }
+        } else {
+            /* handle tokens inside braces */
+            return stdscan_handle_brace(tv);
+        }
     } else if (*stdscan_bufptr == '$' && !isnumchar(stdscan_bufptr[1])) {
         /*
          * It's a $ sign with no following hex number; this must
@@ -267,6 +324,35 @@ int stdscan(void *private_data, struct tokenval *tv)
     } else if (stdscan_bufptr[0] == '|' && stdscan_bufptr[1] == '|') {
         stdscan_bufptr += 2;
         return tv->t_type = TOKEN_DBL_OR;
+    } else if (stdscan_bufptr[0] == '{') {
+        stdscan_bufptr ++;      /* skip the opening brace */
+        brace ++;               /* in case of nested braces */
+        brace_opened = true;    /* brace is just opened */
+        return stdscan(private_data, tv);
+    } else if (stdscan_bufptr[0] == ',' && brace) {
+        /*
+         * a comma inside braces should be treated just as a separator.
+         * this is almost same as an opening brace except increasing counter.
+         */
+        stdscan_bufptr ++;
+        brace_opened = true;    /* brace is just opened */
+        return stdscan(private_data, tv);
+    } else if (stdscan_bufptr[0] == '}') {
+        stdscan_bufptr ++;      /* skip the closing brace */
+        if (brace) {
+            /* unhandled nested closing brace */
+            brace --;
+            /* if brace is closed without any content in it */
+            if (brace_opened) {
+                brace_opened = false;
+                nasm_error(ERR_NONFATAL, "nothing inside braces");
+            }
+            return stdscan(private_data, tv);
+        } else {
+            /* redundant closing brace */
+            return tv->t_type = TOKEN_INVALID;
+        }
+        return stdscan(private_data, tv);
     } else                      /* just an ordinary char */
         return tv->t_type = (uint8_t)(*stdscan_bufptr++);
 }
diff --git a/tables.h b/tables.h
index e6f84cb..d0db3b3 100644
--- a/tables.h
+++ b/tables.h
@@ -43,7 +43,6 @@
 #include "compiler.h"
 #include <inttypes.h>
 #include "insnsi.h"		/* For enum opcode */
-#include "opflags.h"		/* For opflags_t */
 
 /* --- From standard.mac via macros.pl: --- */
 
@@ -62,6 +61,8 @@ extern const char * const nasm_insn_names[];
 /* regs.c */
 extern const char * const nasm_reg_names[];
 /* regflags.c */
+typedef uint64_t opflags_t;
+typedef uint8_t  decoflags_t;
 extern const opflags_t nasm_reg_flags[];
 /* regvals.c */
 extern const int nasm_regvals[];
diff --git a/tokens.dat b/tokens.dat
index c2df469..1a00e3d 100644
--- a/tokens.dat
+++ b/tokens.dat
@@ -35,7 +35,7 @@
 # Tokens other than instructions and registers
 #
 
-% TOKEN_PREFIX, 0, P_*
+% TOKEN_PREFIX, 0, 0, P_*
 a16
 a32
 a64
@@ -55,7 +55,7 @@ wait
 xacquire
 xrelease
 
-% TOKEN_SPECIAL, 0, S_*
+% TOKEN_SPECIAL, 0, 0, S_*
 abs
 byte
 dword
@@ -73,13 +73,13 @@ tword
 word
 yword
 
-% TOKEN_FLOAT, 0, 0
+% TOKEN_FLOAT, 0, 0, 0
 __infinity__
 __nan__
 __qnan__
 __snan__
 
-% TOKEN_FLOATIZE, 0, FLOAT_{__float*__}
+% TOKEN_FLOATIZE, 0, 0, FLOAT_{__float*__}
 __float8__
 __float16__
 __float32__
@@ -89,7 +89,7 @@ __float80e__
 __float128l__
 __float128h__
 
-% TOKEN_STRFUNC, 0, STRFUNC_{__*__}
+% TOKEN_STRFUNC, 0, 0, STRFUNC_{__*__}
 __utf16__
 __utf16le__
 __utf16be__
@@ -97,12 +97,26 @@ __utf32__
 __utf32le__
 __utf32be__
 
-% TOKEN_IFUNC, 0, IFUNC_{__*__}
+% TOKEN_IFUNC, 0, 0, IFUNC_{__*__}
 __ilog2e__
 __ilog2w__
 __ilog2f__
 __ilog2c__
 
-% TOKEN_*, 0, 0
+% TOKEN_*, 0, 0, 0
 seg
 wrt
+
+% TOKEN_DECORATOR, 0, TFLAG_BRC | TFLAG_BRDCAST , BRC_1TO{1to*}
+1to8
+1to16
+
+% TOKEN_DECORATOR, 0, TFLAG_BRC, BRC_{*-sae}
+rn-sae
+rd-sae
+ru-sae
+rz-sae
+
+% TOKEN_DECORATOR, 0, TFLAG_BRC, BRC_*
+sae
+z
diff --git a/tokhash.pl b/tokhash.pl
index 6c05802..4ea387d 100755
--- a/tokhash.pl
+++ b/tokhash.pl
@@ -65,14 +65,14 @@ while (defined($line = <ID>)) {
 	    # Single instruction token
 	    if (!defined($tokens{$token})) {
 		$tokens{$token} = scalar @tokendata;
-		push(@tokendata, "\"${token}\", TOKEN_INSN, C_none, I_${insn}");
+		push(@tokendata, "\"${token}\", TOKEN_INSN, C_none, 0, I_${insn}");
 	    }
 	} else {
 	    # Conditional instruction
 	    foreach $cc (@conditions) {
 		if (!defined($tokens{$token.$cc})) {
 		    $tokens{$token.$cc} = scalar @tokendata;
-		    push(@tokendata, "\"${token}${cc}\", TOKEN_INSN, C_\U$cc\E, I_${insn}");
+		    push(@tokendata, "\"${token}${cc}\", TOKEN_INSN, C_\U$cc\E, 0, I_${insn}");
 		}
 	    }
 	}
@@ -85,8 +85,9 @@ close(ID);
 #
 open(RD, "< ${regs_dat}") or die "$0: cannot open $regs_dat: $!\n";
 while (defined($line = <RD>)) {
-    if ($line =~ /^([a-z0-9_-]+)\s/) {
+    if ($line =~ /^([a-z0-9_-]+)\s*\S+\s*\S+\s*[0-9]+\s*(\S*)/) {
 	$reg = $1;
+	$reg_flag = $2;
 
 	if ($reg =~ /^(.*[^0-9])([0-9]+)\-([0-9]+)(|[^0-9].*)$/) {
 	    $nregs = $3-$2+1;
@@ -104,7 +105,11 @@ while (defined($line = <RD>)) {
 		die "Duplicate definition: $reg\n";
 	    }
 	    $tokens{$reg} = scalar @tokendata;
-	    push(@tokendata, "\"${reg}\", TOKEN_REG, 0, R_\U${reg}\E");
+	    if ($reg_flag eq '') {
+	        push(@tokendata, "\"${reg}\", TOKEN_REG, 0, 0, R_\U${reg}\E");
+	    } else {
+	        push(@tokendata, "\"${reg}\", TOKEN_REG, 0, ${reg_flag}, R_\U${reg}\E");
+	    }
 
 	    if (defined($reg_prefix)) {
 		$reg_nr++;
@@ -214,7 +219,8 @@ if ($output eq 'h') {
     print "struct tokendata {\n";
     print "    const char *string;\n";
     print "    int16_t tokentype;\n";
-    print "    int16_t aux;\n";
+    print "    int8_t aux;\n";
+    print "    int8_t tokflag;\n";
     print "    int32_t num;\n";
     print "};\n";
     print "\n";
@@ -270,6 +276,7 @@ if ($output eq 'h') {
     print  "\n";
     print  "    tv->t_integer = data->num;\n";
     print  "    tv->t_inttwo  = data->aux;\n";
+    print  "    tv->t_flag    = data->tokflag;\n";
     print  "    return tv->t_type = data->tokentype;\n";
     print  "}\n";
 }

[nasm:avx512] AVX-512: Handle curly braces in multi-line macro parameters

From: nasm-bot f. J. K. S. <jin...@in...> - 2013-09-21 12:30:19

Commit-ID:  a800aed7b75d56114f2e1e4928cbc48ecf96a4a0
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=a800aed7b75d56114f2e1e4928cbc48ecf96a4a0
Author:     Jin Kyu Song <jin...@in...>
AuthorDate: Wed, 21 Aug 2013 19:29:08 -0700
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Thu, 22 Aug 2013 19:37:25 +0400

AVX-512: Handle curly braces in multi-line macro parameters

Multi-line macro uses curly braces for enclosing a parameter
containing comma(s). Passing curly braces as a part of a parameter
which is already enclosed with braces confuses the macro expander.

In this case, the inner braces must be prefixed with the escape character '\'.
e.g. mmacro {1,2,3}, {4,\{5,6\}}
     mmacro gets 2 parameters: '1,2,3' and '4,{5,6}'

Signed-off-by: Jin Kyu Song <jin...@in...>
Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 preproc.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/preproc.c b/preproc.c
index e2b12e4..b878e4b 100644
--- a/preproc.c
+++ b/preproc.c
@@ -208,6 +208,7 @@ enum pp_token_type {
     TOK_PREPROC_Q, TOK_PREPROC_QQ,
     TOK_PASTE,              /* %+ */
     TOK_INDIRECT,           /* %[...] */
+    TOK_BRACE,              /* \{...\} */
     TOK_SMAC_PARAM,         /* MUST BE LAST IN THE LIST!!! */
     TOK_MAX = INT_MAX       /* Keep compiler from reducing the range */
 };
@@ -1103,6 +1104,10 @@ static Token *tokenize(char *line)
             type = TOK_COMMENT;
             while (*p)
                 p++;
+        } else if (p[0] == '\\' && (p[1] == '{' || p[1] == '}')) {
+            type = TOK_BRACE;
+            p += 2;
+            line++;
         } else {
             /*
              * Anything else is an operator of some kind. We check

[nasm:avx512] AVX-512: Fix instruction match function

From: nasm-bot f. J. K. S. <jin...@in...> - 2013-09-21 12:30:19

Commit-ID:  4d1fc3f1a0865b82bbf5212cd601c0a4a1495fd6
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=4d1fc3f1a0865b82bbf5212cd601c0a4a1495fd6
Author:     Jin Kyu Song <jin...@in...>
AuthorDate: Wed, 21 Aug 2013 19:29:10 -0700
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Thu, 22 Aug 2013 19:37:37 +0400

AVX-512: Fix instruction match function

When an instruction allows broadcasting, the memory element size is
different from the size of a normal memory operation.
This information is provided in the decoflags field, so the match
function should try to match those properties before it fails.

Signed-off-by: Jin Kyu Song <jin...@in...>
Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 assemble.c | 35 +++++++++++++++++++++++++++++++----
 nasm.h     | 18 ++++++++++++++++--
 tables.h   |  2 +-
 3 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/assemble.c b/assemble.c
index 6054d4a..83971f6 100644
--- a/assemble.c
+++ b/assemble.c
@@ -1915,10 +1915,22 @@ static enum match_result find_match(const struct itemplate **tempp,
     enum match_result m, merr;
     opflags_t xsizeflags[MAX_OPERANDS];
     bool opsizemissing = false;
+    int8_t broadcast = -1;
     int i;
 
+    /* find the position of broadcasting operand */
     for (i = 0; i < instruction->operands; i++)
-        xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
+        if (instruction->oprs[i].decoflags & BRDCAST_MASK) {
+            broadcast = i;
+            break;
+        }
+
+    /* broadcasting uses a different data element size */
+    for (i = 0; i < instruction->operands; i++)
+        if (i == broadcast)
+            xsizeflags[i] = instruction->oprs[i].decoflags & BRSIZE_MASK;
+        else
+            xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
 
     merr = MERR_INVALOP;
 
@@ -1936,7 +1948,10 @@ static enum match_result find_match(const struct itemplate **tempp,
              * Missing operand size and a candidate for fuzzy matching...
              */
             for (i = 0; i < temp->operands; i++)
-                xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
+                if (i == broadcast)
+                    xsizeflags[i] |= temp->deco[i] & BRSIZE_MASK;
+                else
+                    xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
             opsizemissing = true;
         }
         if (m > merr)
@@ -1962,7 +1977,10 @@ static enum match_result find_match(const struct itemplate **tempp,
         if ((xsizeflags[i] & (xsizeflags[i]-1)))
             goto done;                /* No luck */
 
-        instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
+        if (i == broadcast)
+            instruction->oprs[i].decoflags |= xsizeflags[i];
+        else
+            instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
     }
 
     /* Try matching again... */
@@ -2107,7 +2125,16 @@ static enum match_result matches(const struct itemplate *itemp,
         } else if ((itemp->opd[i] & SIZE_MASK) &&
                    (itemp->opd[i] & SIZE_MASK) != (type & SIZE_MASK)) {
             if (type & SIZE_MASK) {
-                return MERR_INVALOP;
+                /*
+                 * when broadcasting, the element size depends on
+                 * the instruction type. decorator flag should match.
+                 */
+#define MATCH_BRSZ(bits) (((type & SIZE_MASK) == BITS##bits) &&             \
+                          ((itemp->deco[i] & BRSIZE_MASK) == BR_BITS##bits))
+                if (!((deco & BRDCAST_MASK) &&
+                      (MATCH_BRSZ(32) || MATCH_BRSZ(64)))) {
+                    return MERR_INVALOP;
+                }
             } else if (!is_class(REGISTER, type)) {
                 /*
                  * Note: we don't honor extrinsic operand sizes for registers,
diff --git a/nasm.h b/nasm.h
index 628ec43..e46b5ca 100644
--- a/nasm.h
+++ b/nasm.h
@@ -1038,6 +1038,7 @@ enum decorator_tokens {
  * ..........................1..... broadcast
  * .........................1...... static rounding
  * ........................1....... SAE
+ * ......................11........ broadcast element size
  */
 #define OP_GENVAL(val, bits, shift)     (((val) & ((UINT64_C(1) << (bits)) - 1)) << (shift))
 
@@ -1096,10 +1097,23 @@ enum decorator_tokens {
 #define SAE_MASK                OP_GENMASK(SAE_BITS, SAE_SHIFT)
 #define GEN_SAE(bit)            OP_GENBIT(bit, SAE_SHIFT)
 
+/*
+ * Broadcasting element size.
+ *
+ * Bits: 8 - 9
+ */
+#define BRSIZE_SHIFT            (8)
+#define BRSIZE_BITS             (2)
+#define BRSIZE_MASK             OP_GENMASK(BRSIZE_BITS, BRSIZE_SHIFT)
+#define GEN_BRSIZE(bit)         OP_GENBIT(bit, BRSIZE_SHIFT)
+
+#define BR_BITS32               GEN_BRSIZE(0)
+#define BR_BITS64               GEN_BRSIZE(1)
+
 #define MASK                    OPMASK_MASK             /* Opmask (k1 ~ 7) can be used */
 #define Z                       Z_MASK
-#define B32                     BRDCAST_MASK            /* {1to16} : load+op instruction can broadcast when it is reg-reg operation */
-#define B64                     BRDCAST_MASK            /* {1to8}  : There are two definitions just for conforming to SDM */
+#define B32                     (BRDCAST_MASK|BR_BITS32) /* {1to16} : broadcast 32b * 16 to zmm(512b) */ 
+#define B64                     (BRDCAST_MASK|BR_BITS64) /* {1to8}  : broadcast 64b *  8 to zmm(512b) */
 #define ER                      STATICRND_MASK          /* ER(Embedded Rounding) == Static rounding mode */
 #define SAE                     SAE_MASK                /* SAE(Suppress All Exception) */
 
diff --git a/tables.h b/tables.h
index d0db3b3..4b14566 100644
--- a/tables.h
+++ b/tables.h
@@ -62,7 +62,7 @@ extern const char * const nasm_insn_names[];
 extern const char * const nasm_reg_names[];
 /* regflags.c */
 typedef uint64_t opflags_t;
-typedef uint8_t  decoflags_t;
+typedef uint16_t  decoflags_t;
 extern const opflags_t nasm_reg_flags[];
 /* regvals.c */
 extern const int nasm_regvals[];

[nasm:master] strfunc: Tabs to spaces conversion

From: nasm-bot f. C. G. <gor...@gm...> - 2013-09-21 12:27:28

Commit-ID:  e81b2ee85ca0f06705181dca615f13b50fcb3c35
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=e81b2ee85ca0f06705181dca615f13b50fcb3c35
Author:     Cyrill Gorcunov <gor...@gm...>
AuthorDate: Sat, 21 Sep 2013 13:14:15 +0400
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Sat, 21 Sep 2013 13:14:15 +0400

strfunc: Tabs to spaces conversion

Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 strfunc.c | 392 +++++++++++++++++++++++++++++++-------------------------------
 1 file changed, 196 insertions(+), 196 deletions(-)

diff --git a/strfunc.c b/strfunc.c
index 4b5af40..236b9d2 100644
--- a/strfunc.c
+++ b/strfunc.c
@@ -53,56 +53,56 @@ static size_t utf8_to_16le(uint8_t *str, size_t len, char *op)
     uint32_t v = 0, vmin = 0;
 
     while (len--) {
-	c = *str++;
-
-	if (expect) {
-	    if ((c & 0xc0) != 0x80) {
-		expect = 0;
-		return -1;
-	    } else {
-		v = (v << 6) | (c & 0x3f);
-		if (!--expect) {
-		    if (v < vmin || v > 0x10ffff ||
-			(v >= 0xd800 && v <= 0xdfff)) {
-			return -1;
-		    } else if (v > 0xffff) {
-			v -= 0x10000;
-			EMIT(0xd800 | (v >> 10));
-			EMIT(0xdc00 | (v & 0x3ff));
-		    } else {
-			EMIT(v);
-		    }
-		}
-		continue;
-	    }
-	}
-
-	if (c < 0x80) {
-	    EMIT(c);
-	} else if (c < 0xc0 || c >= 0xfe) {
-	    /* Invalid UTF-8 */
-	    return -1;
-	} else if (c < 0xe0) {
-	    v = c & 0x1f;
-	    expect = 1;
-	    vmin = 0x80;
-	} else if (c < 0xf0) {
-	    v = c & 0x0f;
-	    expect = 2;
-	    vmin = 0x800;
-	} else if (c < 0xf8) {
-	    v = c & 0x07;
-	    expect = 3;
-	    vmin = 0x10000;
-	} else if (c < 0xfc) {
-	    v = c & 0x03;
-	    expect = 4;
-	    vmin = 0x200000;
-	} else {
-	    v = c & 0x01;
-	    expect = 5;
-	    vmin = 0x4000000;
-	}
+        c = *str++;
+
+        if (expect) {
+            if ((c & 0xc0) != 0x80) {
+                expect = 0;
+                return -1;
+            } else {
+                v = (v << 6) | (c & 0x3f);
+                if (!--expect) {
+                    if (v < vmin || v > 0x10ffff ||
+                        (v >= 0xd800 && v <= 0xdfff)) {
+                        return -1;
+                    } else if (v > 0xffff) {
+                        v -= 0x10000;
+                        EMIT(0xd800 | (v >> 10));
+                        EMIT(0xdc00 | (v & 0x3ff));
+                    } else {
+                        EMIT(v);
+                    }
+                }
+                continue;
+            }
+        }
+
+        if (c < 0x80) {
+            EMIT(c);
+        } else if (c < 0xc0 || c >= 0xfe) {
+            /* Invalid UTF-8 */
+            return -1;
+        } else if (c < 0xe0) {
+            v = c & 0x1f;
+            expect = 1;
+            vmin = 0x80;
+        } else if (c < 0xf0) {
+            v = c & 0x0f;
+            expect = 2;
+            vmin = 0x800;
+        } else if (c < 0xf8) {
+            v = c & 0x07;
+            expect = 3;
+            vmin = 0x10000;
+        } else if (c < 0xfc) {
+            v = c & 0x03;
+            expect = 4;
+            vmin = 0x200000;
+        } else {
+            v = c & 0x01;
+            expect = 5;
+            vmin = 0x4000000;
+        }
     }
 
     return expect ? (size_t)-1 : outlen << 1;
@@ -131,56 +131,56 @@ static size_t utf8_to_16be(uint8_t *str, size_t len, char *op)
     uint32_t v = 0, vmin = 0;
 
     while (len--) {
-	c = *str++;
-
-	if (expect) {
-	    if ((c & 0xc0) != 0x80) {
-		expect = 0;
-		return -1;
-	    } else {
-		v = (v << 6) | (c & 0x3f);
-		if (!--expect) {
-		    if (v < vmin || v > 0x10ffff ||
-			(v >= 0xd800 && v <= 0xdfff)) {
-			return -1;
-		    } else if (v > 0xffff) {
-			v -= 0x10000;
-			EMIT(0xdc00 | (v & 0x3ff));
-			EMIT(0xd800 | (v >> 10));
-		    } else {
-			EMIT(v);
-		    }
-		}
-		continue;
-	    }
-	}
-
-	if (c < 0x80) {
-	    EMIT(c);
-	} else if (c < 0xc0 || c >= 0xfe) {
-	    /* Invalid UTF-8 */
-	    return -1;
-	} else if (c < 0xe0) {
-	    v = c & 0x1f;
-	    expect = 1;
-	    vmin = 0x80;
-	} else if (c < 0xf0) {
-	    v = c & 0x0f;
-	    expect = 2;
-	    vmin = 0x800;
-	} else if (c < 0xf8) {
-	    v = c & 0x07;
-	    expect = 3;
-	    vmin = 0x10000;
-	} else if (c < 0xfc) {
-	    v = c & 0x03;
-	    expect = 4;
-	    vmin = 0x200000;
-	} else {
-	    v = c & 0x01;
-	    expect = 5;
-	    vmin = 0x4000000;
-	}
+        c = *str++;
+
+        if (expect) {
+            if ((c & 0xc0) != 0x80) {
+                expect = 0;
+                return -1;
+            } else {
+                v = (v << 6) | (c & 0x3f);
+                if (!--expect) {
+                    if (v < vmin || v > 0x10ffff ||
+                        (v >= 0xd800 && v <= 0xdfff)) {
+                        return -1;
+                    } else if (v > 0xffff) {
+                        v -= 0x10000;
+                        EMIT(0xdc00 | (v & 0x3ff));
+                        EMIT(0xd800 | (v >> 10));
+                    } else {
+                        EMIT(v);
+                    }
+                }
+                continue;
+            }
+        }
+
+        if (c < 0x80) {
+            EMIT(c);
+        } else if (c < 0xc0 || c >= 0xfe) {
+            /* Invalid UTF-8 */
+            return -1;
+        } else if (c < 0xe0) {
+            v = c & 0x1f;
+            expect = 1;
+            vmin = 0x80;
+        } else if (c < 0xf0) {
+            v = c & 0x0f;
+            expect = 2;
+            vmin = 0x800;
+        } else if (c < 0xf8) {
+            v = c & 0x07;
+            expect = 3;
+            vmin = 0x10000;
+        } else if (c < 0xfc) {
+            v = c & 0x03;
+            expect = 4;
+            vmin = 0x200000;
+        } else {
+            v = c & 0x01;
+            expect = 5;
+            vmin = 0x4000000;
+        }
     }
 
     return expect ? (size_t)-1 : outlen << 1;
@@ -201,50 +201,50 @@ static size_t utf8_to_32le(uint8_t *str, size_t len, char *op)
     uint32_t v = 0, vmin = 0;
 
     while (len--) {
-	c = *str++;
-
-	if (expect) {
-	    if ((c & 0xc0) != 0x80) {
-		return -1;
-	    } else {
-		v = (v << 6) | (c & 0x3f);
-		if (!--expect) {
-		    if (v < vmin || (v >= 0xd800 && v <= 0xdfff)) {
-			return -1;
-		    } else {
-			EMIT(v);
-		    }
-		}
-		continue;
-	    }
-	}
-
-	if (c < 0x80) {
-	    EMIT(c);
-	} else if (c < 0xc0 || c >= 0xfe) {
-	    /* Invalid UTF-8 */
-	    return -1;
-	} else if (c < 0xe0) {
-	    v = c & 0x1f;
-	    expect = 1;
-	    vmin = 0x80;
-	} else if (c < 0xf0) {
-	    v = c & 0x0f;
-	    expect = 2;
-	    vmin = 0x800;
-	} else if (c < 0xf8) {
-	    v = c & 0x07;
-	    expect = 3;
-	    vmin = 0x10000;
-	} else if (c < 0xfc) {
-	    v = c & 0x03;
-	    expect = 4;
-	    vmin = 0x200000;
-	} else {
-	    v = c & 0x01;
-	    expect = 5;
-	    vmin = 0x4000000;
-	}
+        c = *str++;
+
+        if (expect) {
+            if ((c & 0xc0) != 0x80) {
+                return -1;
+            } else {
+                v = (v << 6) | (c & 0x3f);
+                if (!--expect) {
+                    if (v < vmin || (v >= 0xd800 && v <= 0xdfff)) {
+                        return -1;
+                    } else {
+                        EMIT(v);
+                    }
+                }
+                continue;
+            }
+        }
+
+        if (c < 0x80) {
+            EMIT(c);
+        } else if (c < 0xc0 || c >= 0xfe) {
+            /* Invalid UTF-8 */
+            return -1;
+        } else if (c < 0xe0) {
+            v = c & 0x1f;
+            expect = 1;
+            vmin = 0x80;
+        } else if (c < 0xf0) {
+            v = c & 0x0f;
+            expect = 2;
+            vmin = 0x800;
+        } else if (c < 0xf8) {
+            v = c & 0x07;
+            expect = 3;
+            vmin = 0x10000;
+        } else if (c < 0xfc) {
+            v = c & 0x03;
+            expect = 4;
+            vmin = 0x200000;
+        } else {
+            v = c & 0x01;
+            expect = 5;
+            vmin = 0x4000000;
+        }
     }
 
     return expect ? (size_t)-1 : outlen << 2;
@@ -275,50 +275,50 @@ static size_t utf8_to_32be(uint8_t *str, size_t len, char *op)
     uint32_t v = 0, vmin = 0;
 
     while (len--) {
-	c = *str++;
-
-	if (expect) {
-	    if ((c & 0xc0) != 0x80) {
-		return -1;
-	    } else {
-		v = (v << 6) | (c & 0x3f);
-		if (!--expect) {
-		    if (v < vmin || (v >= 0xd800 && v <= 0xdfff)) {
-			return -1;
-		    } else {
-			EMIT(v);
-		    }
-		}
-		continue;
-	    }
-	}
-
-	if (c < 0x80) {
-	    EMIT(c);
-	} else if (c < 0xc0 || c >= 0xfe) {
-	    /* Invalid UTF-8 */
-	    return -1;
-	} else if (c < 0xe0) {
-	    v = c & 0x1f;
-	    expect = 1;
-	    vmin = 0x80;
-	} else if (c < 0xf0) {
-	    v = c & 0x0f;
-	    expect = 2;
-	    vmin = 0x800;
-	} else if (c < 0xf8) {
-	    v = c & 0x07;
-	    expect = 3;
-	    vmin = 0x10000;
-	} else if (c < 0xfc) {
-	    v = c & 0x03;
-	    expect = 4;
-	    vmin = 0x200000;
-	} else {
-	    v = c & 0x01;
-	    expect = 5;
-	    vmin = 0x4000000;
-	}
+        c = *str++;
+
+        if (expect) {
+            if ((c & 0xc0) != 0x80) {
+                return -1;
+            } else {
+                v = (v << 6) | (c & 0x3f);
+                if (!--expect) {
+                    if (v < vmin || (v >= 0xd800 && v <= 0xdfff)) {
+                        return -1;
+                    } else {
+                        EMIT(v);
+                    }
+                }
+                continue;
+            }
+        }
+
+        if (c < 0x80) {
+            EMIT(c);
+        } else if (c < 0xc0 || c >= 0xfe) {
+            /* Invalid UTF-8 */
+            return -1;
+        } else if (c < 0xe0) {
+            v = c & 0x1f;
+            expect = 1;
+            vmin = 0x80;
+        } else if (c < 0xf0) {
+            v = c & 0x0f;
+            expect = 2;
+            vmin = 0x800;
+        } else if (c < 0xf8) {
+            v = c & 0x07;
+            expect = 3;
+            vmin = 0x10000;
+        } else if (c < 0xfc) {
+            v = c & 0x03;
+            expect = 4;
+            vmin = 0x200000;
+        } else {
+            v = c & 0x01;
+            expect = 5;
+            vmin = 0x4000000;
+        }
     }
 
     return expect ? (size_t)-1 : outlen << 2;
@@ -337,12 +337,12 @@ size_t string_transform(char *str, size_t len, char **out, enum strfunc func)
 {
     /* This should match enum strfunc in nasm.h */
     static const transform_func str_transforms[] = {
-	utf8_to_16le,
-	utf8_to_16le,
-	utf8_to_16be,
-	utf8_to_32le,
-	utf8_to_32le,
-	utf8_to_32be,
+        utf8_to_16le,
+        utf8_to_16le,
+        utf8_to_16be,
+        utf8_to_32le,
+        utf8_to_32le,
+        utf8_to_32be,
     };
     transform_func transform = str_transforms[func];
     size_t outlen;
@@ -351,9 +351,9 @@ size_t string_transform(char *str, size_t len, char **out, enum strfunc func)
 
     outlen = transform(s, len, NULL);
     if (outlen == (size_t)-1)
-	return -1;
+        return -1;
 
     *out = buf = nasm_malloc(outlen+1);
-    buf[outlen] = '\0';		/* Forcibly null-terminate the buffer */
+    buf[outlen] = '\0'; /* Forcibly null-terminate the buffer */
     return transform(s, len, buf);
 }

[nasm:master] coff: Support for section names longer than 8 bytes

From: nasm-bot f. M. D. <ma...@gm...> - 2013-09-21 12:27:28

Commit-ID:  91c43d789a6f8a873393dae8c7ff4f9e7a7bcf91
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=91c43d789a6f8a873393dae8c7ff4f9e7a7bcf91
Author:     Marat Dukhan <ma...@gm...>
AuthorDate: Fri, 20 Sep 2013 18:54:49 -0400
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Sat, 21 Sep 2013 13:09:03 +0400

coff: Support for section names longer than 8 bytes

http://bugzilla.nasm.us/show_bug.cgi?id=3392233

Signed-off-by: Marat Dukhan <ma...@gm...>
Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 output/outcoff.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 52 insertions(+), 11 deletions(-)

diff --git a/output/outcoff.c b/output/outcoff.c
index 258363f..b404347 100644
--- a/output/outcoff.c
+++ b/output/outcoff.c
@@ -137,7 +137,8 @@ struct Section {
     int32_t index;
     struct Reloc *head, **tail;
     uint32_t flags;             /* section flags */
-    char name[9];
+    char *name;
+    int32_t namepos;            /* Offset of name into the strings table */
     int32_t pos, relpos;
 };
 
@@ -215,7 +216,7 @@ static uint32_t strslen;
 static void coff_gen_init(void);
 static void coff_sect_write(struct Section *, const uint8_t *, uint32_t);
 static void coff_write(void);
-static void coff_section_header(char *, int32_t, int32_t, int32_t, int32_t, int, int32_t);
+static void coff_section_header(char *, int32_t, int32_t, int32_t, int32_t, int32_t, int, int32_t);
 static void coff_write_relocs(struct Section *);
 static void coff_write_symbols(void);
 
@@ -272,6 +273,7 @@ static void coff_cleanup(int debuginfo)
             sects[i]->head = sects[i]->head->next;
             nasm_free(r);
         }
+        nasm_free(sects[i]->name);
         nasm_free(sects[i]);
     }
     nasm_free(sects);
@@ -284,6 +286,7 @@ static void coff_cleanup(int debuginfo)
 static int coff_make_section(char *name, uint32_t flags)
 {
     struct Section *s;
+    size_t namelen;
 
     s = nasm_zalloc(sizeof(*s));
 
@@ -294,8 +297,20 @@ static int coff_make_section(char *name, uint32_t flags)
         s->index = def_seg;
     else
         s->index = seg_alloc();
-    strncpy(s->name, name, 8);
-    s->name[8] = '\0';
+    s->namepos = -1;
+    namelen = strlen(name);
+    if (namelen > 8) {
+        if (win32 || win64) {
+            s->namepos = strslen + 4;
+            saa_wbytes(strs, name, namelen + 1);
+            strslen += namelen + 1;
+        } else {
+            namelen = 8;
+        }
+    }
+    s->name = nasm_malloc(namelen + 1);
+    strncpy(s->name, name, namelen);
+    s->name[namelen] = '\0';
     s->flags = flags;
 
     if (nsects >= sectlen) {
@@ -337,9 +352,11 @@ static int32_t coff_section_names(char *name, int pass, int *bits)
     if (*p)
         *p++ = '\0';
     if (strlen(name) > 8) {
-        nasm_error(ERR_WARNING, "COFF section names limited to 8 characters:"
-              " truncating");
-        name[8] = '\0';
+        if (!win32 && !win64) {
+            nasm_error(ERR_WARNING,
+                       "COFF section names limited to 8 characters:  truncating");
+            name[8] = '\0';
+        }
     }
     flags = 0;
 
@@ -914,7 +931,7 @@ static void coff_write(void)
      */
     vsize = 0L;
     for (i = 0; i < nsects; i++) {
-        coff_section_header(sects[i]->name, vsize, sects[i]->len,
+        coff_section_header(sects[i]->name, sects[i]->namepos, vsize, sects[i]->len,
                             sects[i]->pos, sects[i]->relpos,
                             sects[i]->nrelocs, sects[i]->flags);
         vsize += sects[i]->len;
@@ -937,7 +954,7 @@ static void coff_write(void)
     saa_fpwrite(strs, ofile);
 }
 
-static void coff_section_header(char *name, int32_t vsize,
+static void coff_section_header(char *name, int32_t namepos, int32_t vsize,
                                 int32_t datalen, int32_t datapos,
                                 int32_t relpos, int nrelocs, int32_t flags)
 {
@@ -945,8 +962,32 @@ static void coff_section_header(char *name, int32_t vsize,
 
     (void)vsize;
 
-    strncpy(padname, name, 8);
-    fwrite(padname, 8, 1, ofile);
+    if (namepos == -1) {
+        strncpy(padname, name, 8);
+        fwrite(padname, 8, 1, ofile);
+    } else {
+        /*
+         * If name is longer than 8 bytes, write '/' followed
+         * by offset into the strings table represented as
+         * decimal number.
+         */
+        namepos = namepos % 100000000;
+        padname[0] = '/';
+        padname[1] = '0' + (namepos / 1000000);
+        namepos = namepos % 1000000;
+        padname[2] = '0' + (namepos / 100000);
+        namepos = namepos % 100000;
+        padname[3] = '0' + (namepos / 10000);
+        namepos = namepos % 10000;
+        padname[4] = '0' + (namepos / 1000);
+        namepos = namepos % 1000;
+        padname[5] = '0' + (namepos / 100);
+        namepos = namepos % 100;
+        padname[6] = '0' + (namepos / 10);
+        namepos = namepos % 10;
+        padname[7] = '0' + (namepos);
+        fwrite(padname, 8, 1, ofile);
+    }
 
     fwriteint32_t(0,            ofile); /* Virtual size field - set to 0 or vsize */
     fwriteint32_t(0L,           ofile); /* RVA/offset - we ignore */

[nasm:master] build: Include pregenerated nasm manpages

From: nasm-bot f. C. G. <gor...@gm...> - 2013-07-23 05:39:24

Commit-ID:  99427bdb6c85c812665f6d7b36ac520a631c5b23
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=99427bdb6c85c812665f6d7b36ac520a631c5b23
Author:     Cyrill Gorcunov <gor...@gm...>
AuthorDate: Mon, 22 Jul 2013 10:09:35 +0400
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Mon, 22 Jul 2013 10:09:35 +0400

build: Include pregenerated nasm manpages

It's been reported that forcing people to install
asciidoc for man page generation is not acceptable.
Just ship them pregenerated.

http://bugzilla.nasm.us/show_bug.cgi?id=3392262

Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 Makefile.in  |   5 -
 configure.in |  10 --
 nasm.1       | 423 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 ndisasm.1    | 120 +++++++++++++++++
 4 files changed, 543 insertions(+), 15 deletions(-)

diff --git a/Makefile.in b/Makefile.in
index b3eb15f..e1846ce 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -63,11 +63,6 @@ endif
 .c.i:
 	$(CC) -E $(ALL_CFLAGS) -o $@ $<
 
-.txt.xml:
-	$(ASCIIDOC) -b docbook -d manpage -o $@ $<
-
-.xml.1:
-	$(XMLTO) man --skip-validation $< 2>/dev/null
 
 
 #-- Begin File Lists --#
diff --git a/configure.in b/configure.in
index 8571973..bd5df18 100644
--- a/configure.in
+++ b/configure.in
@@ -68,20 +68,10 @@ PA_ADD_CFLAGS([-pedantic])
 
 dnl Look for programs...
 AC_CHECK_PROGS(NROFF,    nroff,    false)
-AC_CHECK_PROGS(ASCIIDOC, asciidoc, false)
-AC_CHECK_PROGS(XMLTO,    xmlto,    false)
 AC_CHECK_PROGS(ACRODIST, acrodist, false)
 AC_CHECK_PROGS(PS2PDF,   ps2pdf,   false)
 AC_CHECK_PROGS(PSTOPDF,  pstopdf,  false)
 
-dnl Check for progs needed for manpage generation
-if test $ASCIIDOC = false; then
-  AC_MSG_WARN([No acsciidoc package found])
-fi
-if test $XMLTO = false; then
-  AC_MSG_WARN([No xmlto package found])
-fi
-
 dnl Checks for header files.
 AC_HEADER_STDC
 if test $ac_cv_header_stdc = no; then
diff --git a/nasm.1 b/nasm.1
new file mode 100644
index 0000000..632e01f
--- /dev/null
+++ b/nasm.1
@@ -0,0 +1,423 @@
+'\" t
+.\"     Title: nasm
+.\"    Author: [FIXME: author] [see http://docbook.sf.net/el/author]
+.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
+.\"      Date: 07/22/2013
+.\"    Manual: The Netwide Assembler Project
+.\"    Source: NASM
+.\"  Language: English
+.\"
+.TH "NASM" "1" "07/22/2013" "NASM" "The Netwide Assembler Project"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+nasm \- the Netwide Assembler, a portable 80x86 assembler
+.SH "SYNOPSIS"
+.sp
+\fBnasm\fR [\fB\-@\fR response file] [\fB\-f\fR format] [\fB\-o\fR outfile] [\fB\-l\fR listfile] [\fIoptions\fR\&...] filename
+.SH "DESCRIPTION"
+.sp
+The \fBnasm\fR command assembles the file \fIfilename\fR and directs output to the file \fIoutfile\fR if specified\&. If \fIoutfile\fR is not specified, \fBnasm\fR will derive a default output file name from the name of its input file, usually by appending \(oq\&.o\(cq or \(oq\&.obj\(cq, or by removing all extensions for a raw binary file\&. Failing that, the output file name will be \(oqnasm\&.out\(cq\&.
+.SH "OPTIONS"
+.PP
+\fB\-@\fR \fIfilename\fR
+.RS 4
+Causes
+\fBnasm\fR
+to process options from filename as if they were included on the command line\&.
+.RE
+.PP
+\fB\-a\fR
+.RS 4
+Causes
+\fBnasm\fR
+to assemble the given input file without first applying the macro preprocessor\&.
+.RE
+.PP
+\fB\-D\fR|\fB\-d\fR \fImacro[=value]\fR
+.RS 4
+Pre\-defines a single\-line macro\&.
+.RE
+.PP
+\fB\-E\fR|\fB\-e\fR
+.RS 4
+Causes
+\fBnasm\fR
+to preprocess the given input file, and write the output to
+\fIstdout\fR
+(or the specified output file name), and not actually assemble anything\&.
+.RE
+.PP
+\fB\-f\fR \fIformat\fR
+.RS 4
+Specifies the output file format\&. To see a list of valid output formats, use the
+\fB\-hf\fR
+option\&.
+.RE
+.PP
+\fB\-F\fR \fIformat\fR
+.RS 4
+Specifies the debug information format\&. To see a list of valid output formats, use the
+\fB\-y\fR
+option (for example
+\fB\-felf \-y\fR)\&.
+.RE
+.PP
+\fB\-g\fR
+.RS 4
+Causes
+\fBnasm\fR
+to generate debug information in selected format\&.
+.RE
+.PP
+\fB\-h\fR
+.RS 4
+Causes
+\fBnasm\fR
+to exit immediately, after giving a summary of its invocation options\&.
+.RE
+.PP
+\fB\-hf\fR
+.RS 4
+Same as
+\fB\-h\fR
+, but also lists all valid output formats\&.
+.RE
+.PP
+\fB\-I\fR|\fB\-i\fR \fIdirectory\fR
+.RS 4
+Adds a directory to the search path for include files\&. The directory specification must include the trailing slash, as it will be directly prepended to the name of the include file\&.
+.RE
+.PP
+\fB\-l\fR \fIlistfile\fR
+.RS 4
+Causes an assembly listing to be directed to the given file, in which the original source is displayed on the right hand side (plus the source for included files and the expansions of multi\-line macros) and the generated code is shown in hex on the left\&.
+.RE
+.PP
+\fB\-M\fR
+.RS 4
+Causes
+\fBnasm\fR
+to output Makefile\-style dependencies to stdout; normal output is suppressed\&.
+.RE
+.PP
+\fB\-MG\fR \fIfile\fR
+.RS 4
+Same as
+\fB\-M\fR
+but assumes that missing Makefile dependecies are generated and added to dependency list without a prefix\&.
+.RE
+.PP
+\fB\-MF\fR \fIfile\fR
+.RS 4
+Output Makefile\-style dependencies to the specified file\&.
+.RE
+.PP
+\fB\-MD\fR \fIfile\fR
+.RS 4
+Same as a combination of
+\fB\-M\fR
+and
+\fB\-MF\fR
+options\&.
+.RE
+.PP
+\fB\-MT\fR \fIfile\fR
+.RS 4
+Override the default name of the dependency target dependency target name\&. This is normally the same as the output filename, specified by the
+\fB\-o\fR
+option\&.
+.RE
+.PP
+\fB\-MQ\fR \fIfile\fR
+.RS 4
+The same as
+\fB\-MT\fR
+except it tries to quote characters that have special meaning in Makefile syntax\&. This is not foolproof, as not all characters with special meaning are quotable in Make\&.
+.RE
+.PP
+\fB\-MP\fR
+.RS 4
+Emit phony target\&.
+.RE
+.PP
+\fB\-O\fR \fInumber\fR
+.RS 4
+Optimize branch offsets\&.
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+\fB\-O0\fR: No optimization
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+\fB\-O1\fR: Minimal optimization
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+\fB\-Ox\fR: Multipass optimization (default)
+.RE
+.RE
+.PP
+\fB\-o\fR \fIoutfile\fR
+.RS 4
+Specifies a precise name for the output file, overriding
+\fBnasm\fR\*(Aqs default means of determining it\&.
+.RE
+.PP
+\fB\-P\fR|\fB\-p\fR \fIfile\fR
+.RS 4
+Specifies a file to be pre\-included, before the main source file starts to be processed\&.
+.RE
+.PP
+\fB\-s\fR
+.RS 4
+Causes
+\fBnasm\fR
+to send its error messages and/or help text to stdout instead of stderr\&.
+.RE
+.PP
+\fB\-t\fR
+.RS 4
+Causes
+\fBnasm\fR
+to assemble in SciTech TASM compatible mode\&.
+.RE
+.PP
+\fB\-U\fR|\fB\-u\fR \fImacro\fR
+.RS 4
+Undefines a single\-line macro\&.
+.RE
+.PP
+\fB\-v\fR
+.RS 4
+Causes
+\fBnasm\fR
+to exit immediately, after displaying its version number\&.
+.RE
+.PP
+*\-W[no\-]foo\*(Aq
+.RS 4
+Causes
+\fBnasm\fR
+to enable or disable certain classes of warning messages, in gcc\-like style, for example
+\fB\-Worphan\-labels\fR
+or
+\fB\-Wno\-orphan\-labels\fR\&.
+.RE
+.PP
+\fB\-w\fR\fI[+\-]foo\fR
+.RS 4
+Causes
+\fBnasm\fR
+to enable or disable certain classes of warning messages, for example
+\fB\-w+orphan\-labels\fR
+or
+\fB\-w\-macro\-params\fR\&.
+.RE
+.PP
+\fB\-X\fR \fIformat\fR
+.RS 4
+Specifies error reporting format (gnu or vc)\&.
+.RE
+.PP
+\fB\-y\fR
+.RS 4
+Causes
+\fBnasm\fR
+to list supported debug formats\&.
+.RE
+.PP
+\fB\-Z\fR \fIfilename\fR
+.RS 4
+Causes
+\fBnasm\fR
+to redirect error messages to
+\fIfilename\fR\&. This option exists to support operating systems on which stderr is not easily redirected\&.
+.RE
+.PP
+\-\-prefix, \-\-postfix
+.RS 4
+Prepend or append (respectively) the given argument to all global or extern variables\&.
+.RE
+.SH "SYNTAX"
+.sp
+This man page does not fully describe the syntax of \fBnasm\fR\*(Aqs assembly language, but does give a summary of the differences from other assemblers\&.
+.sp
+\fIRegisters\fR have no leading \(oq%\(cq sign, unlike \fBgas\fR, and floating\-point stack registers are referred to as \fIst0\fR, \fIst1\fR, and so on\&.
+.sp
+\fIFloating\-point instructions\fR may use either the single\-operand form or the double\&. A \fITO\fR keyword is provided; thus, one could either write
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+fadd st0,st1
+fadd st1,st0
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+or one could use the alternative single\-operand forms
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+fadd st1
+fadd to st1
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+\fIUninitialised storage\fR is reserved using the \fIRESB\fR, \fIRESW\fR, \fIRESD\fR, \fIRESQ\fR, \fIREST\fR and \fIRESO\fR pseudo\-opcodes, each taking one parameter which gives the number of bytes, words, doublewords, quadwords, ten\-byte words or octowords to reserve\&.
+.sp
+\fIRepetition\fR of data items is not done by the \fIDUP\fR keyword as seen in DOS assemblers, but by the use of the \fITIMES\fR prefix, like this:
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+message: times 3 db \*(Aqabc\*(Aq
+         times 64\-$+message db 0
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+which defines the string abcabcabc, followed by the right number of zero bytes to make the total length up to 64 bytes\&.
+.sp
+\fISymbol references\fR are always understood to be immediate (i\&.e\&. the address of the symbol), unless square brackets are used, in which case the contents of the memory location are used\&. Thus:
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+mov ax,wordvar
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+loads AX with the address of the variable wordvar, whereas
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+mov ax,[wordvar]
+mov ax,[wordvar+1]
+mov ax,[es:wordvar+bx]
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+all refer to the \fIcontents\fR of memory locations\&. The syntaxes
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+mov ax,es:wordvar[bx]
+es mov ax,wordvar[1]
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+are not legal at all, although the use of a segment register name as an instruction prefix is valid, and can be used with instructions such as \fILODSB\fR which can\(cqt be overridden any other way\&.
+.sp
+\fIConstants\fR may be expressed numerically in most formats: a trailing H, Q or B denotes hex, octal or binary respectively, and a leading \(oq0x\(cq or \(oq$\(cq denotes hex as well\&. Leading zeros are not treated specially at all\&. Character constants may be enclosed in single or double quotes; there is no escape character\&. The ordering is little\-endian (reversed), so that the character constant \fI\*(Aqabcd\*(Aq\fR denotes 0x64636261 and not 0x61626364\&.
+.sp
+Local labels begin with a period, and their \(oqlocality\(cq is granted by the assembler prepending the name of the previous non\-local symbol\&. Thus declaring a label \(oq\&.loop\(cq after a label \(oqlabel\(cq has actually defined a symbol called \(oqlabel\&.loop\(cq\&.
+.SH "DIRECTIVES"
+.sp
+\fISECTION\fR \fIname\fR or \fISEGMENT\fR \fIname\fR causes \fBnasm\fR to direct all following code to the named section\&. Section names vary with output file format, although most formats support the names \fI\&.text\fR, \fI\&.data\fR and \fI\&.bss\fR\&. (The exception is the \fIobj\fR format, in which all segments are user\-definable\&.)
+.sp
+\fIABSOLUTE\fR \fIaddress\fR causes \fBnasm\fR to position its notional assembly point at an absolute address: so no code or data may be generated, but you can use \fIRESB\fR, \fIRESW\fR and \fIRESD\fR to move the assembly point further on, and you can define labels\&. So this directive may be used to define data structures\&. When you have finished doing absolute assembly, you must issue another \fISECTION\fR directive to return to normal assembly\&.
+.sp
+\fIBITS\fR \fI16\fR, \fIBITS\fR \fI32\fR or \fIBITS\fR \fI64\fR switches the default processor mode for which \fBnasm\fR is generating code: it is equivalent to \fIUSE16\fR or \fIUSE32\fR in DOS assemblers\&.
+.sp
+\fIEXTERN\fR \fIsymbol\fR and \fIGLOBAL\fR \fIsymbol\fR import and export symbol definitions, respectively, from and to other modules\&. Note that the \fIGLOBAL\fR directive must appear before the definition of the symbol it refers to\&.
+.sp
+\fISTRUC\fR \fIstrucname\fR and \fIENDSTRUC\fR, when used to bracket a number of \fIRESB\fR, \fIRESW\fR or similar instructions, define a data structure\&. In addition to defining the offsets of the structure members, the construct also defines a symbol for the size of the structure, which is simply the structure name with \fI_size\fR tacked on to the end\&.
+.SH "FORMAT-SPECIFIC DIRECTIVES"
+.sp
+\fIORG\fR \fIaddress\fR is used by the \fIbin\fR flat\-form binary output format, and specifies the address at which the output code will eventually be loaded\&.
+.sp
+\fIGROUP\fR \fIgrpname\fR \fIseg1\fR \fIseg2\fR\&... is used by the obj (Microsoft 16\-bit) output format, and defines segment groups\&. This format also uses \fIUPPERCASE\fR, which directs that all segment, group and symbol names output to the object file should be in uppercase\&. Note that the actual assembly is still case sensitive\&.
+.sp
+\fILIBRARY\fR \fIlibname\fR is used by the \fIrdf\fR output format, and causes a dependency record to be written to the output file which indicates that the program requires a certain library in order to run\&.
+.SH "MACRO PREPROCESSOR"
+.sp
+Single\-line macros are defined using the \fI%define\fR or \fI%idefine\fR commands, in a similar fashion to the C preprocessor\&. They can be overloaded with respect to number of parameters, although defining a macro with no parameters prevents the definition of any macro with the same name taking parameters, and vice versa\&. \fI%define\fR defines macros whose names match case\-sensitively, whereas \fI%idefine\fR defines case\-insensitive macros\&.
+.sp
+Multi\-line macros are defined using \fI%macro\fR and \fI%imacro\fR (the distinction is the same as that between \fI%define\fR and \fI%idefine\fR), whose syntax is as follows
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+%macro name minprm[\-maxprm][+][\&.nolist] [defaults]
+        <some lines of macro expansion text>
+%endmacro
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+Again, these macros may be overloaded\&. The trailing plus sign indicates that any parameters after the last one get subsumed, with their separating commas, into the last parameter\&. The \fIdefaults\fR part can be used to specify defaults for unspecified macro parameters after \fIminprm\fR\&. \fI%endm\fR is a valid synonym for \fI%endmacro\fR\&.
+.sp
+To refer to the macro parameters within a macro expansion, you use \fI%1\fR, \fI%2\fR and so on\&. You can also enforce that a macro parameter should contain a condition code by using \fI%+1\fR, and you can invert the condition code by using \fI%\-1\fR\&. You can also define a label specific to a macro invocation by prefixing it with a double \(oq%\(cq sign\&.
+.sp
+Files can be included using the \fI%include\fR directive, which works like C\&.
+.sp
+The preprocessor has a \(oqcontext stack\(cq, which may be used by one macro to store information that a later one will retrieve\&. You can push a context on the stack using \fI%push\fR, remove one using \fI%pop\fR, and change the name of the top context (without disturbing any associated definitions) using \fI%repl\fR\&. Labels and \fI%define\fR macros specific to the top context may be defined by prefixing their names with %$, and things specific to the next context down with %$$, and so on\&.
+.sp
+Conditional assembly is done by means of \fI%ifdef\fR, \fI%ifndef\fR, \fI%else\fR and \fI%endif\fR as in C\&. (Except that \fI%ifdef\fR can accept several putative macro names, and will evaluate TRUE if any of them is defined\&.) In addition, the directives \fI%ifctx\fR and \fI%ifnctx\fR can be used to condition on the name of the top context on the context stack\&. The obvious set of \(oqelse\-if\(cq directives, \fI%elifdef\fR, \fI%elifndef\fR, \fI%elifctx\fR and \fI%elifnctx\fR are also supported\&.
+.SH "BUGS"
+.sp
+Please report bugs through the bug tracker function at \m[blue]\fBhttp://nasm\&.us\fR\m[]\&.
+.SH "SEE ALSO"
+.sp
+\fBas\fR(1), \fBld\fR(1)\&.
diff --git a/ndisasm.1 b/ndisasm.1
new file mode 100644
index 0000000..345f66f
--- /dev/null
+++ b/ndisasm.1
@@ -0,0 +1,120 @@
+'\" t
+.\"     Title: ndisasm
+.\"    Author: [FIXME: author] [see http://docbook.sf.net/el/author]
+.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
+.\"      Date: 04/20/2013
+.\"    Manual: The Netwide Assembler Project
+.\"    Source: NASM
+.\"  Language: English
+.\"
+.TH "NDISASM" "1" "04/20/2013" "NASM" "The Netwide Assembler Project"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+ndisasm \- the Netwide Disassembler, an 80x86 binary file disassembler
+.SH "SYNOPSIS"
+.sp
+\fBndisasm\fR [ \fB\-o\fR origin ] [ \fB\-s\fR sync\-point [\&...]] [ \fB\-a\fR | \fB\-i\fR ] [ \fB\-b\fR bits ] [ \fB\-u\fR ] [ \fB\-e\fR hdrlen ] [ \fB\-p\fR vendor ] [ \fB\-k\fR offset,length [\&...]] infile
+.SH "DESCRIPTION"
+.sp
+The \fBndisasm\fR command generates a disassembly listing of the binary file infile and directs it to stdout\&.
+.SH "OPTIONS"
+.PP
+\fB\-h\fR
+.RS 4
+Causes
+\fBndisasm\fR
+to exit immediately, after giving a summary of its invocation options\&.
+.RE
+.PP
+\fB\-r\fR|\fB\-v\fR
+.RS 4
+Causes
+\fBndisasm\fR
+to exit immediately, after displaying its version number\&.
+.RE
+.PP
+\fB\-o\fR \fIorigin\fR
+.RS 4
+Specifies the notional load address for the file\&. This option causes
+\fBndisasm\fR
+to get the addresses it lists down the left hand margin, and the target addresses of PC\-relative jumps and calls, right\&.
+.RE
+.PP
+\fB\-s\fR \fIsync\-point\fR
+.RS 4
+Manually specifies a synchronisation address, such that
+\fBndisasm\fR
+will not output any machine instruction which encompasses bytes on both sides of the address\&. Hence the instruction which starts at that address will be correctly disassembled\&.
+.RE
+.PP
+\fB\-e\fR \fIhdrlen\fR
+.RS 4
+Specifies a number of bytes to discard from the beginning of the file before starting disassembly\&. This does not count towards the calculation of the disassembly offset: the first
+\fIdisassembled\fR
+instruction will be shown starting at the given load address\&.
+.RE
+.PP
+\fB\-k\fR \fIoffset,length\fR
+.RS 4
+Specifies that
+\fIlength\fR
+bytes, starting from disassembly offset
+\fIoffset\fR, should be skipped over without generating any output\&. The skipped bytes still count towards the calculation of the disassembly offset\&.
+.RE
+.PP
+\fB\-a\fR|\fB\-i\fR
+.RS 4
+Enables automatic (or intelligent) sync mode, in which
+\fBndisasm\fR
+will attempt to guess where synchronisation should be performed, by means of examining the target addresses of the relative jumps and calls it disassembles\&.
+.RE
+.PP
+\fB\-b\fR \fIbits\fR
+.RS 4
+Specifies 16\-, 32\- or 64\-bit mode\&. The default is 16\-bit mode\&.
+.RE
+.PP
+\fB\-u\fR
+.RS 4
+Specifies 32\-bit mode, more compactly than using \(oq\-b 32\(cq\&.
+.RE
+.PP
+\fB\-p\fR \fIvendor\fR
+.RS 4
+Prefers instructions as defined by
+\fIvendor\fR
+in case of a conflict\&. Known
+\fIvendor\fR
+names include
+\fBintel\fR,
+\fBamd\fR,
+\fBcyrix\fR, and
+\fBidt\fR\&. The default is
+\fBintel\fR\&.
+.RE
+.SH "RESTRICTIONS"
+.sp
+\fBndisasm\fR only disassembles binary files: it has no understanding of the header information present in object or executable files\&. If you want to disassemble an object file, you should probably be using \fBobjdump\fR(1)\&.
+.sp
+Auto\-sync mode won\(cqt necessarily cure all your synchronisation problems: a sync marker can only be placed automatically if a jump or call instruction is found to refer to it \fIbefore\fR \fBndisasm\fR actually disassembles that part of the code\&. Also, if spurious jumps or calls result from disassembling non\-machine\-code data, sync markers may get placed in strange places\&. Feel free to turn auto\-sync off and go back to doing it manually if necessary\&.
+.SH "SEE ALSO"
+.sp
+\fBobjdump\fR(1)

[nasm:master] doc: changes -- Prepare for 2.10.09

From: nasm-bot f. C. G. <gor...@gm...> - 2013-07-23 05:39:23

Commit-ID:  e74e494805ba9adb928f9a8f2e4896c38d3fe264
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=e74e494805ba9adb928f9a8f2e4896c38d3fe264
Author:     Cyrill Gorcunov <gor...@gm...>
AuthorDate: Tue, 23 Jul 2013 09:36:00 +0400
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Tue, 23 Jul 2013 09:36:00 +0400

doc: changes -- Prepare for 2.10.09

Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 doc/changes.src | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/doc/changes.src b/doc/changes.src
index c4eb4ca..bcd2ee6 100644
--- a/doc/changes.src
+++ b/doc/changes.src
@@ -7,6 +7,10 @@
 The NASM 2 series supports x86-64, and is the production version of NASM
 since 2007.
 
+\S{cl-2.10.09} Version 2.10.09
+
+\b Pregenerate man pages.
+
 \S{cl-2.10.08} Version 2.10.08
 
 \b Fix \c{VMOVNTDQA}, \c{MOVNTDQA} and \c{MOVLPD} instructions.

[nasm:master] NASM 2.10.09

From: nasm-bot f. C. G. <gor...@gm...> - 2013-07-23 05:39:22

Commit-ID:  b775985beefc968f9862d45764f7c7ad8e949299
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=b775985beefc968f9862d45764f7c7ad8e949299
Author:     Cyrill Gorcunov <gor...@gm...>
AuthorDate: Tue, 23 Jul 2013 09:36:13 +0400
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Tue, 23 Jul 2013 09:36:13 +0400

NASM 2.10.09



---
 version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/version b/version
index a55a51c..bb1af4a 100644
--- a/version
+++ b/version
@@ -1 +1 @@
-2.10.08
+2.10.09

[nasm:master] NASM 2.10.08

From: nasm-bot f. C. G. <gor...@gm...> - 2013-07-21 17:03:28

Commit-ID:  57a576ce1e027e62afcc45d5f698d22251b86207
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=57a576ce1e027e62afcc45d5f698d22251b86207
Author:     Cyrill Gorcunov <gor...@gm...>
AuthorDate: Sun, 21 Jul 2013 20:58:43 +0400
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Sun, 21 Jul 2013 20:58:43 +0400

NASM 2.10.08

Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/version b/version
index 80278b5..a55a51c 100644
--- a/version
+++ b/version
@@ -1 +1 @@
-2.10.07
+2.10.08

[nasm:master] test: Add br3392259

From: nasm-bot f. C. G. <gor...@gm...> - 2013-07-21 10:51:32

Commit-ID:  31d73aefb35a7446fca8f139d113fd71ea9a4c46
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=31d73aefb35a7446fca8f139d113fd71ea9a4c46
Author:     Cyrill Gorcunov <gor...@gm...>
AuthorDate: Sun, 21 Jul 2013 11:18:01 +0400
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Sun, 21 Jul 2013 14:45:48 +0400

test: Add br3392259

Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 test/br3392259.asm | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/test/br3392259.asm b/test/br3392259.asm
new file mode 100644
index 0000000..bf12f0a
--- /dev/null
+++ b/test/br3392259.asm
@@ -0,0 +1,8 @@
+;Testname=br3392259; Arguments=-Ox -felf64 -obr3392259.o; Files=stdout stderr br3392259.o
+[BITS 64]
+
+	VMOVNTDQA	ymm1, yword [rsi]	; fails: "error: invalid combination of opcode and operands"
+	VMOVNTDQA	ymm1, [rsi]		; works
+	VMOVNTDQA	xmm1, oword [rsi]	; works
+	MOVNTDQA	xmm1, oword [rsi]	; fails, see bug 978756: "error: mismatch in operand sizes"
+	MOVNTDQA	xmm1, [rsi]		; works

[nasm:master] doc: changes -- Add fixes for VMOVNTDQA, MOVNTDQA, MOVLPD

From: nasm-bot f. C. G. <gor...@gm...> - 2013-07-21 10:51:31

Commit-ID:  85f734b6abd31bdec82b564d19c8ee531fd182d4
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=85f734b6abd31bdec82b564d19c8ee531fd182d4
Author:     Cyrill Gorcunov <gor...@gm...>
AuthorDate: Sun, 21 Jul 2013 11:19:27 +0400
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Sun, 21 Jul 2013 14:47:51 +0400

doc: changes -- Add fixes for VMOVNTDQA, MOVNTDQA, MOVLPD

Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 doc/changes.src | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/changes.src b/doc/changes.src
index 24a53e5..c4eb4ca 100644
--- a/doc/changes.src
+++ b/doc/changes.src
@@ -9,6 +9,8 @@ since 2007.
 
 \S{cl-2.10.08} Version 2.10.08
 
+\b Fix \c{VMOVNTDQA}, \c{MOVNTDQA} and \c{MOVLPD} instructions.
+
 \b Fix collision for \c{VGATHERQPS}, \c{VPGATHERQD} instructions.
 
 \b Fix \c{VPMOVSXBQ}, \c{VGATHERQPD}, \c{VSPLLW} instructions.

[nasm:master] test: Add br978756

From: nasm-bot f. C. G. <gor...@gm...> - 2013-07-21 10:51:30

Commit-ID:  340ac1a915d1cf4cf9e5cc62a42a660646c0af3a
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=340ac1a915d1cf4cf9e5cc62a42a660646c0af3a
Author:     Cyrill Gorcunov <gor...@gm...>
AuthorDate: Sun, 21 Jul 2013 14:45:02 +0400
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Sun, 21 Jul 2013 14:45:48 +0400

test: Add br978756

Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 test/br978756.asm | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/test/br978756.asm b/test/br978756.asm
new file mode 100644
index 0000000..3b85c2f
--- /dev/null
+++ b/test/br978756.asm
@@ -0,0 +1,7 @@
+;Testname=br978756; Arguments=-Ox -felf64 -obr978756.o; Files=stdout stderr br978756.o
+[bits 64]
+	MOVNTDQA	xmm1, oword [rsi]
+	MOVLPD		xmm2, qword [rdi]
+	MOVLPD		xmm2, [rdi]
+	MOVLPD		qword [rdi], xmm2
+	MOVLPD		[rdi], xmm2

[nasm:master] insns: Fix VMOVNTDQA instruction

From: nasm-bot f. C. G. <gor...@gm...> - 2013-07-21 10:51:29

Commit-ID:  d26ac53fe3fd5b9e617d3d8a7b908dad6b35246c
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=d26ac53fe3fd5b9e617d3d8a7b908dad6b35246c
Author:     Cyrill Gorcunov <gor...@gm...>
AuthorDate: Sun, 21 Jul 2013 11:13:15 +0400
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Sun, 21 Jul 2013 11:13:15 +0400

insns: Fix VMOVNTDQA instruction

http://bugzilla.nasm.us/show_bug.cgi?id=3392259

Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 insns.dat | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/insns.dat b/insns.dat
index 9c39c6b..af1175b 100644
--- a/insns.dat
+++ b/insns.dat
@@ -3315,7 +3315,7 @@ VPUNPCKLWD	ymmreg,ymmreg*,ymmrm256		[rvm:	vex.nds.256.66.0f 61 /r]		FUTURE,AVX2
 VPUNPCKLDQ	ymmreg,ymmreg*,ymmrm256		[rvm:	vex.nds.256.66.0f 62 /r]		FUTURE,AVX2
 VPUNPCKLQDQ	ymmreg,ymmreg*,ymmrm256		[rvm:	vex.nds.256.66.0f 6c /r]		FUTURE,AVX2
 VPXOR		ymmreg,ymmreg*,ymmrm256		[rvm:	vex.nds.256.66.0f ef /r]		FUTURE,AVX2
-VMOVNTDQA	ymmreg,mem128			[rm:	vex.256.66.0f38 2a /r]			FUTURE,AVX2
+VMOVNTDQA	ymmreg,mem256			[rm:	vex.256.66.0f38 2a /r]			FUTURE,AVX2
 VBROADCASTSS	xmmreg,xmmreg			[rm:	vex.128.66.0f38.w0 18 /r]		FUTURE,AVX2
 VBROADCASTSS	ymmreg,xmmreg			[rm:	vex.256.66.0f38.w0 18 /r]		FUTURE,AVX2
 VBROADCASTSD	ymmreg,xmmreg			[rm:	vex.256.66.0f38.w0 19 /r]		FUTURE,AVX2

[nasm:master] insns: Fix MOVNTDQA instruction

From: nasm-bot f. C. G. <gor...@gm...> - 2013-07-21 10:51:28

Commit-ID:  7dff2f028ac2b6bac823036fad849a63195a0efc
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=7dff2f028ac2b6bac823036fad849a63195a0efc
Author:     Cyrill Gorcunov <gor...@gm...>
AuthorDate: Sun, 21 Jul 2013 11:14:04 +0400
Committer:  Cyrill Gorcunov <gor...@gm...>
CommitDate: Sun, 21 Jul 2013 11:14:04 +0400

insns: Fix MOVNTDQA instruction

http://bugzilla.nasm.us/show_bug.cgi?id=3392259

Signed-off-by: Cyrill Gorcunov <gor...@gm...>


---
 insns.dat | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/insns.dat b/insns.dat
index af1175b..ed2f2c8 100644
--- a/insns.dat
+++ b/insns.dat
@@ -1919,7 +1919,7 @@ DPPS		xmmreg,xmmrm,imm		[rmi:	66 0f 3a 40 /r ib,u]			SSE41
 EXTRACTPS	rm32,xmmreg,imm			[mri:	66 0f 3a 17 /r ib,u]			SSE41
 EXTRACTPS	reg64,xmmreg,imm		[mri:	o64 66 0f 3a 17 /r ib,u]		SSE41,X64
 INSERTPS	xmmreg,xmmrm,imm		[rmi:	66 0f 3a 21 /r ib,u]			SSE41,SD
-MOVNTDQA	xmmreg,mem			[rm:	66 0f 38 2a /r]				SSE41
+MOVNTDQA	xmmreg,mem128			[rm:	66 0f 38 2a /r]				SSE41
 MPSADBW		xmmreg,xmmrm,imm		[rmi:	66 0f 3a 42 /r ib,u]			SSE41
 PACKUSDW	xmmreg,xmmrm			[rm:	66 0f 38 2b /r]				SSE41
 PBLENDVB	xmmreg,xmmrm,xmm0		[rm-:	66 0f 38 10 /r]				SSE41

172 messages have been excluded from this view by a project administrator.

Flat | Threaded

<< < 1 .. 30 31 32 33 34 .. 154 > >> (Page 32 of 154)