Thread: [Nasm-devel] [PATCH] AVX-512: Add support for parsing braces

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

AVX-512 introduced new syntax using braces for decorators.
Opmask, broadcast, and rounding control use this new syntax.

http://software.intel.com/sites/default/files/319433-015.pdf

Signed-off-by: Jin Kyu Song <jin...@in...>
---
 eval.c     |    4 +++
 nasm.h     |  108 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 opflags.h  |   21 ++++++++----
 parser.c   |   94 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 regs.dat   |   15 ++++++++-
 regs.pl    |    2 +-
 stdscan.c  |   94 +++++++++++++++++++++++++++++++++++++++++++++++++---
 tables.h   |    3 +-
 tokens.dat |   28 ++++++++++++----
 tokhash.pl |   17 +++++++---
 10 files changed, 358 insertions(+), 28 deletions(-)

diff --git a/eval.c b/eval.c
index 0035088..c57ff04 100644
--- a/eval.c
+++ b/eval.c
@@ -869,6 +869,7 @@ static expr *expr6(int critical)
     case TOKEN_INSN:            /* Opcodes that occur here are really labels */
     case TOKEN_HERE:
     case TOKEN_BASE:
+    case TOKEN_DECORATOR:
         begintemp();
         switch (i) {
         case TOKEN_NUM:
@@ -938,6 +939,9 @@ static expr *expr6(int critical)
             if (label_seg != NO_SEG)
                 addtotemp(EXPR_SEGBASE + label_seg, 1L);
             break;
+        case TOKEN_DECORATOR:
+            addtotemp(EXPR_RDSAE, tokval->t_integer);
+            break;
         }
         i = scan(scpriv, tokval);
         return finishtemp();
diff --git a/nasm.h b/nasm.h
index 7802d9b..fb6c6e9 100644
--- a/nasm.h
+++ b/nasm.h
@@ -226,6 +226,8 @@ enum token_type { /* token types, other than chars */
     TOKEN_FLOATIZE,     /* __floatX__ */
     TOKEN_STRFUNC,      /* __utf16*__, __utf32*__ */
     TOKEN_IFUNC,        /* __ilog2*__ */
+    TOKEN_DECORATOR,    /* decorators such as {...} */
+    TOKEN_OPMASK,       /* translated token for opmask registers */
 };
 
 enum floatize {
@@ -272,6 +274,7 @@ struct tokenval {
     int64_t             t_integer;
     int64_t             t_inttwo;
     enum token_type     t_type;
+    int8_t              t_flag;
 };
 typedef int (*scanner)(void *private_data, struct tokenval *tv);
 
@@ -352,11 +355,14 @@ typedef expr *(*evalfunc)(scanner sc, void *scprivate,
 /*
  * Special values for expr->type.
  * These come after EXPR_REG_END as defined in regs.h.
+ * Expr types : 0 ~ EXPR_REG_END, EXPR_UNKNOWN, EXPR_...., EXPR_RDSAE,
+ *              EXPR_SEGBASE ~ EXPR_SEGBASE + SEG_ABS, ...
  */
 #define EXPR_UNKNOWN    (EXPR_REG_END+1) /* forward references */
 #define EXPR_SIMPLE     (EXPR_REG_END+2)
 #define EXPR_WRT        (EXPR_REG_END+3)
-#define EXPR_SEGBASE    (EXPR_REG_END+4)
+#define EXPR_RDSAE      (EXPR_REG_END+4)
+#define EXPR_SEGBASE    (EXPR_REG_END+5)
 
 /*
  * Linked list of strings
@@ -466,6 +472,14 @@ enum ccode { /* condition code names */
     C_none = -1
 };
 
+/*
+ * token flags
+ */
+#define TFLAG_BRC       (1 << 0)    /* valid only with braces. {1to8}, {rd-sae}, ...*/
+#define TFLAG_BRC_OPT   (1 << 1)    /* may or may not have braces. opmasks {k1} */
+#define TFLAG_BRC_ANY   (TFLAG_BRC | TFLAG_BRC_OPT)
+#define TFLAG_BRDCAST   (1 << 2)    /* broadcasting decorator */
+
 static inline uint8_t get_cond_opcode(enum ccode c)
 {
     static const uint8_t ccode_opcodes[] = {
@@ -563,6 +577,7 @@ typedef struct operand { /* operand to an instruction */
     int32_t         wrt;        /* segment base it's relative to */
     int             eaflags;    /* special EA flags */
     int             opflags;    /* see OPFLAG_* defines below */
+    decoflags_t     decoflags;  /* decorator flags such as {...} */
 } operand;
 
 #define OPFLAG_FORWARD      1   /* operand is a forward reference */
@@ -627,6 +642,7 @@ typedef struct insn { /* an instruction itself */
     int             vexreg;                 /* Register encoded in VEX prefix */
     int             vex_cm;                 /* Class and M field for VEX prefix */
     int             vex_wlp;                /* W, P and L information for VEX prefix */
+    int             evex_rm;                /* static rounding mode for AVX3 (EVEX) */
 } insn;
 
 enum geninfo { GI_SWITCH };
@@ -951,6 +967,96 @@ enum special_tokens {
     SPECIAL_ENUM_LIMIT
 };
 
+enum decorator_tokens {
+    DECORATOR_ENUM_START    = SPECIAL_ENUM_LIMIT,   /* numbering continues after enum special_tokens */
+    BRC_1TO8                = DECORATOR_ENUM_START, /* {1to8}   broadcast */
+    BRC_1TO16,              /* {1to16}  broadcast */
+    BRC_RN,                 /* {rn-sae} rounding */
+    BRC_RU,                 /* {ru-sae} rounding */
+    BRC_RD,                 /* {rd-sae} rounding */
+    BRC_RZ,                 /* {rz-sae} rounding */
+    BRC_SAE,                /* {sae} */
+    BRC_Z,                  /* {z}      zeroing/merging control */
+    DECORATOR_ENUM_LIMIT    /* one past the last decorator token */
+};
+
+/*
+ * AVX512 Decorator (decoflags_t) bits distribution (counted from 0)
+ *  3         2         1
+ * 10987654321098765432109876543210
+ *                |
+ *                | word boundary
+ * ............................1111 opmask
+ * ...........................1.... zeroing / merging
+ * ..........................1..... broadcast
+ * .........................1...... static rounding
+ * ........................1....... SAE
+ */
+
+/*
+ * Opmask register number
+ * identical to EVEX.aaa
+ *
+ * Bits: 0 - 3
+ */
+#define OPMASK_SHIFT            (0)
+#define OPMASK_BITS             (4)
+#define OPMASK_MASK             OP_GENMASK(OPMASK_BITS, OPMASK_SHIFT)
+#define GEN_OPMASK(bit)         OP_GENBIT(bit, OPMASK_SHIFT)
+#define VAL_OPMASK(val)         OP_GENVAL(val, OPMASK_BITS, OPMASK_SHIFT)
+
+/*
+ * zeroing / merging control available
+ * matching to EVEX.z
+ *
+ * Bits: 4
+ */
+#define Z_SHIFT                 (4)
+#define Z_BITS                  (1)
+#define Z_MASK                  OP_GENMASK(Z_BITS, Z_SHIFT)
+#define GEN_Z(bit)              OP_GENBIT(bit, Z_SHIFT)
+#define VAL_Z(val)              OP_GENVAL(val, Z_BITS, Z_SHIFT)
+
+/*
+ * broadcast - Whether this operand can be broadcasted
+ *
+ * Bits: 5
+ */
+#define BRDCAST_SHIFT           (5)
+#define BRDCAST_BITS            (1)
+#define BRDCAST_MASK            OP_GENMASK(BRDCAST_BITS, BRDCAST_SHIFT)
+#define GEN_BRDCAST(bit)        OP_GENBIT(bit, BRDCAST_SHIFT)
+#define VAL_BRDCAST(val)        OP_GENVAL(val, BRDCAST_BITS, BRDCAST_SHIFT)
+
+/*
+ * Whether this instruction can have a static rounding mode.
+ * It goes with the last simd operand because the static rounding mode
+ * decorator is located between the last simd operand and imm8 (if any).
+ *
+ * Bits: 6
+ */
+#define STATICRND_SHIFT         (6)
+#define STATICRND_BITS          (1)
+#define STATICRND_MASK          OP_GENMASK(STATICRND_BITS, STATICRND_SHIFT)
+#define GEN_STATICRND(bit)      OP_GENBIT(bit, STATICRND_SHIFT)
+
+/*
+ * SAE (Suppress All Exceptions) available
+ *
+ * Bits: 7
+ */
+#define SAE_SHIFT               (7)
+#define SAE_BITS                (1)
+#define SAE_MASK                OP_GENMASK(SAE_BITS, SAE_SHIFT)
+#define GEN_SAE(bit)            OP_GENBIT(bit, SAE_SHIFT)
+
+#define MASK                    OPMASK_MASK             /* Opmask (k1 ~ 7) can be used */
+#define Z                       Z_MASK
+#define B32                     BRDCAST_MASK            /* {1to16} : load+op instruction can broadcast when it is reg-reg operation */
+#define B64                     BRDCAST_MASK            /* {1to8}  : There are two definitions just for conforming to SDM */
+#define ER                      STATICRND_MASK          /* ER(Embedded Rounding) == Static rounding mode */
+#define SAE                     SAE_MASK                /* SAE(Suppress All Exception) */
+
 /*
  * Global modes
  */
diff --git a/opflags.h b/opflags.h
index 41fce3d..ed7f8ee 100644
--- a/opflags.h
+++ b/opflags.h
@@ -39,6 +39,7 @@
 #define NASM_OPFLAGS_H
 
 #include "compiler.h"
+#include "tables.h"     /* for opflags_t and nasm_reg_flags[] */
 
 /*
  * Here we define the operand types. These are implemented as bit
@@ -53,10 +54,9 @@
  * if and only if "operand" belongs to class type "class".
  */
 
-typedef uint64_t opflags_t;
-
 #define OP_GENMASK(bits, shift)         (((UINT64_C(1) << (bits)) - 1) << (shift))
 #define OP_GENBIT(bit, shift)           (UINT64_C(1) << ((shift) + (bit)))
+#define OP_GENVAL(val, bits, shift)     (((val) & ((UINT64_C(1) << (bits)) - 1)) << (shift))
 
 /*
  * Type of operand: memory reference, register, etc.
@@ -162,11 +162,14 @@ typedef uint64_t opflags_t;
 #define REG_CLASS_RM_MMX        GEN_REG_CLASS(4)
 #define REG_CLASS_RM_XMM        GEN_REG_CLASS(5)
 #define REG_CLASS_RM_YMM        GEN_REG_CLASS(6)
+#define REG_CLASS_RM_ZMM        GEN_REG_CLASS(7)
+#define REG_CLASS_OPMASK        GEN_REG_CLASS(8)
 
-#define is_class(class, op)     (!((opflags_t)(class) & ~(opflags_t)(op)))
+#define is_class(class, op)         (!((opflags_t)(class) & ~(opflags_t)(op)))
+#define is_reg_class(class, reg)    is_class((class), nasm_reg_flags[(reg)])
 
-#define IS_SREG(op)             is_class(REG_SREG, nasm_reg_flags[(op)])
-#define IS_FSGS(op)             is_class(REG_FSGS, nasm_reg_flags[(op)])
+#define IS_SREG(op)                 is_reg_class(REG_SREG, (op))
+#define IS_FSGS(op)                 is_reg_class(REG_FSGS, (op))
 
 /* Register classes */
 #define REG_EA                  (                                               REGMEM | REGISTER)      /* 'normal' reg, qualifies as EA */
@@ -186,6 +189,12 @@ typedef uint64_t opflags_t;
 #define RM_YMM                  (                  REG_CLASS_RM_YMM           | REGMEM)                 /* YMM (AVX) operand */
 #define YMMREG                  (                  REG_CLASS_RM_YMM           | REGMEM | REGISTER)      /* YMM (AVX) register */
 #define YMM0                    (GEN_SUBCLASS(1) | REG_CLASS_RM_YMM           | REGMEM | REGISTER)      /* YMM register zero */
+#define RM_ZMM                  (                  REG_CLASS_RM_ZMM           | REGMEM)                 /* ZMM (AVX512) operand */
+#define ZMMREG                  (                  REG_CLASS_RM_ZMM           | REGMEM | REGISTER)      /* ZMM (AVX512) register */
+#define ZMM0                    (GEN_SUBCLASS(1) | REG_CLASS_RM_ZMM           | REGMEM | REGISTER)      /* ZMM register zero */
+#define RM_OPMASK               (                  REG_CLASS_OPMASK           | REGMEM)                 /* Opmask operand */
+#define OPMASKREG               (                  REG_CLASS_OPMASK           | REGMEM | REGISTER)      /* Opmask register */
+#define OPMASK0                 (GEN_SUBCLASS(1) | REG_CLASS_OPMASK           | REGMEM | REGISTER)      /* Opmask register zero (k0) */
 #define REG_CDT                 (                  REG_CLASS_CDT    | BITS32           | REGISTER)      /* CRn, DRn and TRn */
 #define REG_CREG                (GEN_SUBCLASS(1) | REG_CLASS_CDT    | BITS32           | REGISTER)      /* CRn */
 #define REG_DREG                (GEN_SUBCLASS(2) | REG_CLASS_CDT    | BITS32           | REGISTER)      /* DRn */
@@ -232,7 +241,7 @@ typedef uint64_t opflags_t;
 #define YMEM                    (GEN_SUBCLASS(4) | MEMORY)      /* 256-bit vector SIB */
 
 /* memory which matches any type of r/m operand */
-#define MEMORY_ANY              (MEMORY | RM_GPR | RM_MMX | RM_XMM | RM_YMM)
+#define MEMORY_ANY              (MEMORY | RM_GPR | RM_MMX | RM_XMM | RM_YMM | RM_ZMM)
 
 /* special immediate values */
 #define UNITY                   (GEN_SUBCLASS(0) | IMMEDIATE)   /* operand equals 1 */
diff --git a/parser.c b/parser.c
index afc422a..f7139f3 100644
--- a/parser.c
+++ b/parser.c
@@ -193,6 +193,51 @@ static void process_size_override(insn *result, int operand)
     }
 }
 
+/*
+ * Parse the run of brace decorators that trails a register operand,
+ * e.g. zmm1 {k2}{z} or zmm2 {z,k3}; their order does not matter.
+ * On entry tokval holds the first decorator token.  Scanning stops at
+ * ',' or end of line.  Accumulates the opmask register number and the
+ * zeroing flag into *decoflags; returns true if the caller must recover.
+ */
+static bool parse_braces(decoflags_t *decoflags)
+{
+    int i;
+    bool recover = false;
+
+    i = tokval.t_type;
+    do {
+        if (i == TOKEN_OPMASK) {
+            if (*decoflags & OPMASK_MASK) {
+                /* cast: the masked value is 64-bit, which %lu may not match */
+                nasm_error(ERR_NONFATAL, "opmask k%u is already set",
+                           (unsigned int)(*decoflags & OPMASK_MASK));
+                *decoflags &= ~OPMASK_MASK;
+            }
+            *decoflags |= VAL_OPMASK(nasm_regvals[tokval.t_integer]);
+        } else if (i == TOKEN_DECORATOR) {
+            switch (tokval.t_integer) {
+            case BRC_Z:     /* only zeroing/merging may accompany an opmask */
+                *decoflags |= GEN_Z(0);
+                break;
+            default:        /* report other decorators, don't drop them */
+                nasm_error(ERR_NONFATAL, "invalid decorator");
+                break;
+            }
+        } else if (i == ',' || i == TOKEN_EOS){
+            break;
+        } else {
+            nasm_error(ERR_NONFATAL, "only a series of valid decorators"
+                                     " expected");
+            recover = true;
+            break;
+        }
+        i = stdscan(NULL, &tokval);
+    } while(1);
+
+    return recover;
+}
+
 insn *parse_line(int pass, char *buffer, insn *result, ldfunc ldef)
 {
     bool insn_is_label = false;
@@ -557,10 +602,12 @@ is_expression:
         int mref;               /* is this going to be a memory ref? */
         int bracket;            /* is it a [] mref, or a & mref? */
         int setsize = 0;
+        decoflags_t brace_flags = 0;    /* flags for decorators in braces */
 
         result->oprs[operand].disp_size = 0;    /* have to zero this whatever */
         result->oprs[operand].eaflags   = 0;    /* and this */
         result->oprs[operand].opflags   = 0;
+        result->oprs[operand].decoflags = 0;
 
         i = stdscan(NULL, &tokval);
         if (i == TOKEN_EOS)
@@ -702,17 +749,37 @@ is_expression:
                 recover = true;
             } else {            /* we got the required ] */
                 i = stdscan(NULL, &tokval);
+                if (i == TOKEN_DECORATOR) {
+                    /*
+                     * according to AVX512 spec, only broadcast decorator is
+                     * expected for memory reference operands
+                     */
+                    if (tokval.t_flag & TFLAG_BRDCAST) {
+                        brace_flags |= GEN_BRDCAST(0);
+                        i = stdscan(NULL, &tokval);
+                    } else {
+                        nasm_error(ERR_NONFATAL, "broadcast decorator"
+                                   " expected inside braces");
+                        recover = true;
+                    }
+                }
+
                 if (i != 0 && i != ',') {
                     nasm_error(ERR_NONFATAL, "comma or end of line expected");
                     recover = true;
                 }
             }
         } else {                /* immediate operand */
-            if (i != 0 && i != ',' && i != ':') {
-                nasm_error(ERR_NONFATAL, "comma, colon or end of line expected");
+            if (i != 0 && i != ',' && i != ':' &&
+                i != TOKEN_DECORATOR && i != TOKEN_OPMASK) {
+                nasm_error(ERR_NONFATAL, "comma, colon, decorator or end of "
+                                         "line expected after operand");
                 recover = true;
             } else if (i == ':') {
                 result->oprs[operand].type |= COLON;
+            } else if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
+                /* parse opmask (and zeroing) after an operand */
+                recover = parse_braces(&brace_flags);
             }
         }
         if (recover) {
@@ -856,6 +923,7 @@ is_expression:
             result->oprs[operand].indexreg = i;
             result->oprs[operand].scale = s;
             result->oprs[operand].offset = o;
+            result->oprs[operand].decoflags |= brace_flags;
         } else {                /* it's not a memory reference */
             if (is_just_unknown(value)) {       /* it's immediate but unknown */
                 result->oprs[operand].type      |= IMMEDIATE;
@@ -891,6 +959,27 @@ is_expression:
                             result->oprs[operand].type |= SDWORD;
                     }
                 }
+            } else if(value->type == EXPR_RDSAE) {
+                /*
+                 * it's not an operand but a rounding or SAE decorator.
+                 * put the decorator information in the (opflag_t) type field
+                 * of previous operand.
+                 */
+                operand --;
+                switch (value->value) {
+                case BRC_RN:
+                case BRC_RU:
+                case BRC_RD:
+                case BRC_RZ:
+                case BRC_SAE:
+                    result->oprs[operand].decoflags  |=
+                                        (value->value == BRC_SAE ? SAE : ER);
+                    result->evex_rm = value->value;
+                    break;
+                default:
+                    nasm_error(ERR_NONFATAL, "invalid decorator");
+                    break;
+                }
             } else {            /* it's a register */
                 opflags_t rs;
 
@@ -923,6 +1012,7 @@ is_expression:
                 result->oprs[operand].type      &= TO;
                 result->oprs[operand].type      |= REGISTER;
                 result->oprs[operand].type      |= nasm_reg_flags[value->type];
+                result->oprs[operand].decoflags |= brace_flags;
                 result->oprs[operand].basereg   = value->type;
 
                 if (rs && (result->oprs[operand].type & SIZE_MASK) != rs)
diff --git a/regs.dat b/regs.dat
index 57cef6a..742b69d 100644
--- a/regs.dat
+++ b/regs.dat
@@ -36,12 +36,17 @@
 #
 # The columns are:
 #
-# register name, assembler class, disassembler class(es), x86 register number
+# register name, assembler class, disassembler class(es), x86 register number[, token flag]
 #
 # If the register name ends in two numbers separated by a dash, then it is
 # repeated as many times as indicated, and the register number is
 # updated with it.
 #
+# If 'token flag' is present, this value will be assigned to tokflag field in
+# 'struct tokendata tokendata[]' table. Token flag can be used for specifying
+# special usage of corresponding register. E.g. opmask registers can be either
+# enclosed by curly braces or standalone operand depending on the usage.
+#
 
 # General-purpose registers
 al	REG_AL		reg8,reg8_rex	0
@@ -117,3 +122,11 @@ xmm1-15	XMMREG		xmmreg		1
 # AVX registers
 ymm0	YMM0		ymmreg		0
 ymm1-15	YMMREG		ymmreg		1
+
+# AVX3 registers
+zmm0	ZMM0		zmmreg		0
+zmm1-31	ZMMREG		zmmreg		1
+
+# Opmask registers
+k0		OPMASK0		opmaskreg	0
+k1-7	OPMASKREG	opmaskreg	1   TFLAG_BRC_OPT
diff --git a/regs.pl b/regs.pl
index 82c4829..52e5ca3 100755
--- a/regs.pl
+++ b/regs.pl
@@ -48,7 +48,7 @@ sub process_line($) {
     my($line) = @_;
     my @v;
 
-    if ( $line !~ /^\s*(\S+)\s*(\S+)\s*(\S+)\s*([0-9]+)$/i ) {
+    if ( $line !~ /^\s*(\S+)\s*(\S+)\s*(\S+)\s*([0-9]+)\s*(\S*)/i ) {
 	die "regs.dat:$nline: invalid input\n";
     }
     $reg      = $1;
diff --git a/stdscan.c b/stdscan.c
index b7d8000..b5e389d 100644
--- a/stdscan.c
+++ b/stdscan.c
@@ -53,6 +53,8 @@
 static char *stdscan_bufptr = NULL;
 static char **stdscan_tempstorage = NULL;
 static int stdscan_tempsize = 0, stdscan_templen = 0;
+static int brace = 0;               /* nested brace counter */
+static bool brace_opened = false;   /* if brace is just opened */
 #define STDSCAN_TEMP_DELTA 256
 
 void stdscan_set(char *str)
@@ -105,6 +107,40 @@ static char *stdscan_copy(char *p, int len)
     return text;
 }
 
+/*
+ * Finish a token that was scanned inside braces.  The token type is
+ * adjusted according to the token flag, and a closing brace directly
+ * after the token is consumed as that token's terminator.
+ */
+static int stdscan_handle_brace(struct tokenval *tv)
+{
+    char next;
+
+    if (tv->t_flag & TFLAG_BRC_ANY) {
+        /* an opmask register inside braces is used as a mask */
+        if ((tv->t_flag & TFLAG_BRC_OPT) &&
+            is_reg_class(OPMASKREG, tv->t_integer))
+            tv->t_type = TOKEN_OPMASK;
+    } else {
+        /* this token may not appear inside braces */
+        nasm_error(ERR_NONFATAL,
+                    "%s is not a valid decorator with braces", tv->t_charptr);
+        tv->t_type = TOKEN_INVALID;
+    }
+
+    stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
+    next = stdscan_bufptr[0];
+    if (next == '}') {
+        stdscan_bufptr++;       /* consume the closing brace */
+        brace--;
+    } else if (next != ',') {
+        /* a ',' merely separates decorators: {foo,bar} == {foo}{bar} */
+        nasm_error(ERR_NONFATAL, "closing brace expected");
+        tv->t_type = TOKEN_INVALID;
+    }
+    return tv->t_type;
+}
+
 int stdscan(void *private_data, struct tokenval *tv)
 {
     char ourcopy[MAX_KEYWORD + 1], *r, *s;
@@ -112,14 +148,22 @@ int stdscan(void *private_data, struct tokenval *tv)
     (void)private_data;         /* Don't warn that this parameter is unused */
 
     stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
-    if (!*stdscan_bufptr)
+    if (!*stdscan_bufptr) {
+        /* nested brace shouldn't affect following lines */
+        brace = 0;
         return tv->t_type = TOKEN_EOS;
+    }
 
     /* we have a token; either an id, a number or a char */
     if (isidstart(*stdscan_bufptr) ||
-        (*stdscan_bufptr == '$' && isidstart(stdscan_bufptr[1]))) {
+        (*stdscan_bufptr == '$' && isidstart(stdscan_bufptr[1])) ||
+        (brace && isidchar(*stdscan_bufptr))) {     /* because of {1to8} */
         /* now we've got an identifier */
         bool is_sym = false;
+        int token_type;
+
+        /* opening brace is followed by any letter */
+        brace_opened = false;
 
         if (*stdscan_bufptr == '$') {
             is_sym = true;
@@ -128,7 +172,8 @@ int stdscan(void *private_data, struct tokenval *tv)
 
         r = stdscan_bufptr++;
         /* read the entire buffer to advance the buffer pointer but... */
-        while (isidchar(*stdscan_bufptr))
+        /* {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae} contain '-' in tokens. */
+        while (isidchar(*stdscan_bufptr) || (brace && *stdscan_bufptr == '-'))
             stdscan_bufptr++;
 
         /* ... copy only up to IDLEN_MAX-1 characters */
@@ -143,7 +188,19 @@ int stdscan(void *private_data, struct tokenval *tv)
         *r = '\0';
         /* right, so we have an identifier sitting in temp storage. now,
          * is it actually a register or instruction name, or what? */
-        return nasm_token_hash(ourcopy, tv);
+        token_type = nasm_token_hash(ourcopy, tv);
+
+        if (likely(!brace)) {
+            if (likely(!(tv->t_flag & TFLAG_BRC))) {
+                /* most of the tokens fall into this case */
+                return token_type;
+            } else {
+                return tv->t_type = TOKEN_ID;
+            }
+        } else {
+            /* handle tokens inside braces */
+            return stdscan_handle_brace(tv);
+        }
     } else if (*stdscan_bufptr == '$' && !isnumchar(stdscan_bufptr[1])) {
         /*
          * It's a $ sign with no following hex number; this must
@@ -267,6 +324,35 @@ int stdscan(void *private_data, struct tokenval *tv)
     } else if (stdscan_bufptr[0] == '|' && stdscan_bufptr[1] == '|') {
         stdscan_bufptr += 2;
         return tv->t_type = TOKEN_DBL_OR;
+    } else if (stdscan_bufptr[0] == '{') {
+        stdscan_bufptr ++;      /* skip the opening brace */
+        brace ++;               /* in case of nested braces */
+        brace_opened = true;    /* brace is just opened */
+        return stdscan(private_data, tv);
+    } else if (stdscan_bufptr[0] == ',' && brace) {
+        /*
+         * a comma inside braces should be treated just as a separator.
+         * this is almost same as an opening brace except increasing counter.
+         */
+        stdscan_bufptr ++;
+        brace_opened = true;    /* brace is just opened */
+        return stdscan(private_data, tv);
+    } else if (stdscan_bufptr[0] == '}') {
+        stdscan_bufptr ++;      /* skip the closing brace */
+        if (brace) {
+            /* unhandled nested closing brace */
+            brace --;
+            /* if brace is closed without any content in it */
+            if (brace_opened) {
+                brace_opened = false;
+                nasm_error(ERR_NONFATAL, "nothing inside braces");
+            }
+            return stdscan(private_data, tv);
+        } else {
+            /* redundant closing brace */
+            return tv->t_type = TOKEN_INVALID;
+        }
+        return stdscan(private_data, tv);
     } else                      /* just an ordinary char */
         return tv->t_type = (uint8_t)(*stdscan_bufptr++);
 }
diff --git a/tables.h b/tables.h
index e6f84cb..d0db3b3 100644
--- a/tables.h
+++ b/tables.h
@@ -43,7 +43,6 @@
 #include "compiler.h"
 #include <inttypes.h>
 #include "insnsi.h"		/* For enum opcode */
-#include "opflags.h"		/* For opflags_t */
 
 /* --- From standard.mac via macros.pl: --- */
 
@@ -62,6 +61,8 @@ extern const char * const nasm_insn_names[];
 /* regs.c */
 extern const char * const nasm_reg_names[];
 /* regflags.c */
+typedef uint64_t opflags_t;
+typedef uint8_t  decoflags_t;
 extern const opflags_t nasm_reg_flags[];
 /* regvals.c */
 extern const int nasm_regvals[];
diff --git a/tokens.dat b/tokens.dat
index c2df469..1a00e3d 100644
--- a/tokens.dat
+++ b/tokens.dat
@@ -35,7 +35,7 @@
 # Tokens other than instructions and registers
 #
 
-% TOKEN_PREFIX, 0, P_*
+% TOKEN_PREFIX, 0, 0, P_*
 a16
 a32
 a64
@@ -55,7 +55,7 @@ wait
 xacquire
 xrelease
 
-% TOKEN_SPECIAL, 0, S_*
+% TOKEN_SPECIAL, 0, 0, S_*
 abs
 byte
 dword
@@ -73,13 +73,13 @@ tword
 word
 yword
 
-% TOKEN_FLOAT, 0, 0
+% TOKEN_FLOAT, 0, 0, 0
 __infinity__
 __nan__
 __qnan__
 __snan__
 
-% TOKEN_FLOATIZE, 0, FLOAT_{__float*__}
+% TOKEN_FLOATIZE, 0, 0, FLOAT_{__float*__}
 __float8__
 __float16__
 __float32__
@@ -89,7 +89,7 @@ __float80e__
 __float128l__
 __float128h__
 
-% TOKEN_STRFUNC, 0, STRFUNC_{__*__}
+% TOKEN_STRFUNC, 0, 0, STRFUNC_{__*__}
 __utf16__
 __utf16le__
 __utf16be__
@@ -97,12 +97,26 @@ __utf32__
 __utf32le__
 __utf32be__
 
-% TOKEN_IFUNC, 0, IFUNC_{__*__}
+% TOKEN_IFUNC, 0, 0, IFUNC_{__*__}
 __ilog2e__
 __ilog2w__
 __ilog2f__
 __ilog2c__
 
-% TOKEN_*, 0, 0
+% TOKEN_*, 0, 0, 0
 seg
 wrt
+
+% TOKEN_DECORATOR, 0, TFLAG_BRC | TFLAG_BRDCAST , BRC_1TO{1to*}
+1to8
+1to16
+
+% TOKEN_DECORATOR, 0, TFLAG_BRC, BRC_{*-sae}
+rn-sae
+rd-sae
+ru-sae
+rz-sae
+
+% TOKEN_DECORATOR, 0, TFLAG_BRC, BRC_*
+sae
+z
diff --git a/tokhash.pl b/tokhash.pl
index 6c05802..4ea387d 100755
--- a/tokhash.pl
+++ b/tokhash.pl
@@ -65,14 +65,14 @@ while (defined($line = <ID>)) {
 	    # Single instruction token
 	    if (!defined($tokens{$token})) {
 		$tokens{$token} = scalar @tokendata;
-		push(@tokendata, "\"${token}\", TOKEN_INSN, C_none, I_${insn}");
+		push(@tokendata, "\"${token}\", TOKEN_INSN, C_none, 0, I_${insn}");
 	    }
 	} else {
 	    # Conditional instruction
 	    foreach $cc (@conditions) {
 		if (!defined($tokens{$token.$cc})) {
 		    $tokens{$token.$cc} = scalar @tokendata;
-		    push(@tokendata, "\"${token}${cc}\", TOKEN_INSN, C_\U$cc\E, I_${insn}");
+		    push(@tokendata, "\"${token}${cc}\", TOKEN_INSN, C_\U$cc\E, 0, I_${insn}");
 		}
 	    }
 	}
@@ -85,8 +85,9 @@ close(ID);
 #
 open(RD, "< ${regs_dat}") or die "$0: cannot open $regs_dat: $!\n";
 while (defined($line = <RD>)) {
-    if ($line =~ /^([a-z0-9_-]+)\s/) {
+    if ($line =~ /^([a-z0-9_-]+)\s*\S+\s*\S+\s*[0-9]+\s*(\S*)/) {
 	$reg = $1;
+	$reg_flag = $2;
 
 	if ($reg =~ /^(.*[^0-9])([0-9]+)\-([0-9]+)(|[^0-9].*)$/) {
 	    $nregs = $3-$2+1;
@@ -104,7 +105,11 @@ while (defined($line = <RD>)) {
 		die "Duplicate definition: $reg\n";
 	    }
 	    $tokens{$reg} = scalar @tokendata;
-	    push(@tokendata, "\"${reg}\", TOKEN_REG, 0, R_\U${reg}\E");
+	    if ($reg_flag eq '') {
+	        push(@tokendata, "\"${reg}\", TOKEN_REG, 0, 0, R_\U${reg}\E");
+	    } else {
+	        push(@tokendata, "\"${reg}\", TOKEN_REG, 0, ${reg_flag}, R_\U${reg}\E");
+	    }
 
 	    if (defined($reg_prefix)) {
 		$reg_nr++;
@@ -214,7 +219,8 @@ if ($output eq 'h') {
     print "struct tokendata {\n";
     print "    const char *string;\n";
     print "    int16_t tokentype;\n";
-    print "    int16_t aux;\n";
+    print "    int8_t aux;\n";
+    print "    int8_t tokflag;\n";
     print "    int32_t num;\n";
     print "};\n";
     print "\n";
@@ -270,6 +276,7 @@ if ($output eq 'h') {
     print  "\n";
     print  "    tv->t_integer = data->num;\n";
     print  "    tv->t_inttwo  = data->aux;\n";
+    print  "    tv->t_flag    = data->tokflag;\n";
     print  "    return tv->t_type = data->tokentype;\n";
     print  "}\n";
 }
-- 
1.7.9.5





Thread: [Nasm-devel] [PATCH] AVX-512: Add support for parsing braces

nasm-devel