|
Re: [cedet-semantic] using semantic/wisent as a parser
From: Joe Corneli <jcorneli@ma...> - 2004-05-09 00:29
|
Things are beginning to come along with this code for me. But here
is another problem that seems somewhat weird.
As part of my definition of expression (`expr') I have the following
rule:
| expr EQ expr
(TAG "expr eq" 'expr :value (concat "(equal " $1 " " $3 ")"))
| expr NEQ expr
(TAG "expr neq" 'expr :value (concat "(not (equal " $1 " " $3 "))"))
But this rule does not seem to be dealt with properly by the parser.
For example, neither of these two inputs is recognized:
foo <> bar
"foo" <> "bar"
If I modify the rule to say
| STRING_LITERAL EQ STRING_LITERAL
(TAG "expr eq" 'expr :value (concat "(equal " $1 " " $3 ")"))
| STRING_LITERAL NEQ STRING_LITERAL
(TAG "expr neq" 'expr :value (concat "(not (equal " $1 " " $3 "))"))
then
"foo" <> "bar"
is recognized just fine. Running semantic-lex-debug seems to
indicate that foo <> bar is lexically OK.
I'm including the revised file below. Perhaps someone can tell
me why this particular bit of code is causing trouble?
;;; simple.wy -- LALR grammar for (simplified) Tiger
%package simple-wy
;; Not really necessary, as it is the default start symbol
%start expr
%start function_parameters
%type <punctuation>
%token <punctuation> PLUS "+"
%token <punctuation> MINUS "-"
%token <punctuation> TIMES "*"
%token <punctuation> DIVIDE "/"
%type <symbol>
%token <symbol> symbol "[A-Za-z][_A-Za-z0-9]*"
%type <string>
%token <string> STRING_LITERAL
%type <number>
%token <number> NUMBER_LITERAL
%type <keyword>
%keyword WHILE "while"
%keyword FOR "for"
%keyword NIL "nil"
%keyword GREATER_THAN ">"
%keyword LESS_THAN "<"
;; these seem to need to be handled as keywords (not punctuation)
%keyword GREATER_THAN_OR_EQUAL ">="
%keyword LESS_THAN_OR_EQUAL "<="
%keyword EQ "="
%keyword NEQ "<>"
%left UMINUS
%left PLUS
%left MINUS
%left TIMES
%left DIVIDE
%type <open-paren>
%token <open-paren> LPAREN "("
%token <open-paren> LBRACE "{"
%token <open-paren> LBRACK "["
%type <close-paren>
%token <close-paren> RPAREN ")"
%token <close-paren> RBRACE "}"
%token <close-paren> RBRACK "]"
%type <block>
%token <block> PAREN_BLOCK "(LPAREN RPAREN)"
%token <block> BRACE_BLOCK "(LBRACE RBRACE)"
%token <block> BRACK_BLOCK "(LBRACK RBRACK)"
%%
;; For use with Semantic, must return valid semantic tags!
expr
: ;; Empty
| PAREN_BLOCK
(EXPANDFULL $1 function_parameters)
| FOR
(TAG "expr" 'expr :value $1)
| STRING_LITERAL
(TAG "string" 'expr :value $1)
| NUMBER_LITERAL
(TAG "number" 'expr :value $1)
| MINUS NUMBER_LITERAL %prec UMINUS
(TAG "expr" 'expr :value (concat "-" $2))
| NUMBER_LITERAL binop NUMBER_LITERAL
(TAG "binop result" 'expr :value (concat "(" $2 " " $1 " " $3 ")"))
| string_comparison
| expr EQ expr
(TAG "expr eq" 'expr :value (concat "(equal " $1 " " $3 ")"))
| expr NEQ expr
(TAG "expr neq" 'expr :value (concat "(not (equal " $1 " " $3 "))"))
;; | STRING_LITERAL EQ STRING_LITERAL
;; (TAG "expr eq" 'expr :value (concat "(equal " $1 " " $3 ")"))
;; | STRING_LITERAL NEQ STRING_LITERAL
;; (TAG "expr neq" 'expr :value (concat "(not (equal " $1 " " $3 "))"))
| symbol
(TAG "expr" 'expr :value $1)
| NIL
(TAG "expr" 'expr :value $1)
;
;; parameters: '(' [varargslist] ')'
function_parameters
: LPAREN
()
| RPAREN
()
;; | function_parameter COMMA
| function_parameter RPAREN
;
function_parameter
: WHILE
(TAG "function_parameter" 'function_parameter :value $1)
;
binop
: PLUS
| MINUS
| TIMES
| DIVIDE
| GREATER_THAN
| LESS_THAN
| GREATER_THAN_OR_EQUAL
| LESS_THAN_OR_EQUAL
;
string_comparison
: STRING_LITERAL GREATER_THAN STRING_LITERAL
(TAG "sum" 'expr :value (concat "(string> " $1 " " $3 ")"))
| STRING_LITERAL LESS_THAN STRING_LITERAL
(TAG "sum" 'expr :value (concat "(string< " $1 " " $3 ")"))
| STRING_LITERAL LESS_THAN_OR_EQUAL STRING_LITERAL
(TAG "sum" 'expr :value (concat "(or (string< " $1 " " $3 ") "
"(string= " $1 " " $3 "))"))
| STRING_LITERAL GREATER_THAN_OR_EQUAL STRING_LITERAL
(TAG "sum" 'expr :value (concat "(or (string> " $1 " " $3 ") "
"(string= " $1 " " $3 "))"))
;
;; expr_comparison
;; : expr EQ expr
;; (TAG "sum" 'expr :value (concat "(equal" $1 " " $3 ")"))
;; | expr NEQ expr
;; (TAG "sum" 'expr :value (concat "(not (equal" $1 " " $3 "))"))
;; ;
%%
;; this turns out not to be needed.
;; (define-lex-regex-type-analyzer simple-jac--<symbol>-regexp-analyzer
;; "regexp analyzer for <symbol> tokens."
;; "\\([A-Za-z][_A-Za-z0-9]\\)+"
;; '((symbol . "[A-Za-z][_A-Za-z0-9]*"))
;; 'symbol)
(define-lex simple-lexer
"Simple lexical analyzer."
semantic-lex-ignore-whitespace
semantic-lex-ignore-newline
semantic-lex-ignore-comments
;;;; Auto-generated analyzers.
simple-wy--<block>-block-analyzer
simple-wy--<keyword>-keyword-analyzer
simple-wy--<punctuation>-string-analyzer
simple-wy--<number>-regexp-analyzer
;; From the documentation string for `semantic-lex-tokens':
;; "Always add this analyzer *after* `semantic-lex-number', or other
;; analyzers that match its regular expression." (my emphasis)
simple-wy--<symbol>-regexp-analyzer
simple-wy--<string>-sexp-analyzer
;;;;
semantic-lex-default-action)
;;; simple.wy ends here
Just in case its relevant, here's the other file too.
;;; simple-mode.el -- major mode for a simple language
;;; Semantic parsing support
(require 'semantic-wisent)
(require 'simple-wy)
;;;###autoload
(defun semantic-default-simple-setup ()
"Set up a buffer for semantic parsing of a SIMPLE language."
;; Do some useful things.
(setq
semantic-ignore-comments t)
;; Install the parser
(simple-wy--install-parser)
;; Setup the lexer
(setq semantic-lex-analyzer 'simple-lexer
;; Do a full depth lexical analysis.
semantic-lex-depth nil))
;;;###autoload
(add-hook 'simple-mode-hook 'semantic-default-simple-setup)
;;; post parser processing
;; I'm not sure why Bovine is not getting rid of comments -- I suppose
;; I haven't dealt with them properly. I was sort of under the
;; impression that this would be handled automatically after setting
;; the document comment start and end -- but that doesn't seem to be
;; the case. Maybe I just need to remove comments as part of my own
;; preprocessing phase. Wouldn't be hard to do.
(defun clean-up-parser-output ()
(interactive)
(set-buffer (get-buffer "*Parser Output*"))
(flush-lines "overlay")
(flush-lines "expr")
;; (flush-lines "\"expr\" expr")
(goto-char (point-min))
(replace-regexp " (:value \"\\|\")$" "")
(goto-char (point-min))
(replace-regexp "\\\\\"" "\""))
(defun novinate ()
(interactive)
(bovinate)
(clean-up-parser-output))
;;; simple major mode
(defvar simple-mode-syntax-table
(let ((table (make-syntax-table (standard-syntax-table))))
(modify-syntax-entry ?\+ "." table) ;; Operator PLUS
(modify-syntax-entry ?\- "." table) ;; Operator MINUS
;; also deals with comments
(modify-syntax-entry ?\* ". 23" table) ;; Operator MULT
(modify-syntax-entry ?\/ ". 14" table) ;; Operator DIV
;; deal with brackets
(modify-syntax-entry ?\( "()" table)
(modify-syntax-entry ?\{ "(}" table)
(modify-syntax-entry ?\[ "(]" table)
(modify-syntax-entry ?\) ")(" table)
(modify-syntax-entry ?\} "){" table)
(modify-syntax-entry ?\] ")[" table)
table)
"Syntax table used in simple mode buffers.
Define operators as punctuations.")
;;;###autoload
(define-derived-mode simple-mode fundamental-mode "simple"
(setq comment-start-skip "/\\*+ *"))
;;;###autoload
(add-to-list 'auto-mode-alist '("\\.simple\\'" . simple-mode))
;(defvar simple-mode-hook nil)
(provide 'simple-mode)
;;; simple-mode.el ends here
|
| Thread | Author | Date | |
|---|---|---|---|
| Re: [cedet-semantic] using semantic/wisent as a parser | David PONCE <david.ponce@wa...> |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|