;;; wisi-parse.el --- Wisi parser
-;; Copyright (C) 2013 Free Software Foundation, Inc.
+;; Copyright (C) 2013, 2014 Free Software Foundation, Inc.
;; This file is part of GNU Emacs.
;;; Code:
+(require 'cl-lib)
(require 'semantic/wisent)
-(eval-when-compile (require 'cl-lib))
+
+;; WORKAROUND: for some reason, this condition doesn't work in batch mode!
+;; (when (and (= emacs-major-version 24)
+;; (= emacs-minor-version 2))
+ (require 'wisi-compat-24.2)
+;;)
+
+(defvar wisi-parse-max-parallel 15
+ "Maximum number of parallel parsers for acceptable performance.
+If a file needs more than this, it's probably an indication that
+the grammar is excessively redundant.")
+
+(defvar wisi-parse-max-parallel-current (cons 0 0)
+ "Cons (count . point); Maximum number of parallel parsers used in most recent parse,
+point at which that max was spawned.")
+
+(defvar wisi-debug 0
+ "wisi debug mode:
+0 : normal - ignore parse errors, for indenting new code
+1 : report parse errors (for running tests)
+2 : show parse states, position point at parse errors, debug-on-error works in parser
+3 : also show top 10 items of parser stack.")
(cl-defstruct (wisi-parser-state
(:copier nil))
'error-message
"wisi parse error")
-(defvar wisi-parse-max-parallel 15
- "Maximum number of parallel parsers for acceptable performance.
-If a file needs more than this, it's probably an indication that
-the grammar is excessively redundant.")
+(defvar-local wisi-cache-max 0
+ "Maximimum position in buffer where wisi-cache text properties are valid.")
-(defvar wisi-debug 0
- "wisi debug mode:
-0 : normal - ignore parse errors, for indenting new code
-1 : report parse errors (for running tests)
-2 : show parse states, position point at parse errors, debug-on-error works in parser
-3 : also show top 10 items of parser stack.")
+(defun wisi-token-text (token)
+ "Return buffer text from token range."
+ (let ((region (cdr token)))
+ (and region
+ (buffer-substring-no-properties (car region) (cdr region)))))
(defun wisi-parse (automaton lexer)
- "Parse input using the automaton specified in AUTOMATON.
+ "Parse current buffer from bob using the automaton specified in AUTOMATON.
- AUTOMATON is the parse table generated by `wisi-compile-grammar'.
- LEXER is a function with no argument called by the parser to
- obtain the next token in input, as a list (symbol text start
- . end), where `symbol' is the terminal symbol, `text' is the
- token string, `start . end' is the range in the buffer."
+ obtain the next token from the current buffer after point, as a
+ list (symbol text start . end), where `symbol' is the terminal
+ symbol, `text' is the token string, `start . end' is the range
+ in the buffer."
(let* ((actions (aref automaton 0))
(gotos (aref automaton 1))
(parser-states ;; vector of parallel parser states
:label 0
:active 'shift
:stack (make-vector wisent-parse-max-stack-size nil)
- ;; FIXME: better error message when stack overflows, so
- ;; user can set wisent-parse-max-stack-size in file-local
- ;; vars.
:sp 0
:pending nil)))
(active-parser-count 1)
active-parser-count-prev
(active 'shift)
- (token (funcall lexer))
- some-pending)
+ (token nil)
+ some-pending
+ )
+
+ (goto-char (point-min))
+ (aset (wisi-parser-state-stack (aref parser-states 0)) 0 0)
- (aset (wisi-parser-state-stack (aref parser-states 0)) 0 0) ;; Initial state
+ (setq token (funcall lexer))
+ (setq wisi-parse-max-parallel-current (cons 0 0))
(while (not (eq active 'accept))
(setq active-parser-count-prev active-parser-count)
;; spawn a new parser
(when (= active-parser-count wisi-parse-max-parallel)
(signal 'wisi-parse-error
- (wisi-error-msg (concat "too many parallel parsers required;"
- " simplify grammar, or increase `wisi-parse-max-parallel'"))))
+ (let ((state (aref (wisi-parser-state-stack parser-state)
+ (wisi-parser-state-sp parser-state))))
+ (wisi-error-msg (concat "too many parallel parsers required in grammar state %d;"
+ " simplify grammar, or increase `wisi-parse-max-parallel'"
+ state)))))
+
(let ((j (wisi-free-parser parser-states)))
(cond
((= j -1)
)))
)
(setq active-parser-count (1+ active-parser-count))
+ (when (> active-parser-count (car wisi-parse-max-parallel-current))
+ (setq wisi-parse-max-parallel-current (cons active-parser-count (point))))
(setf (wisi-parser-state-label result) j)
(aset parser-states j result))
(when (> wisi-debug 1)
(signal 'wisi-parse-error
(wisi-error-msg "syntax error in grammar state %d; unexpected %s, expecting one of %s"
state
- (nth 1 token)
+ (wisi-token-text token)
(mapcar 'car (aref actions state))))
))
(t
(wisi-error-msg
"syntax error in grammar state %d; unexpected %s, expecting one of %s"
state
- (nth 1 token)
+ (wisi-token-text token)
(mapcar 'car (aref actions state)))))
)))
(signal 'wisi-parse-error msg)))
(1
(setf (wisi-parser-state-active parser-state) nil); Don't save error for later.
- (wisi-execute-pending (wisi-parser-state-pending
- (aref parser-states (wisi-active-parser parser-states))))
- (setf (wisi-parser-state-pending
- (aref parser-states (wisi-active-parser parser-states)))
- nil))
+ (let ((parser-state (aref parser-states (wisi-active-parser parser-states))))
+ (wisi-execute-pending (wisi-parser-state-label parser-state)
+ (wisi-parser-state-pending parser-state))
+ (setf (wisi-parser-state-pending parser-state) nil)
+ ))
(t
;; We were in a parallel parse, and this parser
;; failed; mark it inactive, don't save error for
(when (eq active 'shift)
(when (> active-parser-count 1)
(setq active-parser-count (wisi-parse-elim-identical parser-states active-parser-count)))
+
(setq token (funcall lexer)))
)
(when (> active-parser-count 1)
(error "ambiguous parse result"))))
+(defun wisi-parsers-active-index (parser-states)
+ ;; only called when active-parser-count = 1
+ (let ((result nil)
+ (i 0))
+ (while (and (not result)
+ (< i (length parser-states)))
+ (when (wisi-parser-state-active (aref parser-states i))
+ (setq result i))
+ (setq i (1+ i)))
+ result))
+
(defun wisi-parsers-active (parser-states active-count)
"Return the type of parser cycle to execute.
PARSER-STATES[*].active is the last action a parser took. If it
executed again until another input token is available, after all
parsers have shifted the current token or terminated.
+Returns one of:
+
'accept : all PARSER-STATES have active set to nil or 'accept -
done parsing
'accept)
((= (+ shift-count accept-count) active-count)
'shift)
- (t (error "unexpected result in wisi-parsers-active"))
+ (t
+ ;; all parsers in error state; should not get here
+ (error "all parsers in error state; programmer error"))
)))
(defun wisi-free-parser (parser-states)
(dotimes (stack-i (wisi-parser-state-sp (aref parser-states parser-i)))
(setq
compare
- (and compare
+ (and compare ;; bypass expensive 'arefs' after first stack item compare fail
(equal (aref (wisi-parser-state-stack (aref parser-states parser-i)) stack-i)
(aref (wisi-parser-state-stack (aref parser-states (+ parser-i parser-j 1))) stack-i)))))
(when compare
(when (> wisi-debug 1)
(message "terminate identical parser %d (%d active)"
(+ parser-i parser-j 1) active-parser-count))
+ (setf (wisi-parser-state-active (aref parser-states (+ parser-i parser-j 1))) nil)
(when (= active-parser-count 1)
;; the actions for the two parsers are not
;; identical, but either is good enough for
- ;; indentation and navigation, so we just do one.
- (when (> wisi-debug 1) (message "executing actions for %d" (+ parser-i parser-j 1)))
- (wisi-execute-pending (wisi-parser-state-pending (aref parser-states (+ parser-i parser-j 1))))
- (setf (wisi-parser-state-pending (aref parser-states (+ parser-i parser-j 1))) nil)
-
- ;; clear pending of other parser so it can be reused
- (setf (wisi-parser-state-pending (aref parser-states parser-i)) nil))
-
- (setf (wisi-parser-state-active (aref parser-states (+ parser-i parser-j 1))) nil))
- )))
+ ;; indentation and navigation, so we just do the
+ ;; actions for the one that is not terminating.
+ (let ((parser-state (aref parser-states parser-i)))
+ (wisi-execute-pending (wisi-parser-state-label parser-state)
+ (wisi-parser-state-pending parser-state))
+ (setf (wisi-parser-state-pending parser-state) nil)
+ ))
+ ))))
)))
active-parser-count)
-(defun wisi-execute-pending (pending)
+(defun wisi-parse-max-pos (tokens)
+ "Return max position in tokens, or point if tokens nil."
+ (let ((result (if tokens 0 (point))))
+ (mapc
+ (lambda (token)
+ (when (cddr token)
+ (setq result (max (cddr token) result))))
+ tokens)
+ result)
+ )
+
+(defun wisi-parse-exec-action (func tokens)
+ "Execute action if all tokens past wisi-cache-max."
+ ;; We don't execute actions if all tokens are before wisi-cache-max,
+ ;; because later actions can update existing caches, and if the
+ ;; parse fails that won't happen. It also saves time.
+ ;;
+ ;; Also skip if no tokens; nothing to do. This can happen when all
+ ;; tokens in a grammar statement are optional.
+ (if (< 0 (length tokens))
+ (if (>= (wisi-parse-max-pos tokens) wisi-cache-max)
+
+ (funcall func tokens)
+
+ (when (> wisi-debug 1)
+ (message "... action skipped; before wisi-cache-max %d" wisi-cache-max)))
+
+ (when (> wisi-debug 1)
+ (message "... action skipped; no tokens"))
+ ))
+
+(defun wisi-execute-pending (parser-label pending)
+ (when (> wisi-debug 1) (message "%d: pending actions:" parser-label))
(while pending
(when (> wisi-debug 1) (message "%s" (car pending)))
- (apply (pop pending))))
+
+ (let ((func-args (pop pending)))
+ (wisi-parse-exec-action (car func-args) (cadr func-args)))
+ ))
(defun wisi-parse-1 (token parser-state pendingp actions gotos)
"Perform one shift or reduce on PARSER-STATE.
"Return a pair (START . END), the buffer region for a nonterminal.
STACK is the parser stack. I and J are the indices in STACK of
the first and last tokens of the nonterminal."
- (let ((start (cl-caddr (aref stack i)))
- (end (cl-cdddr (aref stack j))))
+ (let ((start (cadr (aref stack i)))
+ (end (cddr (aref stack j))))
(while (and (or (not start) (not end))
(/= i j))
(cond
((not start)
;; item i is an empty production
- (setq start (cl-caddr (aref stack (setq i (+ i 2))))))
+ (setq start (cadr (aref stack (setq i (+ i 2))))))
((not end)
;; item j is an empty production
- (setq end (cl-cdddr (aref stack (setq j (- j 2))))))
+ (setq end (cddr (aref stack (setq j (- j 2))))))
(t (setq i j))))
(and start end (cons start end))))
(wisi-nonterm-bounds stack (- sp (* 2 (1- token-count)) 1) (1- sp))))
(post-reduce-state (aref stack (- sp (* 2 token-count))))
(new-state (cdr (assoc nonterm (aref gotos post-reduce-state))))
- tokens)
+ (tokens (make-vector token-count nil)))
+
(when (not new-state)
(error "no goto for %s %d" nonterm post-reduce-state))
- (if (= 1 token-count)
- (setq tokens (list (aref stack (1- sp))))
- (dotimes (i token-count)
- (push (aref stack (- sp (* 2 i) 1)) tokens)))
+
+ (dotimes (i token-count)
+ (aset tokens (- token-count i 1) (aref stack (- sp (* 2 i) 1))))
+
(setq sp (+ 2 (- sp (* 2 token-count))))
- (aset stack (1- sp) (cons nonterm (cons nil nonterm-region)))
+ (aset stack (1- sp) (cons nonterm nonterm-region))
(aset stack sp new-state)
(setf (wisi-parser-state-sp parser-state) sp)
+
(if pendingp
(if (wisi-parser-state-pending parser-state)
(setf (wisi-parser-state-pending parser-state)
(list (list (nth 1 action) tokens))))
(setf (wisi-parser-state-pending parser-state)
(list (list (nth 1 action) tokens))))
- (funcall (nth 1 action) tokens))
+
+ ;; Not pending.
+ (wisi-parse-exec-action (nth 1 action) tokens)
+ )
))
(provide 'wisi-parse)