1 ;;; wisi-parse.el --- Wisi parser
3 ;; Copyright (C) 2013, 2014 Free Software Foundation, Inc.
5 ;; This file is part of GNU Emacs.
7 ;; GNU Emacs is free software: you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation, either version 3 of the License, or
10 ;; (at your option) any later version.
12 ;; GNU Emacs is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
23 ;; An extended LALR parser, that handles shift/reduce and
24 ;; reduce/reduce conflicts by spawning parallel parsers to follow each
30 (require 'semantic/wisent)
32 (cl-defstruct (wisi-parser-state
34 label ;; integer identifying parser for debug
37 ;; 'shift - need new token
38 ;; 'reduce - need reduce
39 ;; 'accept - parsing completed
40 ;; 'error - failed, error not reported yet
43 ;; 'pending-shift, 'pending-reduce - newly created parser; see wisi-parse
46 ;; Each stack item takes two slots: (token-symbol token-text (token-start . token-end)), state
47 ;; token-text is nil for nonterminals.
48 ;; this is _not_ the same as the wisent-parse stack; that leaves out token-symbol.
53 ;; list of (action-symbol stack-fragment)
56 (defun wisi-error-msg (message &rest args)
57 (let ((line (line-number-at-pos))
58 (col (- (point) (line-beginning-position))))
61 (file-name-nondirectory (buffer-name)) ;; buffer-file-name is sometimes nil here!?
63 (apply 'format message args))))
65 (defvar wisi-parse-error nil)
66 (put 'wisi-parse-error
68 '(error wisi-parse-error))
69 (put 'wisi-parse-error
73 (defvar wisi-parse-max-parallel 15
74 "Maximum number of parallel parsers for acceptable performance.
75 If a file needs more than this, it's probably an indication that
76 the grammar is excessively redundant.")
80 0 : normal - ignore parse errors, for indenting new code
81 1 : report parse errors (for running tests)
82 2 : show parse states, position point at parse errors, debug-on-error works in parser
83 3 : also show top 10 items of parser stack.")
85 (defun wisi-parse (automaton lexer)
86 "Parse input using the automaton specified in AUTOMATON.
88 - AUTOMATON is the parse table generated by `wisi-compile-grammar'.
90 - LEXER is a function with no argument called by the parser to
91 obtain the next token in input, as a list (symbol text start
92 . end), where `symbol' is the terminal symbol, `text' is the
93 token string, `start . end' is the range in the buffer."
94 (let* ((actions (aref automaton 0))
95 (gotos (aref automaton 1))
96 (parser-states ;; vector of parallel parser states
98 (make-wisi-parser-state
101 :stack (make-vector wisent-parse-max-stack-size nil)
104 (active-parser-count 1)
105 active-parser-count-prev
107 (token (funcall lexer))
110 (aset (wisi-parser-state-stack (aref parser-states 0)) 0 0) ;; Initial state
112 (while (not (eq active 'accept))
113 (setq active-parser-count-prev active-parser-count)
114 (setq some-pending nil)
115 (dotimes (parser-index (length parser-states))
116 (when (eq active (wisi-parser-state-active (aref parser-states parser-index)))
117 (let* ((parser-state (aref parser-states parser-index))
118 (result (wisi-parse-1 token parser-state (> active-parser-count 1) actions gotos)))
120 ;; spawn a new parser
121 (when (= active-parser-count wisi-parse-max-parallel)
122 (signal 'wisi-parse-error
123 (wisi-error-msg (concat "too many parallel parsers required;"
124 " simplify grammar, or increase `wisi-parse-max-parallel'"))))
125 (let ((j (wisi-free-parser parser-states)))
128 ;; Add to parser-states; the new parser won't be executed
129 ;; again in this parser-index loop.
130 (setq parser-states (vconcat parser-states (vector nil)))
131 (setq j (1- (length parser-states))))
133 ;; The new parser won't be executed again in this
134 ;; parser-index loop; nothing to do.
137 ;; Don't let the new parser execute again in this
138 ;; parser-index loop.
139 (setq some-pending t)
140 (setf (wisi-parser-state-active result)
141 (cl-case (wisi-parser-state-active result)
142 (shift 'pending-shift)
143 (reduce 'pending-reduce)
146 (setq active-parser-count (1+ active-parser-count))
147 (setf (wisi-parser-state-label result) j)
148 (aset parser-states j result))
149 (when (> wisi-debug 1)
150 (message "spawn parser (%d active)" active-parser-count)))
152 (when (eq 'error (wisi-parser-state-active parser-state))
153 (setq active-parser-count (1- active-parser-count))
154 (when (> wisi-debug 1)
155 (message "terminate parser (%d active)" active-parser-count))
156 (cl-case active-parser-count
159 ((= active-parser-count-prev 1)
160 ;; We were not in a parallel parse; report the error.
161 (let ((state (aref (wisi-parser-state-stack parser-state)
162 (wisi-parser-state-sp parser-state))))
163 (signal 'wisi-parse-error
164 (wisi-error-msg "syntax error in grammar state %d; unexpected %s, expecting one of %s"
167 (mapcar 'car (aref actions state))))
170 ;; Report errors from all parsers that failed on this token.
172 (dotimes (_ (length parser-states))
173 (let* ((parser-state (aref parser-states parser-index))
174 (state (aref (wisi-parser-state-stack parser-state)
175 (wisi-parser-state-sp parser-state))))
176 (when (eq 'error (wisi-parser-state-active parser-state))
181 "syntax error in grammar state %d; unexpected %s, expecting one of %s"
184 (mapcar 'car (aref actions state)))))
186 (signal 'wisi-parse-error msg)))
190 (setf (wisi-parser-state-active parser-state) nil); Don't save error for later.
191 (wisi-execute-pending (wisi-parser-state-pending
192 (aref parser-states (wisi-active-parser parser-states))))
193 (setf (wisi-parser-state-pending
194 (aref parser-states (wisi-active-parser parser-states)))
197 ;; We were in a parallel parse, and this parser
198 ;; failed; mark it inactive, don't save error for
200 (setf (wisi-parser-state-active parser-state) nil)
205 ;; Change pending-* parsers to *.
206 (dotimes (parser-index (length parser-states))
208 ((eq (wisi-parser-state-active (aref parser-states parser-index)) 'pending-shift)
209 (setf (wisi-parser-state-active (aref parser-states parser-index)) 'shift))
210 ((eq (wisi-parser-state-active (aref parser-states parser-index)) 'pending-reduce)
211 (setf (wisi-parser-state-active (aref parser-states parser-index)) 'reduce))
214 (setq active (wisi-parsers-active parser-states active-parser-count))
215 (when (eq active 'shift)
216 (when (> active-parser-count 1)
217 (setq active-parser-count (wisi-parse-elim-identical parser-states active-parser-count)))
218 (setq token (funcall lexer)))
220 (when (> active-parser-count 1)
221 (error "ambiguous parse result"))))
223 (defun wisi-parsers-active (parser-states active-count)
224 "Return the type of parser cycle to execute.
225 PARSER-STATES[*].active is the last action a parser took. If it
226 was 'shift, that parser used the input token, and should not be
227 executed again until another input token is available, after all
228 parsers have shifted the current token or terminated.
230 'accept : all PARSER-STATES have active set to nil or 'accept -
233 'shift : all PARSER-STATES have active set to nil, 'accept, or
234 'shift - get a new token, execute 'shift parsers.
236 'reduce : some PARSER-STATES have active set to 'reduce - no new
237 token, execute 'reduce parsers."
243 (while (and (not result)
244 (< i (length parser-states)))
245 (setq active (wisi-parser-state-active (aref parser-states i)))
247 ((eq active 'shift) (setq shift-count (1+ shift-count)))
248 ((eq active 'reduce) (setq result 'reduce))
249 ((eq active 'accept) (setq accept-count (1+ accept-count)))
255 ((= accept-count active-count)
257 ((= (+ shift-count accept-count) active-count)
259 (t (error "unexpected result in wisi-parsers-active"))
262 (defun wisi-free-parser (parser-states)
263 "Return index to a non-active parser in PARSER-STATES, -1 if there is none."
266 (while (and (not result)
267 (< i (length parser-states)))
268 (when (not (wisi-parser-state-active (aref parser-states i)))
271 (if result result -1)))
273 (defun wisi-active-parser (parser-states)
274 "Return index to the first active parser in PARSER-STATES."
277 (while (and (not result)
278 (< i (length parser-states)))
279 (when (wisi-parser-state-active (aref parser-states i))
283 (error "no active parsers"))
286 (defun wisi-parse-elim-identical (parser-states active-parser-count)
287 "Check for parsers in PARSER-STATES that have reached identical states eliminate one.
288 Return new ACTIVE-PARSER-COUNT. Assumes all parsers have active
289 nil, 'shift, or 'accept."
290 ;; parser-states passed by reference; active-parser-count by copy
291 ;; see test/ada_mode-slices.adb for example
292 (dotimes (parser-i (1- (length parser-states)))
293 (when (wisi-parser-state-active (aref parser-states parser-i))
294 (dotimes (parser-j (- (length parser-states) parser-i 1))
295 (when (wisi-parser-state-active (aref parser-states (+ parser-i parser-j 1)))
296 (when (eq (wisi-parser-state-sp (aref parser-states parser-i))
297 (wisi-parser-state-sp (aref parser-states (+ parser-i parser-j 1))))
299 (dotimes (stack-i (wisi-parser-state-sp (aref parser-states parser-i)))
303 (equal (aref (wisi-parser-state-stack (aref parser-states parser-i)) stack-i)
304 (aref (wisi-parser-state-stack (aref parser-states (+ parser-i parser-j 1))) stack-i)))))
306 ;; parser stacks are identical
307 (setq active-parser-count (1- active-parser-count))
308 (when (> wisi-debug 1)
309 (message "terminate identical parser %d (%d active)"
310 (+ parser-i parser-j 1) active-parser-count))
311 (when (= active-parser-count 1)
312 ;; the actions for the two parsers are not
313 ;; identical, but either is good enough for
314 ;; indentation and navigation, so we just do one.
315 (when (> wisi-debug 1) (message "executing actions for %d" (+ parser-i parser-j 1)))
316 (wisi-execute-pending (wisi-parser-state-pending (aref parser-states (+ parser-i parser-j 1))))
317 (setf (wisi-parser-state-pending (aref parser-states (+ parser-i parser-j 1))) nil)
319 ;; clear pending of other parser so it can be reused
320 (setf (wisi-parser-state-pending (aref parser-states parser-i)) nil))
322 (setf (wisi-parser-state-active (aref parser-states (+ parser-i parser-j 1))) nil))
327 (defun wisi-execute-pending (pending)
329 (when (> wisi-debug 1) (message "%s" (car pending)))
332 ((and (>= emacs-major-version 24)
333 (>= emacs-minor-version 3))
334 (apply (pop pending)))
337 (let ((func-args (pop pending)))
338 (apply (car func-args) (cdr func-args))))
341 (defun wisi-parse-1 (token parser-state pendingp actions gotos)
342 "Perform one shift or reduce on PARSER-STATE.
343 If PENDINGP, push actions onto PARSER-STATE.pending; otherwise execute them.
344 See `wisi-parse' for full details.
345 Return nil or new parser (a wisi-parse-state struct)."
346 (let* ((state (aref (wisi-parser-state-stack parser-state)
347 (wisi-parser-state-sp parser-state)))
348 (parse-action (wisent-parse-action (car token) (aref actions state)))
351 (when (> wisi-debug 1)
355 ;; put top 10 stack items
356 (let* ((count (min 20 (wisi-parser-state-sp parser-state)))
357 (msg (make-vector (+ 1 count) nil)))
359 (aset msg (- count i)
360 (aref (wisi-parser-state-stack parser-state) (- (wisi-parser-state-sp parser-state) i)))
362 (message "%d: %s: %d: %s"
363 (wisi-parser-state-label parser-state)
364 (wisi-parser-state-active parser-state)
365 (wisi-parser-state-sp parser-state)
367 (message " %d: %s: %s" state token parse-action))
368 (message "%d: %d: %s: %s" (wisi-parser-state-label parser-state) state token parse-action)))
370 (when (and (listp parse-action)
371 (not (symbolp (car parse-action))))
372 ;; Conflict; spawn a new parser.
373 (setq new-parser-state
374 (make-wisi-parser-state
376 :stack (vconcat (wisi-parser-state-stack parser-state))
377 :sp (wisi-parser-state-sp parser-state)
378 :pending (wisi-parser-state-pending parser-state)))
380 (wisi-parse-2 (cadr parse-action) token new-parser-state t gotos)
382 (setq parse-action (car parse-action))
386 (wisi-parse-2 parse-action token parser-state pendingp gotos)
390 (defun wisi-parse-2 (action token parser-state pendingp gotos)
391 "Execute parser ACTION (must not be a conflict).
395 (setf (wisi-parser-state-active parser-state) 'accept))
398 (setf (wisi-parser-state-active parser-state) 'error))
401 ;; Shift token and new state (= action) onto stack
402 (let ((stack (wisi-parser-state-stack parser-state)); reference
403 (sp (wisi-parser-state-sp parser-state))); copy
405 (aset stack (1- sp) token)
406 (aset stack sp action)
407 (setf (wisi-parser-state-sp parser-state) sp))
408 (setf (wisi-parser-state-active parser-state) 'shift))
411 (wisi-parse-reduce action parser-state pendingp gotos)
412 (setf (wisi-parser-state-active parser-state) 'reduce))
415 (defun wisi-nonterm-bounds (stack i j)
416 "Return a pair (START . END), the buffer region for a nonterminal.
417 STACK is the parser stack. I and J are the indices in STACK of
418 the first and last tokens of the nonterminal."
419 (let ((start (cl-caddr (aref stack i)))
420 (end (cl-cdddr (aref stack j))))
421 (while (and (or (not start) (not end))
425 ;; item i is an empty production
426 (setq start (cl-caddr (aref stack (setq i (+ i 2))))))
429 ;; item j is an empty production
430 (setq end (cl-cdddr (aref stack (setq j (- j 2))))))
433 (and start end (cons start end))))
435 (defun wisi-parse-reduce (action parser-state pendingp gotos)
436 "Reduce PARSER-STATE.stack, and execute or pend ACTION."
437 (let* ((stack (wisi-parser-state-stack parser-state)); reference
438 (sp (wisi-parser-state-sp parser-state)); copy
439 (token-count (or (nth 2 action) 0))
440 (nonterm (nth 0 action))
441 (nonterm-region (when (> token-count 0)
442 (wisi-nonterm-bounds stack (- sp (* 2 (1- token-count)) 1) (1- sp))))
443 (post-reduce-state (aref stack (- sp (* 2 token-count))))
444 (new-state (cdr (assoc nonterm (aref gotos post-reduce-state))))
446 (when (not new-state)
447 (error "no goto for %s %d" nonterm post-reduce-state))
448 (if (= 1 token-count)
449 (setq tokens (list (aref stack (1- sp))))
450 (dotimes (i token-count)
451 (push (aref stack (- sp (* 2 i) 1)) tokens)))
452 (setq sp (+ 2 (- sp (* 2 token-count))))
453 (aset stack (1- sp) (cons nonterm (cons nil nonterm-region)))
454 (aset stack sp new-state)
455 (setf (wisi-parser-state-sp parser-state) sp)
457 (if (wisi-parser-state-pending parser-state)
458 (setf (wisi-parser-state-pending parser-state)
459 (append (wisi-parser-state-pending parser-state)
460 (list (list (nth 1 action) tokens))))
461 (setf (wisi-parser-state-pending parser-state)
462 (list (list (nth 1 action) tokens))))
463 (funcall (nth 1 action) tokens))
466 (provide 'wisi-parse)
467 ;;; wisi-parse.el ends here