]> code.delx.au - gnu-emacs-elpa/blob - packages/wisi/wisi-parse.el
publish ada-mode 5.1.6, wisi 1.0.6, new package ada-ref-man
[gnu-emacs-elpa] / packages / wisi / wisi-parse.el
1 ;;; wisi-parse.el --- Wisi parser
2
3 ;; Copyright (C) 2013, 2014 Free Software Foundation, Inc.
4
5 ;; This file is part of GNU Emacs.
6
7 ;; GNU Emacs is free software: you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation, either version 3 of the License, or
10 ;; (at your option) any later version.
11
12 ;; GNU Emacs is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
16
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
19
20
21 ;;; Commentary:
22
23 ;; An extended LALR parser, that handles shift/reduce and
24 ;; reduce/reduce conflicts by spawning parallel parsers to follow each
25 ;; path.
26
27 ;;; Code:
28
29 (require 'cl-lib)
30 (require 'semantic/wisent)
31
32 ;; WORKAROUND: for some reason, this condition doesn't work in batch mode!
33 ;; (when (and (= emacs-major-version 24)
34 ;; (= emacs-minor-version 2))
35 (require 'wisi-compat-24.2)
36 ;;)
37
38 (defvar wisi-parse-max-parallel 15
39 "Maximum number of parallel parsers for acceptable performance.
40 If a file needs more than this, it's probably an indication that
41 the grammar is excessively redundant.")
42
43 (defvar wisi-parse-max-parallel-current (cons 0 0)
44 "Cons (count . point); Maximum number of parallel parsers used in most recent parse,
45 point at which that max was spawned.")
46
47 (defvar wisi-debug 0
48 "wisi debug mode:
49 0 : normal - ignore parse errors, for indenting new code
50 1 : report parse errors (for running tests)
51 2 : show parse states, position point at parse errors, debug-on-error works in parser
52 3 : also show top 10 items of parser stack.")
53
54 (cl-defstruct (wisi-parser-state
55 (:copier nil))
56 label ;; integer identifying parser for debug
57
58 active
59 ;; 'shift - need new token
60 ;; 'reduce - need reduce
61 ;; 'accept - parsing completed
62 ;; 'error - failed, error not reported yet
63 ;; nil - terminated
64 ;;
65 ;; 'pending-shift, 'pending-reduce - newly created parser; see wisi-parse
66
67 stack
68 ;; Each stack item takes two slots: (token-symbol token-text (token-start . token-end)), state
69 ;; token-text is nil for nonterminals.
70 ;; this is _not_ the same as the wisent-parse stack; that leaves out token-symbol.
71
72 sp ;; stack pointer
73
74 pending
75 ;; list of (action-symbol stack-fragment)
76 )
77
78 (defun wisi-error-msg (message &rest args)
79 (let ((line (line-number-at-pos))
80 (col (- (point) (line-beginning-position))))
81 (format
82 "%s:%d:%d: %s"
83 (file-name-nondirectory (buffer-name)) ;; buffer-file-name is sometimes nil here!?
84 line col
85 (apply 'format message args))))
86
87 (defvar wisi-parse-error nil)
88 (put 'wisi-parse-error
89 'error-conditions
90 '(error wisi-parse-error))
91 (put 'wisi-parse-error
92 'error-message
93 "wisi parse error")
94
95 (defvar-local wisi-cache-max 0
96 "Maximimum position in buffer where wisi-cache text properties are valid.")
97
98 (defun wisi-parse (automaton lexer)
99 "Parse current buffer from bob using the automaton specified in AUTOMATON.
100
101 - AUTOMATON is the parse table generated by `wisi-compile-grammar'.
102
103 - LEXER is a function with no argument called by the parser to
104 obtain the next token from the current buffer after point, as a
105 list (symbol text start . end), where `symbol' is the terminal
106 symbol, `text' is the token string, `start . end' is the range
107 in the buffer."
108 (let* ((actions (aref automaton 0))
109 (gotos (aref automaton 1))
110 (parser-states ;; vector of parallel parser states
111 (vector
112 (make-wisi-parser-state
113 :label 0
114 :active 'shift
115 :stack (make-vector wisent-parse-max-stack-size nil)
116 :sp 0
117 :pending nil)))
118 (active-parser-count 1)
119 active-parser-count-prev
120 (active 'shift)
121 (token nil)
122 some-pending
123 )
124
125 (goto-char (point-min))
126 (aset (wisi-parser-state-stack (aref parser-states 0)) 0 0)
127
128 (setq token (funcall lexer))
129 (setq wisi-parse-max-parallel-current (cons 0 0))
130
131 (while (not (eq active 'accept))
132 (setq active-parser-count-prev active-parser-count)
133 (setq some-pending nil)
134 (dotimes (parser-index (length parser-states))
135 (when (eq active (wisi-parser-state-active (aref parser-states parser-index)))
136 (let* ((parser-state (aref parser-states parser-index))
137 (result (wisi-parse-1 token parser-state (> active-parser-count 1) actions gotos)))
138 (when result
139 ;; spawn a new parser
140 (when (= active-parser-count wisi-parse-max-parallel)
141 (signal 'wisi-parse-error
142 (let ((state (aref (wisi-parser-state-stack parser-state)
143 (wisi-parser-state-sp parser-state))))
144 (wisi-error-msg (concat "too many parallel parsers required in grammar state %d;"
145 " simplify grammar, or increase `wisi-parse-max-parallel'"
146 state)))))
147
148 (let ((j (wisi-free-parser parser-states)))
149 (cond
150 ((= j -1)
151 ;; Add to parser-states; the new parser won't be executed
152 ;; again in this parser-index loop.
153 (setq parser-states (vconcat parser-states (vector nil)))
154 (setq j (1- (length parser-states))))
155 ((< j parser-index)
156 ;; The new parser won't be executed again in this
157 ;; parser-index loop; nothing to do.
158 )
159 (t
160 ;; Don't let the new parser execute again in this
161 ;; parser-index loop.
162 (setq some-pending t)
163 (setf (wisi-parser-state-active result)
164 (cl-case (wisi-parser-state-active result)
165 (shift 'pending-shift)
166 (reduce 'pending-reduce)
167 )))
168 )
169 (setq active-parser-count (1+ active-parser-count))
170 (when (> active-parser-count (car wisi-parse-max-parallel-current))
171 (setq wisi-parse-max-parallel-current (cons active-parser-count (point))))
172 (setf (wisi-parser-state-label result) j)
173 (aset parser-states j result))
174 (when (> wisi-debug 1)
175 (message "spawn parser (%d active)" active-parser-count)))
176
177 (when (eq 'error (wisi-parser-state-active parser-state))
178 (setq active-parser-count (1- active-parser-count))
179 (when (> wisi-debug 1)
180 (message "terminate parser (%d active)" active-parser-count))
181 (cl-case active-parser-count
182 (0
183 (cond
184 ((= active-parser-count-prev 1)
185 ;; We were not in a parallel parse; report the error.
186 (let ((state (aref (wisi-parser-state-stack parser-state)
187 (wisi-parser-state-sp parser-state))))
188 (signal 'wisi-parse-error
189 (wisi-error-msg "syntax error in grammar state %d; unexpected %s, expecting one of %s"
190 state
191 (nth 1 token)
192 (mapcar 'car (aref actions state))))
193 ))
194 (t
195 ;; Report errors from all parsers that failed on this token.
196 (let ((msg))
197 (dotimes (_ (length parser-states))
198 (let* ((parser-state (aref parser-states parser-index))
199 (state (aref (wisi-parser-state-stack parser-state)
200 (wisi-parser-state-sp parser-state))))
201 (when (eq 'error (wisi-parser-state-active parser-state))
202 (setq msg
203 (concat msg
204 (when msg "\n")
205 (wisi-error-msg
206 "syntax error in grammar state %d; unexpected %s, expecting one of %s"
207 state
208 (nth 1 token)
209 (mapcar 'car (aref actions state)))))
210 )))
211 (signal 'wisi-parse-error msg)))
212 ))
213
214 (1
215 (setf (wisi-parser-state-active parser-state) nil); Don't save error for later.
216 (wisi-execute-pending (wisi-parser-state-pending
217 (aref parser-states (wisi-active-parser parser-states))))
218 (setf (wisi-parser-state-pending
219 (aref parser-states (wisi-active-parser parser-states)))
220 nil))
221 (t
222 ;; We were in a parallel parse, and this parser
223 ;; failed; mark it inactive, don't save error for
224 ;; later.
225 (setf (wisi-parser-state-active parser-state) nil)
226 )))
227 )));; end dotimes
228
229 (when some-pending
230 ;; Change pending-* parsers to *.
231 (dotimes (parser-index (length parser-states))
232 (cond
233 ((eq (wisi-parser-state-active (aref parser-states parser-index)) 'pending-shift)
234 (setf (wisi-parser-state-active (aref parser-states parser-index)) 'shift))
235 ((eq (wisi-parser-state-active (aref parser-states parser-index)) 'pending-reduce)
236 (setf (wisi-parser-state-active (aref parser-states parser-index)) 'reduce))
237 )))
238
239 (setq active (wisi-parsers-active parser-states active-parser-count))
240 (when (eq active 'shift)
241 (when (> active-parser-count 1)
242 (setq active-parser-count (wisi-parse-elim-identical parser-states active-parser-count)))
243
244 (setq token (funcall lexer)))
245 )
246 (when (> active-parser-count 1)
247 (error "ambiguous parse result"))))
248
249 (defun wisi-parsers-active-index (parser-states)
250 ;; only called when active-parser-count = 1
251 (let ((result nil)
252 (i 0))
253 (while (and (not result)
254 (< i (length parser-states)))
255 (when (wisi-parser-state-active (aref parser-states i))
256 (setq result i))
257 (setq i (1+ i)))
258 result))
259
260 (defun wisi-parsers-active (parser-states active-count)
261 "Return the type of parser cycle to execute.
262 PARSER-STATES[*].active is the last action a parser took. If it
263 was 'shift, that parser used the input token, and should not be
264 executed again until another input token is available, after all
265 parsers have shifted the current token or terminated.
266
267 Returns one of:
268
269 'accept : all PARSER-STATES have active set to nil or 'accept -
270 done parsing
271
272 'shift : all PARSER-STATES have active set to nil, 'accept, or
273 'shift - get a new token, execute 'shift parsers.
274
275 'reduce : some PARSER-STATES have active set to 'reduce - no new
276 token, execute 'reduce parsers."
277 (let ((result nil)
278 (i 0)
279 (shift-count 0)
280 (accept-count 0)
281 active)
282 (while (and (not result)
283 (< i (length parser-states)))
284 (setq active (wisi-parser-state-active (aref parser-states i)))
285 (cond
286 ((eq active 'shift) (setq shift-count (1+ shift-count)))
287 ((eq active 'reduce) (setq result 'reduce))
288 ((eq active 'accept) (setq accept-count (1+ accept-count)))
289 )
290 (setq i (1+ i)))
291
292 (cond
293 (result )
294 ((= accept-count active-count)
295 'accept)
296 ((= (+ shift-count accept-count) active-count)
297 'shift)
298 (t
299 ;; all parsers in error state; should not get here
300 (error "all parsers in error state; programmer error"))
301 )))
302
303 (defun wisi-free-parser (parser-states)
304 "Return index to a non-active parser in PARSER-STATES, -1 if there is none."
305 (let ((result nil)
306 (i 0))
307 (while (and (not result)
308 (< i (length parser-states)))
309 (when (not (wisi-parser-state-active (aref parser-states i)))
310 (setq result i))
311 (setq i (1+ i)))
312 (if result result -1)))
313
314 (defun wisi-active-parser (parser-states)
315 "Return index to the first active parser in PARSER-STATES."
316 (let ((result nil)
317 (i 0))
318 (while (and (not result)
319 (< i (length parser-states)))
320 (when (wisi-parser-state-active (aref parser-states i))
321 (setq result i))
322 (setq i (1+ i)))
323 (unless result
324 (error "no active parsers"))
325 result))
326
327 (defun wisi-parse-elim-identical (parser-states active-parser-count)
328 "Check for parsers in PARSER-STATES that have reached identical states eliminate one.
329 Return new ACTIVE-PARSER-COUNT. Assumes all parsers have active
330 nil, 'shift, or 'accept."
331 ;; parser-states passed by reference; active-parser-count by copy
332 ;; see test/ada_mode-slices.adb for example
333 (dotimes (parser-i (1- (length parser-states)))
334 (when (wisi-parser-state-active (aref parser-states parser-i))
335 (dotimes (parser-j (- (length parser-states) parser-i 1))
336 (when (wisi-parser-state-active (aref parser-states (+ parser-i parser-j 1)))
337 (when (eq (wisi-parser-state-sp (aref parser-states parser-i))
338 (wisi-parser-state-sp (aref parser-states (+ parser-i parser-j 1))))
339 (let ((compare t))
340 (dotimes (stack-i (wisi-parser-state-sp (aref parser-states parser-i)))
341 (setq
342 compare
343 (and compare
344 (equal (aref (wisi-parser-state-stack (aref parser-states parser-i)) stack-i)
345 (aref (wisi-parser-state-stack (aref parser-states (+ parser-i parser-j 1))) stack-i)))))
346 (when compare
347 ;; parser stacks are identical
348 (setq active-parser-count (1- active-parser-count))
349 (when (> wisi-debug 1)
350 (message "terminate identical parser %d (%d active)"
351 (+ parser-i parser-j 1) active-parser-count))
352 (when (= active-parser-count 1)
353 ;; the actions for the two parsers are not
354 ;; identical, but either is good enough for
355 ;; indentation and navigation, so we just do one.
356 (when (> wisi-debug 1) (message "executing actions for %d" (+ parser-i parser-j 1)))
357 (wisi-execute-pending (wisi-parser-state-pending (aref parser-states (+ parser-i parser-j 1))))
358 (setf (wisi-parser-state-pending (aref parser-states (+ parser-i parser-j 1))) nil)
359
360 ;; clear pending of other parser so it can be reused
361 (setf (wisi-parser-state-pending (aref parser-states parser-i)) nil))
362
363 (setf (wisi-parser-state-active (aref parser-states (+ parser-i parser-j 1))) nil))
364 )))
365 )))
366 active-parser-count)
367
368 (defun wisi-parse-max-pos (tokens)
369 "Return max position in tokens, or point if tokens nil."
370 (let ((result (if tokens 0 (point))))
371 (mapc
372 (lambda (token)
373 (when (cl-cdddr token)
374 (setq result (max (cl-cdddr token) result))))
375 tokens)
376 result)
377 )
378
379 (defun wisi-parse-exec-action (func tokens)
380 "Execute action if all tokens past wisi-cache-max."
381 ;; We don't execute actions if all tokens are before wisi-cache-max,
382 ;; because later actions can update existing caches, and if the
383 ;; parse fails that won't happen. It also saves time.
384 (if (>= (wisi-parse-max-pos tokens) wisi-cache-max)
385
386 (funcall func tokens)
387
388 (when (> wisi-debug 1)
389 (message "... action skipped"))
390 ))
391
392 (defun wisi-execute-pending (pending)
393 (while pending
394 (when (> wisi-debug 1) (message "%s" (car pending)))
395
396 (let ((func-args (pop pending)))
397 (wisi-parse-exec-action (car func-args) (cadr func-args)))
398 ))
399
400 (defun wisi-parse-1 (token parser-state pendingp actions gotos)
401 "Perform one shift or reduce on PARSER-STATE.
402 If PENDINGP, push actions onto PARSER-STATE.pending; otherwise execute them.
403 See `wisi-parse' for full details.
404 Return nil or new parser (a wisi-parse-state struct)."
405 (let* ((state (aref (wisi-parser-state-stack parser-state)
406 (wisi-parser-state-sp parser-state)))
407 (parse-action (wisent-parse-action (car token) (aref actions state)))
408 new-parser-state)
409
410 (when (> wisi-debug 1)
411 ;; output trace info
412 (if (> wisi-debug 2)
413 (progn
414 ;; put top 10 stack items
415 (let* ((count (min 20 (wisi-parser-state-sp parser-state)))
416 (msg (make-vector (+ 1 count) nil)))
417 (dotimes (i count)
418 (aset msg (- count i)
419 (aref (wisi-parser-state-stack parser-state) (- (wisi-parser-state-sp parser-state) i)))
420 )
421 (message "%d: %s: %d: %s"
422 (wisi-parser-state-label parser-state)
423 (wisi-parser-state-active parser-state)
424 (wisi-parser-state-sp parser-state)
425 msg))
426 (message " %d: %s: %s" state token parse-action))
427 (message "%d: %d: %s: %s" (wisi-parser-state-label parser-state) state token parse-action)))
428
429 (when (and (listp parse-action)
430 (not (symbolp (car parse-action))))
431 ;; Conflict; spawn a new parser.
432 (setq new-parser-state
433 (make-wisi-parser-state
434 :active nil
435 :stack (vconcat (wisi-parser-state-stack parser-state))
436 :sp (wisi-parser-state-sp parser-state)
437 :pending (wisi-parser-state-pending parser-state)))
438
439 (wisi-parse-2 (cadr parse-action) token new-parser-state t gotos)
440 (setq pendingp t)
441 (setq parse-action (car parse-action))
442 );; when
443
444 ;; current parser
445 (wisi-parse-2 parse-action token parser-state pendingp gotos)
446
447 new-parser-state))
448
449 (defun wisi-parse-2 (action token parser-state pendingp gotos)
450 "Execute parser ACTION (must not be a conflict).
451 Return nil."
452 (cond
453 ((eq action 'accept)
454 (setf (wisi-parser-state-active parser-state) 'accept))
455
456 ((eq action 'error)
457 (setf (wisi-parser-state-active parser-state) 'error))
458
459 ((natnump action)
460 ;; Shift token and new state (= action) onto stack
461 (let ((stack (wisi-parser-state-stack parser-state)); reference
462 (sp (wisi-parser-state-sp parser-state))); copy
463 (setq sp (+ sp 2))
464 (aset stack (1- sp) token)
465 (aset stack sp action)
466 (setf (wisi-parser-state-sp parser-state) sp))
467 (setf (wisi-parser-state-active parser-state) 'shift))
468
469 (t
470 (wisi-parse-reduce action parser-state pendingp gotos)
471 (setf (wisi-parser-state-active parser-state) 'reduce))
472 ))
473
474 (defun wisi-nonterm-bounds (stack i j)
475 "Return a pair (START . END), the buffer region for a nonterminal.
476 STACK is the parser stack. I and J are the indices in STACK of
477 the first and last tokens of the nonterminal."
478 (let ((start (cl-caddr (aref stack i)))
479 (end (cl-cdddr (aref stack j))))
480 (while (and (or (not start) (not end))
481 (/= i j))
482 (cond
483 ((not start)
484 ;; item i is an empty production
485 (setq start (cl-caddr (aref stack (setq i (+ i 2))))))
486
487 ((not end)
488 ;; item j is an empty production
489 (setq end (cl-cdddr (aref stack (setq j (- j 2))))))
490
491 (t (setq i j))))
492 (and start end (cons start end))))
493
494 (defun wisi-parse-reduce (action parser-state pendingp gotos)
495 "Reduce PARSER-STATE.stack, and execute or pend ACTION."
496 (let* ((stack (wisi-parser-state-stack parser-state)); reference
497 (sp (wisi-parser-state-sp parser-state)); copy
498 (token-count (or (nth 2 action) 0))
499 (nonterm (nth 0 action))
500 (nonterm-region (when (> token-count 0)
501 (wisi-nonterm-bounds stack (- sp (* 2 (1- token-count)) 1) (1- sp))))
502 (post-reduce-state (aref stack (- sp (* 2 token-count))))
503 (new-state (cdr (assoc nonterm (aref gotos post-reduce-state))))
504 tokens)
505 (when (not new-state)
506 (error "no goto for %s %d" nonterm post-reduce-state))
507 (if (= 1 token-count)
508 (setq tokens (list (aref stack (1- sp))))
509 (dotimes (i token-count)
510 (push (aref stack (- sp (* 2 i) 1)) tokens)))
511 (setq sp (+ 2 (- sp (* 2 token-count))))
512 (aset stack (1- sp) (cons nonterm (cons nil nonterm-region)))
513 (aset stack sp new-state)
514 (setf (wisi-parser-state-sp parser-state) sp)
515 (if pendingp
516 (if (wisi-parser-state-pending parser-state)
517 (setf (wisi-parser-state-pending parser-state)
518 (append (wisi-parser-state-pending parser-state)
519 (list (list (nth 1 action) tokens))))
520 (setf (wisi-parser-state-pending parser-state)
521 (list (list (nth 1 action) tokens))))
522
523 ;; Not pending.
524 (wisi-parse-exec-action (nth 1 action) tokens)
525 )
526 ))
527
528 (provide 'wisi-parse)
529 ;;; wisi-parse.el ends here