]> code.delx.au - gnu-emacs-elpa/blob - packages/wisi/wisi-parse.el
Fix some quoting problems in doc strings
[gnu-emacs-elpa] / packages / wisi / wisi-parse.el
1 ;;; wisi-parse.el --- Wisi parser -*- lexical-binding:t -*-
2
3 ;; Copyright (C) 2013-2015 Free Software Foundation, Inc.
4
5 ;; This file is part of GNU Emacs.
6
7 ;; GNU Emacs is free software: you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation, either version 3 of the License, or
10 ;; (at your option) any later version.
11
12 ;; GNU Emacs is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
16
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
19
20
21 ;;; Commentary:
22
23 ;; An extended LALR parser, that handles shift/reduce and
24 ;; reduce/reduce conflicts by spawning parallel parsers to follow each
25 ;; path.
26
27 ;;; Code:
28
29 (require 'cl-lib)
30 (require 'semantic/wisent)
31
32 ;; WORKAROUND: for some reason, this condition doesn't work in batch mode!
33 ;; (when (and (= emacs-major-version 24)
34 ;; (= emacs-minor-version 2))
35 (require 'wisi-compat-24.2)
36 ;;)
37
38 (defvar wisi-parse-max-parallel 15
39 "Maximum number of parallel parsers for acceptable performance.
40 If a file needs more than this, it's probably an indication that
41 the grammar is excessively redundant.")
42
43 (defvar wisi-parse-max-parallel-current (cons 0 0)
44 "Cons (count . point); Maximum number of parallel parsers used in most recent parse,
45 point at which that max was spawned.")
46
47 (defvar wisi-debug 0
48 "wisi debug mode:
49 0 : normal - ignore parse errors, for indenting new code
50 1 : report parse errors (for running tests)
51 2 : show parse states, position point at parse errors, debug-on-error works in parser
52 3 : also show top 10 items of parser stack.")
53
54 (cl-defstruct (wisi-parser-state
55 (:copier nil))
56 label ;; integer identifying parser for debug
57
58 active
59 ;; 'shift - need new token
60 ;; 'reduce - need reduce
61 ;; 'accept - parsing completed
62 ;; 'error - failed, error not reported yet
63 ;; nil - terminated
64 ;;
65 ;; 'pending-shift, 'pending-reduce - newly created parser; see wisi-parse
66
67 stack
68 ;; Each stack item takes two slots: (token-symbol token-text (token-start . token-end)), state
69 ;; token-text is nil for nonterminals.
70 ;; this is _not_ the same as the wisent-parse stack; that leaves out token-symbol.
71
72 sp ;; stack pointer
73
74 pending
75 ;; list of (action-symbol stack-fragment)
76 )
77
78 (defun wisi-error-msg (message &rest args)
79 (let ((line (line-number-at-pos))
80 (col (- (point) (line-beginning-position))))
81 (format
82 "%s:%d:%d: %s"
83 (file-name-nondirectory (buffer-name)) ;; buffer-file-name is sometimes nil here!?
84 line col
85 (apply 'format message args))))
86
87 (defvar wisi-parse-error nil)
88 (put 'wisi-parse-error
89 'error-conditions
90 '(error wisi-parse-error))
91 (put 'wisi-parse-error
92 'error-message
93 "wisi parse error")
94
95 (defvar-local wisi-cache-max 0
96 "Maximimum position in buffer where wisi-cache text properties are valid.")
97
98 (defun wisi-token-text (token)
99 "Return buffer text from token range."
100 (let ((region (cdr token)))
101 (and region
102 (buffer-substring-no-properties (car region) (cdr region)))))
103
104 (defun wisi-parse (automaton lexer)
105 "Parse current buffer from bob using the automaton specified in AUTOMATON.
106
107 - AUTOMATON is the parse table generated by `wisi-compile-grammar'.
108
109 - LEXER is a function with no argument called by the parser to
110 obtain the next token from the current buffer after point, as a
111 list (symbol text start . end), where `symbol' is the terminal
112 symbol, `text' is the token string, `start . end' is the range
113 in the buffer."
114
115 ;; FIXME: (aref automaton 3) is the obarray storing the semantic actions;
116 ;; not used here (see related FIXME in wisi-compile)
117 (let* ((actions (aref automaton 0))
118 (gotos (aref automaton 1))
119 (parser-states ;; vector of parallel parser states
120 (vector
121 (make-wisi-parser-state
122 :label 0
123 :active 'shift
124 :stack (make-vector wisent-parse-max-stack-size nil)
125 :sp 0
126 :pending nil)))
127 (active-parser-count 1)
128 active-parser-count-prev
129 (active 'shift)
130 (token nil)
131 some-pending
132 )
133
134 (goto-char (point-min))
135 (aset (wisi-parser-state-stack (aref parser-states 0)) 0 0)
136
137 (setq token (funcall lexer))
138 (setq wisi-parse-max-parallel-current (cons 0 0))
139
140 (while (not (eq active 'accept))
141 (setq active-parser-count-prev active-parser-count)
142 (setq some-pending nil)
143 (dotimes (parser-index (length parser-states))
144 (when (eq active (wisi-parser-state-active (aref parser-states parser-index)))
145 (let* ((parser-state (aref parser-states parser-index))
146 (result (wisi-parse-1 token parser-state (> active-parser-count 1) actions gotos)))
147 (when result
148 ;; spawn a new parser
149 (when (= active-parser-count wisi-parse-max-parallel)
150 (signal 'wisi-parse-error
151 (let ((state (aref (wisi-parser-state-stack parser-state)
152 (wisi-parser-state-sp parser-state))))
153 (wisi-error-msg (concat "too many parallel parsers required in grammar state %d;"
154 " simplify grammar, or increase `wisi-parse-max-parallel'")
155 state))))
156
157 (let ((j (wisi-free-parser parser-states)))
158 (cond
159 ((= j -1)
160 ;; Add to parser-states; the new parser won't be executed
161 ;; again in this parser-index loop.
162 (setq parser-states (vconcat parser-states (vector nil)))
163 (setq j (1- (length parser-states))))
164 ((< j parser-index)
165 ;; The new parser won't be executed again in this
166 ;; parser-index loop; nothing to do.
167 )
168 (t
169 ;; Don't let the new parser execute again in this
170 ;; parser-index loop.
171 (setq some-pending t)
172 (setf (wisi-parser-state-active result)
173 (cl-case (wisi-parser-state-active result)
174 (shift 'pending-shift)
175 (reduce 'pending-reduce)
176 )))
177 )
178 (setq active-parser-count (1+ active-parser-count))
179 (when (> active-parser-count (car wisi-parse-max-parallel-current))
180 (setq wisi-parse-max-parallel-current (cons active-parser-count (point))))
181 (setf (wisi-parser-state-label result) j)
182 (aset parser-states j result))
183 (when (> wisi-debug 1)
184 (message "spawn parser (%d active)" active-parser-count)))
185
186 (when (eq 'error (wisi-parser-state-active parser-state))
187 (setq active-parser-count (1- active-parser-count))
188 (when (> wisi-debug 1)
189 (message "terminate parser (%d active)" active-parser-count))
190 (cl-case active-parser-count
191 (0
192 (cond
193 ((= active-parser-count-prev 1)
194 ;; We were not in a parallel parse; report the error.
195 (let ((state (aref (wisi-parser-state-stack parser-state)
196 (wisi-parser-state-sp parser-state))))
197 (signal 'wisi-parse-error
198 (wisi-error-msg "syntax error in grammar state %d; unexpected %s, expecting one of %s"
199 state
200 (wisi-token-text token)
201 (mapcar 'car (aref actions state))))
202 ))
203 (t
204 ;; Report errors from all parsers that failed on this token.
205 (let ((msg))
206 (dotimes (_ (length parser-states))
207 (let* ((parser-state (aref parser-states parser-index))
208 (state (aref (wisi-parser-state-stack parser-state)
209 (wisi-parser-state-sp parser-state))))
210 (when (eq 'error (wisi-parser-state-active parser-state))
211 (setq msg
212 (concat msg
213 (when msg "\n")
214 (wisi-error-msg
215 "syntax error in grammar state %d; unexpected %s, expecting one of %s"
216 state
217 (wisi-token-text token)
218 (mapcar 'car (aref actions state)))))
219 )))
220 (signal 'wisi-parse-error msg)))
221 ))
222
223 (1
224 (setf (wisi-parser-state-active parser-state) nil); Don't save error for later.
225 (let ((parser-state (aref parser-states (wisi-active-parser parser-states))))
226 (wisi-execute-pending (wisi-parser-state-label parser-state)
227 (wisi-parser-state-pending parser-state))
228 (setf (wisi-parser-state-pending parser-state) nil)
229 ))
230 (t
231 ;; We were in a parallel parse, and this parser
232 ;; failed; mark it inactive, don't save error for
233 ;; later.
234 (setf (wisi-parser-state-active parser-state) nil)
235 )))
236 )));; end dotimes
237
238 (when some-pending
239 ;; Change pending-* parsers to *.
240 (dotimes (parser-index (length parser-states))
241 (cond
242 ((eq (wisi-parser-state-active (aref parser-states parser-index)) 'pending-shift)
243 (setf (wisi-parser-state-active (aref parser-states parser-index)) 'shift))
244 ((eq (wisi-parser-state-active (aref parser-states parser-index)) 'pending-reduce)
245 (setf (wisi-parser-state-active (aref parser-states parser-index)) 'reduce))
246 )))
247
248 (setq active (wisi-parsers-active parser-states active-parser-count))
249 (when (eq active 'shift)
250 (when (> active-parser-count 1)
251 (setq active-parser-count (wisi-parse-elim-identical parser-states active-parser-count)))
252
253 (setq token (funcall lexer)))
254 )
255 (when (> active-parser-count 1)
256 (error "ambiguous parse result"))))
257
258 (defun wisi-parsers-active-index (parser-states)
259 ;; only called when active-parser-count = 1
260 (let ((result nil)
261 (i 0))
262 (while (and (not result)
263 (< i (length parser-states)))
264 (when (wisi-parser-state-active (aref parser-states i))
265 (setq result i))
266 (setq i (1+ i)))
267 result))
268
269 (defun wisi-parsers-active (parser-states active-count)
270 "Return the type of parser cycle to execute.
271 PARSER-STATES[*].active is the last action a parser took. If it
272 was `shift', that parser used the input token, and should not be
273 executed again until another input token is available, after all
274 parsers have shifted the current token or terminated.
275
276 Returns one of:
277
278 `accept' : all PARSER-STATES have active set to nil or `accept' -
279 done parsing
280
281 `shift' : all PARSER-STATES have active set to nil, `accept', or
282 `shift' - get a new token, execute `shift' parsers.
283
284 `reduce' : some PARSER-STATES have active set to `reduce' - no new
285 token, execute `reduce' parsers."
286 (let ((result nil)
287 (i 0)
288 (shift-count 0)
289 (accept-count 0)
290 active)
291 (while (and (not result)
292 (< i (length parser-states)))
293 (setq active (wisi-parser-state-active (aref parser-states i)))
294 (cond
295 ((eq active 'shift) (setq shift-count (1+ shift-count)))
296 ((eq active 'reduce) (setq result 'reduce))
297 ((eq active 'accept) (setq accept-count (1+ accept-count)))
298 )
299 (setq i (1+ i)))
300
301 (cond
302 (result )
303 ((= accept-count active-count)
304 'accept)
305 ((= (+ shift-count accept-count) active-count)
306 'shift)
307 (t
308 ;; all parsers in error state; should not get here
309 (error "all parsers in error state; programmer error"))
310 )))
311
312 (defun wisi-free-parser (parser-states)
313 "Return index to a non-active parser in PARSER-STATES, -1 if there is none."
314 (let ((result nil)
315 (i 0))
316 (while (and (not result)
317 (< i (length parser-states)))
318 (when (not (wisi-parser-state-active (aref parser-states i)))
319 (setq result i))
320 (setq i (1+ i)))
321 (if result result -1)))
322
323 (defun wisi-active-parser (parser-states)
324 "Return index to the first active parser in PARSER-STATES."
325 (let ((result nil)
326 (i 0))
327 (while (and (not result)
328 (< i (length parser-states)))
329 (when (wisi-parser-state-active (aref parser-states i))
330 (setq result i))
331 (setq i (1+ i)))
332 (unless result
333 (error "no active parsers"))
334 result))
335
336 (defun wisi-parse-elim-identical (parser-states active-parser-count)
337 "Check for parsers in PARSER-STATES that have reached identical states eliminate one.
338 Return new ACTIVE-PARSER-COUNT. Assumes all parsers have active
339 nil, `shift', or `accept'."
340 ;; parser-states passed by reference; active-parser-count by copy
341 ;; see test/ada_mode-slices.adb for example
342 (dotimes (parser-i (1- (length parser-states)))
343 (when (wisi-parser-state-active (aref parser-states parser-i))
344 (dotimes (parser-j (- (length parser-states) parser-i 1))
345 (when (wisi-parser-state-active (aref parser-states (+ parser-i parser-j 1)))
346 (when (eq (wisi-parser-state-sp (aref parser-states parser-i))
347 (wisi-parser-state-sp (aref parser-states (+ parser-i parser-j 1))))
348 (let ((compare t))
349 (dotimes (stack-i (wisi-parser-state-sp (aref parser-states parser-i)))
350 (setq
351 compare
352 (and compare ;; bypass expensive 'arefs' after first stack item compare fail
353 (equal (aref (wisi-parser-state-stack (aref parser-states parser-i)) stack-i)
354 (aref (wisi-parser-state-stack (aref parser-states (+ parser-i parser-j 1))) stack-i)))))
355 (when compare
356 ;; parser stacks are identical
357 (setq active-parser-count (1- active-parser-count))
358 (when (> wisi-debug 1)
359 (message "terminate identical parser %d (%d active)"
360 (+ parser-i parser-j 1) active-parser-count))
361 (setf (wisi-parser-state-active (aref parser-states (+ parser-i parser-j 1))) nil)
362 (when (= active-parser-count 1)
363 ;; the actions for the two parsers are not
364 ;; identical, but either is good enough for
365 ;; indentation and navigation, so we just do the
366 ;; actions for the one that is not terminating.
367 (let ((parser-state (aref parser-states parser-i)))
368 (wisi-execute-pending (wisi-parser-state-label parser-state)
369 (wisi-parser-state-pending parser-state))
370 (setf (wisi-parser-state-pending parser-state) nil)
371 ))
372 ))))
373 )))
374 active-parser-count)
375
376 (defun wisi-parse-max-pos (tokens)
377 "Return max position in tokens, or point if tokens nil."
378 (let ((result (if tokens 0 (point))))
379 (mapc
380 (lambda (token)
381 (when (cddr token)
382 (setq result (max (cddr token) result))))
383 tokens)
384 result)
385 )
386
387 (defun wisi-parse-exec-action (func nonterm tokens)
388 "Execute action if all tokens past wisi-cache-max."
389 ;; We don't execute actions if all tokens are before wisi-cache-max,
390 ;; because later actions can update existing caches, and if the
391 ;; parse fails that won't happen. It also saves time.
392 ;;
393 ;; Also skip if no tokens; nothing to do. This can happen when all
394 ;; tokens in a grammar statement are optional.
395 (if (< 0 (length tokens))
396 (if (>= (wisi-parse-max-pos tokens) wisi-cache-max)
397
398 (funcall func nonterm tokens)
399
400 (when (> wisi-debug 1)
401 (message "... action skipped; before wisi-cache-max %d" wisi-cache-max)))
402
403 (when (> wisi-debug 1)
404 (message "... action skipped; no tokens"))
405 ))
406
407 (defun wisi-execute-pending (parser-label pending)
408 (when (> wisi-debug 1) (message "%d: pending actions:" parser-label))
409 (while pending
410 (when (> wisi-debug 1) (message "%s" (car pending)))
411
412 (let ((func-args (pop pending)))
413 (wisi-parse-exec-action (nth 0 func-args) (nth 1 func-args) (cl-caddr func-args)))
414 ))
415
416 (defun wisi-parse-1 (token parser-state pendingp actions gotos)
417 "Perform one shift or reduce on PARSER-STATE.
418 If PENDINGP, push actions onto PARSER-STATE.pending; otherwise execute them.
419 See `wisi-parse' for full details.
420 Return nil or new parser (a wisi-parse-state struct)."
421 (let* ((state (aref (wisi-parser-state-stack parser-state)
422 (wisi-parser-state-sp parser-state)))
423 (parse-action (wisent-parse-action (car token) (aref actions state)))
424 new-parser-state)
425
426 (when (> wisi-debug 1)
427 ;; output trace info
428 (if (> wisi-debug 2)
429 (progn
430 ;; put top 10 stack items
431 (let* ((count (min 20 (wisi-parser-state-sp parser-state)))
432 (msg (make-vector (+ 1 count) nil)))
433 (dotimes (i count)
434 (aset msg (- count i)
435 (aref (wisi-parser-state-stack parser-state) (- (wisi-parser-state-sp parser-state) i)))
436 )
437 (message "%d: %s: %d: %s"
438 (wisi-parser-state-label parser-state)
439 (wisi-parser-state-active parser-state)
440 (wisi-parser-state-sp parser-state)
441 msg))
442 (message " %d: %s: %s" state token parse-action))
443 (message "%d: %d: %s: %s" (wisi-parser-state-label parser-state) state token parse-action)))
444
445 (when (and (listp parse-action)
446 (not (symbolp (car parse-action))))
447 ;; Conflict; spawn a new parser.
448 (setq new-parser-state
449 (make-wisi-parser-state
450 :active nil
451 :stack (vconcat (wisi-parser-state-stack parser-state))
452 :sp (wisi-parser-state-sp parser-state)
453 :pending (wisi-parser-state-pending parser-state)))
454
455 (wisi-parse-2 (cadr parse-action) token new-parser-state t gotos)
456 (setq pendingp t)
457 (setq parse-action (car parse-action))
458 );; when
459
460 ;; current parser
461 (wisi-parse-2 parse-action token parser-state pendingp gotos)
462
463 new-parser-state))
464
465 (defun wisi-parse-2 (action token parser-state pendingp gotos)
466 "Execute parser ACTION (must not be a conflict).
467 Return nil."
468 (cond
469 ((eq action 'accept)
470 (setf (wisi-parser-state-active parser-state) 'accept))
471
472 ((eq action 'error)
473 (setf (wisi-parser-state-active parser-state) 'error))
474
475 ((natnump action)
476 ;; Shift token and new state (= action) onto stack
477 (let ((stack (wisi-parser-state-stack parser-state)); reference
478 (sp (wisi-parser-state-sp parser-state))); copy
479 (setq sp (+ sp 2))
480 (aset stack (1- sp) token)
481 (aset stack sp action)
482 (setf (wisi-parser-state-sp parser-state) sp))
483 (setf (wisi-parser-state-active parser-state) 'shift))
484
485 (t
486 (wisi-parse-reduce action parser-state pendingp gotos)
487 (setf (wisi-parser-state-active parser-state) 'reduce))
488 ))
489
490 (defun wisi-nonterm-bounds (stack i j)
491 "Return a pair (START . END), the buffer region for a nonterminal.
492 STACK is the parser stack. I and J are the indices in STACK of
493 the first and last tokens of the nonterminal."
494 (let ((start (cadr (aref stack i)))
495 (end (cddr (aref stack j))))
496 (while (and (or (not start) (not end))
497 (/= i j))
498 (cond
499 ((not start)
500 ;; item i is an empty production
501 (setq start (cadr (aref stack (setq i (+ i 2))))))
502
503 ((not end)
504 ;; item j is an empty production
505 (setq end (cddr (aref stack (setq j (- j 2))))))
506
507 (t (setq i j))))
508 (and start end (cons start end))))
509
510 (defun wisi-parse-reduce (action parser-state pendingp gotos)
511 "Reduce PARSER-STATE.stack, and execute or pend ACTION."
512 (let* ((stack (wisi-parser-state-stack parser-state)); reference
513 (sp (wisi-parser-state-sp parser-state)); copy
514 (token-count (nth 2 action))
515 (nonterm (nth 0 action))
516 (nonterm-region (when (> token-count 0)
517 (wisi-nonterm-bounds stack (- sp (* 2 (1- token-count)) 1) (1- sp))))
518 (post-reduce-state (aref stack (- sp (* 2 token-count))))
519 (new-state (cdr (assoc nonterm (aref gotos post-reduce-state))))
520 (tokens (make-vector token-count nil)))
521
522 (when (not new-state)
523 (error "no goto for %s %d" nonterm post-reduce-state))
524
525 (when (nth 1 action)
526 ;; don't need wisi-tokens for a null user action
527 (dotimes (i token-count)
528 (aset tokens (- token-count i 1) (aref stack (- sp (* 2 i) 1)))))
529
530 (setq sp (+ 2 (- sp (* 2 token-count))))
531 (aset stack (1- sp) (cons nonterm nonterm-region))
532 (aset stack sp new-state)
533 (setf (wisi-parser-state-sp parser-state) sp)
534
535 (when (nth 1 action)
536 ;; nothing to do for a null user action
537 (if pendingp
538 (if (wisi-parser-state-pending parser-state)
539 (setf (wisi-parser-state-pending parser-state)
540 (append (wisi-parser-state-pending parser-state)
541 (list (list (nth 1 action) nonterm tokens))))
542 (setf (wisi-parser-state-pending parser-state)
543 (list (list (nth 1 action) nonterm tokens))))
544
545 ;; Not pending.
546 (wisi-parse-exec-action (nth 1 action) nonterm tokens)
547 ))
548 ))
549
550 (provide 'wisi-parse)
551 ;;; wisi-parse.el ends here