]> code.delx.au - gnu-emacs-elpa/blob - packages/wisi/wisi-parse.el
bd7ce7eec216b7383f8db455a4b0dee661451d80
[gnu-emacs-elpa] / packages / wisi / wisi-parse.el
1 ;;; wisi-parse.el --- Wisi parser
2
3 ;; Copyright (C) 2013, 2014 Free Software Foundation, Inc.
4
5 ;; This file is part of GNU Emacs.
6
7 ;; GNU Emacs is free software: you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation, either version 3 of the License, or
10 ;; (at your option) any later version.
11
12 ;; GNU Emacs is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
16
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
19
20
21 ;;; Commentary:
22
23 ;; An extended LALR parser, that handles shift/reduce and
24 ;; reduce/reduce conflicts by spawning parallel parsers to follow each
25 ;; path.
26
27 ;;; Code:
28
29 (require 'cl-lib)
30 (require 'semantic/wisent)
31
32 (cl-defstruct (wisi-parser-state
33 (:copier nil))
34 label ;; integer identifying parser for debug
35
36 active
37 ;; 'shift - need new token
38 ;; 'reduce - need reduce
39 ;; 'accept - parsing completed
40 ;; 'error - failed, error not reported yet
41 ;; nil - terminated
42 ;;
43 ;; 'pending-shift, 'pending-reduce - newly created parser; see wisi-parse
44
45 stack
46 ;; Each stack item takes two slots: (token-symbol token-text (token-start . token-end)), state
47 ;; token-text is nil for nonterminals.
48 ;; this is _not_ the same as the wisent-parse stack; that leaves out token-symbol.
49
50 sp ;; stack pointer
51
52 pending
53 ;; list of (action-symbol stack-fragment)
54 )
55
56 (defun wisi-error-msg (message &rest args)
57 (let ((line (line-number-at-pos))
58 (col (- (point) (line-beginning-position))))
59 (format
60 "%s:%d:%d: %s"
61 (file-name-nondirectory (buffer-name)) ;; buffer-file-name is sometimes nil here!?
62 line col
63 (apply 'format message args))))
64
65 (defvar wisi-parse-error nil)
66 (put 'wisi-parse-error
67 'error-conditions
68 '(error wisi-parse-error))
69 (put 'wisi-parse-error
70 'error-message
71 "wisi parse error")
72
73 (defvar wisi-parse-max-parallel 15
74 "Maximum number of parallel parsers for acceptable performance.
75 If a file needs more than this, it's probably an indication that
76 the grammar is excessively redundant.")
77
78 (defvar wisi-parse-max-parallel-current (cons 0 0)
79 "Cons (count . point); Maximum number of parallel parsers used in most recent parse,
80 point at which that max was spawned.")
81
82 (defvar wisi-debug 0
83 "wisi debug mode:
84 0 : normal - ignore parse errors, for indenting new code
85 1 : report parse errors (for running tests)
86 2 : show parse states, position point at parse errors, debug-on-error works in parser
87 3 : also show top 10 items of parser stack.")
88
89 (defun wisi-parse (automaton lexer)
90 "Parse input using the automaton specified in AUTOMATON.
91
92 - AUTOMATON is the parse table generated by `wisi-compile-grammar'.
93
94 - LEXER is a function with no argument called by the parser to
95 obtain the next token in input, as a list (symbol text start
96 . end), where `symbol' is the terminal symbol, `text' is the
97 token string, `start . end' is the range in the buffer."
98 (let* ((actions (aref automaton 0))
99 (gotos (aref automaton 1))
100 (parser-states ;; vector of parallel parser states
101 (vector
102 (make-wisi-parser-state
103 :label 0
104 :active 'shift
105 :stack (make-vector wisent-parse-max-stack-size nil)
106 :sp 0
107 :pending nil)))
108 (active-parser-count 1)
109 active-parser-count-prev
110 (active 'shift)
111 (token (funcall lexer))
112 some-pending)
113
114 (setq wisi-parse-max-parallel-current (cons 0 0))
115
116 (aset (wisi-parser-state-stack (aref parser-states 0)) 0 0) ;; Initial state
117
118 (while (not (eq active 'accept))
119 (setq active-parser-count-prev active-parser-count)
120 (setq some-pending nil)
121 (dotimes (parser-index (length parser-states))
122 (when (eq active (wisi-parser-state-active (aref parser-states parser-index)))
123 (let* ((parser-state (aref parser-states parser-index))
124 (result (wisi-parse-1 token parser-state (> active-parser-count 1) actions gotos)))
125 (when result
126 ;; spawn a new parser
127 (when (= active-parser-count wisi-parse-max-parallel)
128 (signal 'wisi-parse-error
129 (wisi-error-msg (concat "too many parallel parsers required;"
130 " simplify grammar, or increase `wisi-parse-max-parallel'"))))
131 (let ((j (wisi-free-parser parser-states)))
132 (cond
133 ((= j -1)
134 ;; Add to parser-states; the new parser won't be executed
135 ;; again in this parser-index loop.
136 (setq parser-states (vconcat parser-states (vector nil)))
137 (setq j (1- (length parser-states))))
138 ((< j parser-index)
139 ;; The new parser won't be executed again in this
140 ;; parser-index loop; nothing to do.
141 )
142 (t
143 ;; Don't let the new parser execute again in this
144 ;; parser-index loop.
145 (setq some-pending t)
146 (setf (wisi-parser-state-active result)
147 (cl-case (wisi-parser-state-active result)
148 (shift 'pending-shift)
149 (reduce 'pending-reduce)
150 )))
151 )
152 (setq active-parser-count (1+ active-parser-count))
153 (when (> active-parser-count (car wisi-parse-max-parallel-current))
154 (setq wisi-parse-max-parallel-current (cons active-parser-count (point))))
155 (setf (wisi-parser-state-label result) j)
156 (aset parser-states j result))
157 (when (> wisi-debug 1)
158 (message "spawn parser (%d active)" active-parser-count)))
159
160 (when (eq 'error (wisi-parser-state-active parser-state))
161 (setq active-parser-count (1- active-parser-count))
162 (when (> wisi-debug 1)
163 (message "terminate parser (%d active)" active-parser-count))
164 (cl-case active-parser-count
165 (0
166 (cond
167 ((= active-parser-count-prev 1)
168 ;; We were not in a parallel parse; report the error.
169 (let ((state (aref (wisi-parser-state-stack parser-state)
170 (wisi-parser-state-sp parser-state))))
171 (signal 'wisi-parse-error
172 (wisi-error-msg "syntax error in grammar state %d; unexpected %s, expecting one of %s"
173 state
174 (nth 1 token)
175 (mapcar 'car (aref actions state))))
176 ))
177 (t
178 ;; Report errors from all parsers that failed on this token.
179 (let ((msg))
180 (dotimes (_ (length parser-states))
181 (let* ((parser-state (aref parser-states parser-index))
182 (state (aref (wisi-parser-state-stack parser-state)
183 (wisi-parser-state-sp parser-state))))
184 (when (eq 'error (wisi-parser-state-active parser-state))
185 (setq msg
186 (concat msg
187 (when msg "\n")
188 (wisi-error-msg
189 "syntax error in grammar state %d; unexpected %s, expecting one of %s"
190 state
191 (nth 1 token)
192 (mapcar 'car (aref actions state)))))
193 )))
194 (signal 'wisi-parse-error msg)))
195 ))
196
197 (1
198 (setf (wisi-parser-state-active parser-state) nil); Don't save error for later.
199 (wisi-execute-pending (wisi-parser-state-pending
200 (aref parser-states (wisi-active-parser parser-states))))
201 (setf (wisi-parser-state-pending
202 (aref parser-states (wisi-active-parser parser-states)))
203 nil))
204 (t
205 ;; We were in a parallel parse, and this parser
206 ;; failed; mark it inactive, don't save error for
207 ;; later.
208 (setf (wisi-parser-state-active parser-state) nil)
209 )))
210 )));; end dotimes
211
212 (when some-pending
213 ;; Change pending-* parsers to *.
214 (dotimes (parser-index (length parser-states))
215 (cond
216 ((eq (wisi-parser-state-active (aref parser-states parser-index)) 'pending-shift)
217 (setf (wisi-parser-state-active (aref parser-states parser-index)) 'shift))
218 ((eq (wisi-parser-state-active (aref parser-states parser-index)) 'pending-reduce)
219 (setf (wisi-parser-state-active (aref parser-states parser-index)) 'reduce))
220 )))
221
222 (setq active (wisi-parsers-active parser-states active-parser-count))
223 (when (eq active 'shift)
224 (when (> active-parser-count 1)
225 (setq active-parser-count (wisi-parse-elim-identical parser-states active-parser-count)))
226 (setq token (funcall lexer)))
227 )
228 (when (> active-parser-count 1)
229 (error "ambiguous parse result"))))
230
231 (defun wisi-parsers-active (parser-states active-count)
232 "Return the type of parser cycle to execute.
233 PARSER-STATES[*].active is the last action a parser took. If it
234 was 'shift, that parser used the input token, and should not be
235 executed again until another input token is available, after all
236 parsers have shifted the current token or terminated.
237
238 'accept : all PARSER-STATES have active set to nil or 'accept -
239 done parsing
240
241 'shift : all PARSER-STATES have active set to nil, 'accept, or
242 'shift - get a new token, execute 'shift parsers.
243
244 'reduce : some PARSER-STATES have active set to 'reduce - no new
245 token, execute 'reduce parsers."
246 (let ((result nil)
247 (i 0)
248 (shift-count 0)
249 (accept-count 0)
250 active)
251 (while (and (not result)
252 (< i (length parser-states)))
253 (setq active (wisi-parser-state-active (aref parser-states i)))
254 (cond
255 ((eq active 'shift) (setq shift-count (1+ shift-count)))
256 ((eq active 'reduce) (setq result 'reduce))
257 ((eq active 'accept) (setq accept-count (1+ accept-count)))
258 )
259 (setq i (1+ i)))
260
261 (cond
262 (result )
263 ((= accept-count active-count)
264 'accept)
265 ((= (+ shift-count accept-count) active-count)
266 'shift)
267 (t (error "unexpected result in wisi-parsers-active"))
268 )))
269
270 (defun wisi-free-parser (parser-states)
271 "Return index to a non-active parser in PARSER-STATES, -1 if there is none."
272 (let ((result nil)
273 (i 0))
274 (while (and (not result)
275 (< i (length parser-states)))
276 (when (not (wisi-parser-state-active (aref parser-states i)))
277 (setq result i))
278 (setq i (1+ i)))
279 (if result result -1)))
280
281 (defun wisi-active-parser (parser-states)
282 "Return index to the first active parser in PARSER-STATES."
283 (let ((result nil)
284 (i 0))
285 (while (and (not result)
286 (< i (length parser-states)))
287 (when (wisi-parser-state-active (aref parser-states i))
288 (setq result i))
289 (setq i (1+ i)))
290 (unless result
291 (error "no active parsers"))
292 result))
293
294 (defun wisi-parse-elim-identical (parser-states active-parser-count)
295 "Check for parsers in PARSER-STATES that have reached identical states eliminate one.
296 Return new ACTIVE-PARSER-COUNT. Assumes all parsers have active
297 nil, 'shift, or 'accept."
298 ;; parser-states passed by reference; active-parser-count by copy
299 ;; see test/ada_mode-slices.adb for example
300 (dotimes (parser-i (1- (length parser-states)))
301 (when (wisi-parser-state-active (aref parser-states parser-i))
302 (dotimes (parser-j (- (length parser-states) parser-i 1))
303 (when (wisi-parser-state-active (aref parser-states (+ parser-i parser-j 1)))
304 (when (eq (wisi-parser-state-sp (aref parser-states parser-i))
305 (wisi-parser-state-sp (aref parser-states (+ parser-i parser-j 1))))
306 (let ((compare t))
307 (dotimes (stack-i (wisi-parser-state-sp (aref parser-states parser-i)))
308 (setq
309 compare
310 (and compare
311 (equal (aref (wisi-parser-state-stack (aref parser-states parser-i)) stack-i)
312 (aref (wisi-parser-state-stack (aref parser-states (+ parser-i parser-j 1))) stack-i)))))
313 (when compare
314 ;; parser stacks are identical
315 (setq active-parser-count (1- active-parser-count))
316 (when (> wisi-debug 1)
317 (message "terminate identical parser %d (%d active)"
318 (+ parser-i parser-j 1) active-parser-count))
319 (when (= active-parser-count 1)
320 ;; the actions for the two parsers are not
321 ;; identical, but either is good enough for
322 ;; indentation and navigation, so we just do one.
323 (when (> wisi-debug 1) (message "executing actions for %d" (+ parser-i parser-j 1)))
324 (wisi-execute-pending (wisi-parser-state-pending (aref parser-states (+ parser-i parser-j 1))))
325 (setf (wisi-parser-state-pending (aref parser-states (+ parser-i parser-j 1))) nil)
326
327 ;; clear pending of other parser so it can be reused
328 (setf (wisi-parser-state-pending (aref parser-states parser-i)) nil))
329
330 (setf (wisi-parser-state-active (aref parser-states (+ parser-i parser-j 1))) nil))
331 )))
332 )))
333 active-parser-count)
334
335 (defun wisi-execute-pending (pending)
336 (while pending
337 (when (> wisi-debug 1) (message "%s" (car pending)))
338
339 (cond
340 ((and (>= emacs-major-version 24)
341 (>= emacs-minor-version 3))
342 (apply (pop pending)))
343
344 (t
345 (let ((func-args (pop pending)))
346 (apply (car func-args) (cdr func-args))))
347 )))
348
349 (defun wisi-parse-1 (token parser-state pendingp actions gotos)
350 "Perform one shift or reduce on PARSER-STATE.
351 If PENDINGP, push actions onto PARSER-STATE.pending; otherwise execute them.
352 See `wisi-parse' for full details.
353 Return nil or new parser (a wisi-parse-state struct)."
354 (let* ((state (aref (wisi-parser-state-stack parser-state)
355 (wisi-parser-state-sp parser-state)))
356 (parse-action (wisent-parse-action (car token) (aref actions state)))
357 new-parser-state)
358
359 (when (> wisi-debug 1)
360 ;; output trace info
361 (if (> wisi-debug 2)
362 (progn
363 ;; put top 10 stack items
364 (let* ((count (min 20 (wisi-parser-state-sp parser-state)))
365 (msg (make-vector (+ 1 count) nil)))
366 (dotimes (i count)
367 (aset msg (- count i)
368 (aref (wisi-parser-state-stack parser-state) (- (wisi-parser-state-sp parser-state) i)))
369 )
370 (message "%d: %s: %d: %s"
371 (wisi-parser-state-label parser-state)
372 (wisi-parser-state-active parser-state)
373 (wisi-parser-state-sp parser-state)
374 msg))
375 (message " %d: %s: %s" state token parse-action))
376 (message "%d: %d: %s: %s" (wisi-parser-state-label parser-state) state token parse-action)))
377
378 (when (and (listp parse-action)
379 (not (symbolp (car parse-action))))
380 ;; Conflict; spawn a new parser.
381 (setq new-parser-state
382 (make-wisi-parser-state
383 :active nil
384 :stack (vconcat (wisi-parser-state-stack parser-state))
385 :sp (wisi-parser-state-sp parser-state)
386 :pending (wisi-parser-state-pending parser-state)))
387
388 (wisi-parse-2 (cadr parse-action) token new-parser-state t gotos)
389 (setq pendingp t)
390 (setq parse-action (car parse-action))
391 );; when
392
393 ;; current parser
394 (wisi-parse-2 parse-action token parser-state pendingp gotos)
395
396 new-parser-state))
397
398 (defun wisi-parse-2 (action token parser-state pendingp gotos)
399 "Execute parser ACTION (must not be a conflict).
400 Return nil."
401 (cond
402 ((eq action 'accept)
403 (setf (wisi-parser-state-active parser-state) 'accept))
404
405 ((eq action 'error)
406 (setf (wisi-parser-state-active parser-state) 'error))
407
408 ((natnump action)
409 ;; Shift token and new state (= action) onto stack
410 (let ((stack (wisi-parser-state-stack parser-state)); reference
411 (sp (wisi-parser-state-sp parser-state))); copy
412 (setq sp (+ sp 2))
413 (aset stack (1- sp) token)
414 (aset stack sp action)
415 (setf (wisi-parser-state-sp parser-state) sp))
416 (setf (wisi-parser-state-active parser-state) 'shift))
417
418 (t
419 (wisi-parse-reduce action parser-state pendingp gotos)
420 (setf (wisi-parser-state-active parser-state) 'reduce))
421 ))
422
423 (defun wisi-nonterm-bounds (stack i j)
424 "Return a pair (START . END), the buffer region for a nonterminal.
425 STACK is the parser stack. I and J are the indices in STACK of
426 the first and last tokens of the nonterminal."
427 (let ((start (cl-caddr (aref stack i)))
428 (end (cl-cdddr (aref stack j))))
429 (while (and (or (not start) (not end))
430 (/= i j))
431 (cond
432 ((not start)
433 ;; item i is an empty production
434 (setq start (cl-caddr (aref stack (setq i (+ i 2))))))
435
436 ((not end)
437 ;; item j is an empty production
438 (setq end (cl-cdddr (aref stack (setq j (- j 2))))))
439
440 (t (setq i j))))
441 (and start end (cons start end))))
442
443 (defun wisi-parse-reduce (action parser-state pendingp gotos)
444 "Reduce PARSER-STATE.stack, and execute or pend ACTION."
445 (let* ((stack (wisi-parser-state-stack parser-state)); reference
446 (sp (wisi-parser-state-sp parser-state)); copy
447 (token-count (or (nth 2 action) 0))
448 (nonterm (nth 0 action))
449 (nonterm-region (when (> token-count 0)
450 (wisi-nonterm-bounds stack (- sp (* 2 (1- token-count)) 1) (1- sp))))
451 (post-reduce-state (aref stack (- sp (* 2 token-count))))
452 (new-state (cdr (assoc nonterm (aref gotos post-reduce-state))))
453 tokens)
454 (when (not new-state)
455 (error "no goto for %s %d" nonterm post-reduce-state))
456 (if (= 1 token-count)
457 (setq tokens (list (aref stack (1- sp))))
458 (dotimes (i token-count)
459 (push (aref stack (- sp (* 2 i) 1)) tokens)))
460 (setq sp (+ 2 (- sp (* 2 token-count))))
461 (aset stack (1- sp) (cons nonterm (cons nil nonterm-region)))
462 (aset stack sp new-state)
463 (setf (wisi-parser-state-sp parser-state) sp)
464 (if pendingp
465 (if (wisi-parser-state-pending parser-state)
466 (setf (wisi-parser-state-pending parser-state)
467 (append (wisi-parser-state-pending parser-state)
468 (list (list (nth 1 action) tokens))))
469 (setf (wisi-parser-state-pending parser-state)
470 (list (list (nth 1 action) tokens))))
471 (funcall (nth 1 action) tokens))
472 ))
473
474 (provide 'wisi-parse)
475 ;;; wisi-parse.el ends here