]> code.delx.au - gnu-emacs-elpa/blob - packages/wisi/wisi-parse.el
b62b3d68ad39a096daa209deeace197deb982b5e
[gnu-emacs-elpa] / packages / wisi / wisi-parse.el
1 ;;; wisi-parse.el --- Wisi parser
2
3 ;; Copyright (C) 2013 Free Software Foundation, Inc.
4
5 ;; This file is part of GNU Emacs.
6
7 ;; GNU Emacs is free software: you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation, either version 3 of the License, or
10 ;; (at your option) any later version.
11
12 ;; GNU Emacs is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
16
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
19
20
21 ;;; Commentary:
22
23 ;; An extended LALR parser, that handles shift/reduce and
24 ;; reduce/reduce conflicts by spawning parallel parsers to follow each
25 ;; path.
26
27 ;;; Code:
28
29 (require 'semantic/wisent)
30 (eval-when-compile (require 'cl-lib))
31
32 (cl-defstruct (wisi-parser-state
33 (:copier nil))
34 label ;; integer identifying parser for debug
35
36 active
37 ;; 'shift - need new token
38 ;; 'reduce - need reduce
39 ;; 'accept - parsing completed
40 ;; 'error - failed, error not reported yet
41 ;; nil - terminated
42 ;;
43 ;; 'pending-shift, 'pending-reduce - newly created parser; see wisi-parse
44
45 stack
46 ;; Each stack item takes two slots: (token-symbol token-text (token-start . token-end)), state
47 ;; token-text is nil for nonterminals.
48 ;; this is _not_ the same as the wisent-parse stack; that leaves out token-symbol.
49
50 sp ;; stack pointer
51
52 pending
53 ;; list of (action-symbol stack-fragment)
54 )
55
56 (defun wisi-error-msg (message &rest args)
57 (let ((line (line-number-at-pos))
58 (col (- (point) (line-beginning-position))))
59 (format
60 "%s:%d:%d: %s"
61 (file-name-nondirectory (buffer-name)) ;; buffer-file-name is sometimes nil here!?
62 line col
63 (apply 'format message args))))
64
65 (defvar wisi-parse-error nil)
66 (put 'wisi-parse-error
67 'error-conditions
68 '(error wisi-parse-error))
69 (put 'wisi-parse-error
70 'error-message
71 "wisi parse error")
72
73 (defvar wisi-parse-max-parallel 15
74 "Maximum number of parallel parsers for acceptable performance.
75 If a file needs more than this, it's probably an indication that
76 the grammar is excessively redundant.")
77
78 (defun wisi-parse (automaton lexer)
79 "Parse input using the automaton specified in AUTOMATON.
80
81 - AUTOMATON is the parse table generated by `wisi-compile-grammar'.
82
83 - LEXER is a function with no argument called by the parser to
84 obtain the next token in input, as a list (symbol text start
85 . end), where `symbol' is the terminal symbol, `text' is the
86 token string, `start . end' is the range in the buffer."
87 (let* ((actions (aref automaton 0))
88 (gotos (aref automaton 1))
89 (parser-states ;; vector of parallel parser states
90 (vector
91 (make-wisi-parser-state
92 :label 0
93 :active 'shift
94 :stack (make-vector wisent-parse-max-stack-size nil)
95 ;; FIXME: better error message when stack overflows, so
96 ;; user can set wisent-parse-max-stack-size in file-local
97 ;; vars.
98 :sp 0
99 :pending nil)))
100 (active-parser-count 1)
101 active-parser-count-prev
102 (active 'shift)
103 (token (funcall lexer))
104 some-pending)
105
106 (aset (wisi-parser-state-stack (aref parser-states 0)) 0 0) ;; Initial state
107
108 (while (not (eq active 'accept))
109 (setq active-parser-count-prev active-parser-count)
110 (setq some-pending nil)
111 (dotimes (parser-index (length parser-states))
112 (when (eq active (wisi-parser-state-active (aref parser-states parser-index)))
113 (let* ((parser-state (aref parser-states parser-index))
114 (result (wisi-parse-1 token parser-state (> active-parser-count 1) actions gotos)))
115 (when result
116 ;; spawn a new parser
117 (when (= active-parser-count wisi-parse-max-parallel)
118 (signal 'wisi-parse-error
119 (wisi-error-msg (concat "too many parallel parsers required;"
120 " simplify grammar, or increase `wisi-parse-max-parallel'"))))
121 (let ((j (wisi-free-parser parser-states)))
122 (cond
123 ((= j -1)
124 ;; add to parser-states; the new parser won't be executed again in this parser-index loop
125 (setq parser-states (vconcat parser-states (vector nil)))
126 (setq j (1- (length parser-states))))
127 ((< j parser-index)
128 ;; the new parser won't be executed again in this parser-index loop; nothing to do
129 )
130 (t
131 ;; don't let the new parser execute again in this parser-index loop
132 (setq some-pending t)
133 (setf (wisi-parser-state-active result)
134 (cl-case (wisi-parser-state-active result)
135 (shift 'pending-shift)
136 (reduce 'pending-reduce)
137 )))
138 )
139 (setq active-parser-count (1+ active-parser-count))
140 (setf (wisi-parser-state-label result) j)
141 (aset parser-states j result))
142 (when (> wisi-debug 1) (message "spawn parser (%d active)" active-parser-count)))
143
144 (when (eq 'error (wisi-parser-state-active parser-state))
145 (setq active-parser-count (1- active-parser-count))
146 (when (> wisi-debug 1) (message "terminate parser (%d active)" active-parser-count))
147 (cl-case active-parser-count
148 (0
149 (cond
150 ((= active-parser-count-prev 1)
151 ;; we were not in a parallel parse; report the error
152 (let ((state (aref (wisi-parser-state-stack parser-state) (wisi-parser-state-sp parser-state))))
153 (signal 'wisi-parse-error
154 (wisi-error-msg "syntax error in grammar state %d; unexpected %s, expecting one of %s"
155 state
156 (nth 1 token)
157 (mapcar 'car (aref actions state))))
158 ))
159 (t
160 ;; report errors from all parsers that failed on this token
161 (let ((msg))
162 (dotimes (index (length parser-states))
163 (let* ((parser-state (aref parser-states parser-index))
164 (state (aref (wisi-parser-state-stack parser-state)
165 (wisi-parser-state-sp parser-state))))
166 (when (eq 'error (wisi-parser-state-active parser-state))
167 (setq msg
168 (concat msg
169 (when msg "\n")
170 (wisi-error-msg
171 "syntax error in grammar state %d; unexpected %s, expecting one of %s"
172 state
173 (nth 1 token)
174 (mapcar 'car (aref actions state)))))
175 )))
176 (signal 'wisi-parse-error msg)))
177 ))
178
179 (1
180 (setf (wisi-parser-state-active parser-state) nil); don't save error for later
181 (wisi-execute-pending (wisi-parser-state-pending
182 (aref parser-states (wisi-active-parser parser-states))))
183 (setf (wisi-parser-state-pending
184 (aref parser-states (wisi-active-parser parser-states)))
185 nil))
186 (t
187 ;; we were in a parallel parse, and this parser
188 ;; failed; mark it inactive, don't save error for
189 ;; later
190 (setf (wisi-parser-state-active parser-state) nil)
191 )))
192 )));; end dotimes
193
194 (when some-pending
195 ;; change pending-* parsers to *
196 (dotimes (parser-index (length parser-states))
197 (cond
198 ((eq (wisi-parser-state-active (aref parser-states parser-index)) 'pending-shift)
199 (setf (wisi-parser-state-active (aref parser-states parser-index)) 'shift))
200 ((eq (wisi-parser-state-active (aref parser-states parser-index)) 'pending-reduce)
201 (setf (wisi-parser-state-active (aref parser-states parser-index)) 'reduce))
202 )))
203
204 (setq active (wisi-parsers-active parser-states active-parser-count))
205 (when (eq active 'shift)
206 (when (> active-parser-count 1)
207 (setq active-parser-count (wisi-parse-elim-identical parser-states active-parser-count)))
208 (setq token (funcall lexer)))
209 )
210 (when (> active-parser-count 1)
211 (error "ambiguous parse result"))))
212
213 (defun wisi-parsers-active (parser-states active-count)
214 "Return the type of parser cycle to execute.
215 PARSER-STATES[*].active is the last action a parser took. If it
216 was 'shift, that parser used the input token, and should not be
217 executed again until another input token is available, after all
218 parsers have shifted the current token or terminated.
219
220 'accept : all PARSER-STATES have active set to nil or 'accept -
221 done parsing
222
223 'shift : all PARSER-STATES have active set to nil, 'accept, or
224 'shift - get a new token, execute 'shift parsers.
225
226 'reduce : some PARSER-STATES have active set to 'reduce - no new
227 token, execute 'reduce parsers."
228 (let ((result nil)
229 (i 0)
230 (shift-count 0)
231 (accept-count 0)
232 active)
233 (while (and (not result)
234 (< i (length parser-states)))
235 (setq active (wisi-parser-state-active (aref parser-states i)))
236 (cond
237 ((eq active 'shift) (setq shift-count (1+ shift-count)))
238 ((eq active 'reduce) (setq result 'reduce))
239 ((eq active 'accept) (setq accept-count (1+ accept-count)))
240 )
241 (setq i (1+ i)))
242
243 (cond
244 (result )
245 ((= accept-count active-count)
246 'accept)
247 ((= (+ shift-count accept-count) active-count)
248 'shift)
249 (t (error "unexpected result in wisi-parsers-active"))
250 )))
251
252 (defun wisi-free-parser (parser-states)
253 "Return index to a non-active parser in PARSER-STATES, -1 if there is none."
254 (let ((result nil)
255 (i 0))
256 (while (and (not result)
257 (< i (length parser-states)))
258 (when (not (wisi-parser-state-active (aref parser-states i)))
259 (setq result i))
260 (setq i (1+ i)))
261 (if result result -1)))
262
263 (defun wisi-active-parser (parser-states)
264 "Return index to the first active parser in PARSER-STATES."
265 (let ((result nil)
266 (i 0))
267 (while (and (not result)
268 (< i (length parser-states)))
269 (when (wisi-parser-state-active (aref parser-states i))
270 (setq result i))
271 (setq i (1+ i)))
272 (unless result
273 (error "no active parsers"))
274 result))
275
276 (defun wisi-parse-elim-identical (parser-states active-parser-count)
277 "Check for parsers in PARSER-STATES that have reached identical states eliminate one.
278 Return new ACTIVE-PARSER-COUNT. Assumes all parsers have active
279 nil, 'shift, or 'accept."
280 ;; parser-states passed by reference; active-parser-count by copy
281 ;; see test/ada_mode-slices.adb for example
282 (dotimes (parser-i (1- (length parser-states)))
283 (when (wisi-parser-state-active (aref parser-states parser-i))
284 (dotimes (parser-j (- (length parser-states) parser-i 1))
285 (when (wisi-parser-state-active (aref parser-states (+ parser-i parser-j 1)))
286 (when (eq (wisi-parser-state-sp (aref parser-states parser-i))
287 (wisi-parser-state-sp (aref parser-states (+ parser-i parser-j 1))))
288 (let ((compare t))
289 (dotimes (stack-i (wisi-parser-state-sp (aref parser-states parser-i)))
290 (setq
291 compare
292 (and compare
293 (equal (aref (wisi-parser-state-stack (aref parser-states parser-i)) stack-i)
294 (aref (wisi-parser-state-stack (aref parser-states (+ parser-i parser-j 1))) stack-i)))))
295 (when compare
296 ;; parser stacks are identical
297 (setq active-parser-count (1- active-parser-count))
298 (when (> wisi-debug 1)
299 (message "terminate identical parser %d (%d active)"
300 (+ parser-i parser-j 1) active-parser-count))
301 (when (= active-parser-count 1)
302 ;; the actions for the two parsers are not
303 ;; identical, but either is good enough for
304 ;; indentation and navigation, so we just do one.
305 (when (> wisi-debug 1) (message "executing actions for %d" (+ parser-i parser-j 1)))
306 (wisi-execute-pending (wisi-parser-state-pending (aref parser-states (+ parser-i parser-j 1))))
307 (setf (wisi-parser-state-pending (aref parser-states (+ parser-i parser-j 1))) nil)
308
309 ;; clear pending of other parser so it can be reused
310 (setf (wisi-parser-state-pending (aref parser-states parser-i)) nil))
311
312 (setf (wisi-parser-state-active (aref parser-states (+ parser-i parser-j 1))) nil))
313 )))
314 )))
315 active-parser-count)
316
317 (defun wisi-execute-pending (pending)
318 (while pending
319 (when (> wisi-debug 1) (message "%s" (car pending)))
320 (apply (pop pending))))
321
322 (defun wisi-parse-1 (token parser-state pendingp actions gotos)
323 "Perform one shift or reduce on PARSER-STATE.
324 If PENDINGP, push actions onto PARSER-STATE.pending; otherwise execute them.
325 See `wisi-parse' for full details.
326 Return nil or new parser (a wisi-parse-state struct)."
327 (let* ((state (aref (wisi-parser-state-stack parser-state)
328 (wisi-parser-state-sp parser-state)))
329 (parse-action (wisent-parse-action (car token) (aref actions state)))
330 new-parser-state)
331
332 (when (> wisi-debug 1)
333 ;; output trace info
334 (if (> wisi-debug 2)
335 (progn
336 ;; put top 10 stack items
337 (let* ((count (min 20 (wisi-parser-state-sp parser-state)))
338 (msg (make-vector (+ 1 count) nil)))
339 (dotimes (i count)
340 (aset msg (- count i)
341 (aref (wisi-parser-state-stack parser-state) (- (wisi-parser-state-sp parser-state) i)))
342 )
343 (message "%d: %s: %d: %s"
344 (wisi-parser-state-label parser-state)
345 (wisi-parser-state-active parser-state)
346 (wisi-parser-state-sp parser-state)
347 msg))
348 (message " %d: %s: %s" state token parse-action))
349 (message "%d: %d: %s: %s" (wisi-parser-state-label parser-state) state token parse-action)))
350
351 (when (and (listp parse-action)
352 (not (symbolp (car parse-action))))
353 ;; Conflict; spawn a new parser.
354 (setq new-parser-state
355 (make-wisi-parser-state
356 :active nil
357 :stack (vconcat (wisi-parser-state-stack parser-state))
358 :sp (wisi-parser-state-sp parser-state)
359 :pending (wisi-parser-state-pending parser-state)))
360
361 (wisi-parse-2 (cadr parse-action) token new-parser-state t gotos)
362 (setq pendingp t)
363 (setq parse-action (car parse-action))
364 );; when
365
366 ;; current parser
367 (wisi-parse-2 parse-action token parser-state pendingp gotos)
368
369 new-parser-state))
370
371 (defun wisi-parse-2 (action token parser-state pendingp gotos)
372 "Execute parser ACTION (must not be a conflict).
373 Return nil."
374 (cond
375 ((eq action 'accept)
376 (setf (wisi-parser-state-active parser-state) 'accept))
377
378 ((eq action 'error)
379 (setf (wisi-parser-state-active parser-state) 'error))
380
381 ((natnump action)
382 ;; Shift token and new state (= action) onto stack
383 (let ((stack (wisi-parser-state-stack parser-state)); reference
384 (sp (wisi-parser-state-sp parser-state))); copy
385 (setq sp (+ sp 2))
386 (aset stack (1- sp) token)
387 (aset stack sp action)
388 (setf (wisi-parser-state-sp parser-state) sp))
389 (setf (wisi-parser-state-active parser-state) 'shift))
390
391 (t
392 (wisi-parse-reduce action parser-state pendingp gotos)
393 (setf (wisi-parser-state-active parser-state) 'reduce))
394 ))
395
396 (defun wisi-nonterm-bounds (stack i j)
397 "Return a pair (START . END), the buffer region for a nonterminal.
398 STACK is the parser stack. I and J are the indices in STACK of
399 the first and last tokens of the nonterminal."
400 (let ((start (cl-caddr (aref stack i)))
401 (end (cl-cdddr (aref stack j))))
402 (while (and (or (not start) (not end))
403 (/= i j))
404 (cond
405 ((not start)
406 ;; item i is an empty production
407 (setq start (cl-caddr (aref stack (setq i (+ i 2))))))
408
409 ((not end)
410 ;; item j is an empty production
411 (setq end (cl-cdddr (aref stack (setq j (- j 2))))))
412
413 (t (setq i j))))
414 (and start end (cons start end))))
415
416 (defun wisi-parse-reduce (action parser-state pendingp gotos)
417 "Reduce PARSER-STATE.stack, and execute or pend ACTION."
418 (let* ((stack (wisi-parser-state-stack parser-state)); reference
419 (sp (wisi-parser-state-sp parser-state)); copy
420 (token-count (or (nth 2 action) 0))
421 (nonterm (nth 0 action))
422 (nonterm-region (when (> token-count 0)
423 (wisi-nonterm-bounds stack (- sp (* 2 (1- token-count)) 1) (1- sp))))
424 (post-reduce-state (aref stack (- sp (* 2 token-count))))
425 (new-state (cdr (assoc nonterm (aref gotos post-reduce-state))))
426 tokens)
427 (when (not new-state)
428 (error "no goto for %s %d" nonterm post-reduce-state))
429 (if (= 1 token-count)
430 (setq tokens (list (aref stack (1- sp))))
431 (dotimes (i token-count)
432 (push (aref stack (- sp (* 2 i) 1)) tokens)))
433 (setq sp (+ 2 (- sp (* 2 token-count))))
434 (aset stack (1- sp) (cons nonterm (cons nil nonterm-region)))
435 (aset stack sp new-state)
436 (setf (wisi-parser-state-sp parser-state) sp)
437 (if pendingp
438 (if (wisi-parser-state-pending parser-state)
439 (setf (wisi-parser-state-pending parser-state)
440 (append (wisi-parser-state-pending parser-state)
441 (list (list (nth 1 action) tokens))))
442 (setf (wisi-parser-state-pending parser-state)
443 (list (list (nth 1 action) tokens))))
444 (funcall (nth 1 action) tokens))
445 ))
446
447 (provide 'wisi-parse)
448 ;; end of file