]> code.delx.au - gnu-emacs-elpa/blob - packages/wisi/wisi-parse.el
Add ada-mode, wisi packages
[gnu-emacs-elpa] / packages / wisi / wisi-parse.el
1 ;; Wisi parser
2 ;;
3 ;; An extended LALR parser, that handles shift/reduce and
4 ;; reduce/reduce conflicts by spawning parallel parsers to follow each
5 ;; path.
6
7 (require 'semantic/wisent)
8 (eval-when-compile (require 'cl-macs))
9
10 (cl-defstruct (wisi-parser-state
11 (:copier nil))
12 label ;; integer identifying parser for debug
13
14 active
15 ;; 'shift - need new token
16 ;; 'reduce - need reduce
17 ;; 'accept - parsing completed
18 ;; 'error - failed, error not reported yet
19 ;; nil - terminated
20 ;;
21 ;; 'pending-shift, 'pending-reduce - newly created parser; see wisi-parse
22
23 stack
24 ;; Each stack item takes two slots: (token-symbol token-text (token-start . token-end)), state
25 ;; token-text is nil for nonterminals.
26 ;; this is _not_ the same as the wisent-parse stack; that leaves out token-symbol.
27
28 sp ;; stack pointer
29
30 pending
31 ;; list of (action-symbol stack-fragment)
32 )
33
34 (defun wisi-error-msg (message &rest args)
35 (let ((line (line-number-at-pos))
36 (col (- (point) (line-beginning-position))))
37 (format
38 "%s:%d:%d: %s"
39 (file-name-nondirectory (buffer-name)) ;; buffer-file-name is sometimes nil here!?
40 line col
41 (apply 'format message args))))
42
43 (defvar wisi-parse-error nil)
44 (put 'wisi-parse-error
45 'error-conditions
46 '(error wisi-parse-error))
47 (put 'wisi-parse-error
48 'error-message
49 "wisi parse error")
50
51 (defvar wisi-parse-max-parallel 15
52 "Maximum number of parallel parsers for acceptable performance.
53 If a file needs more than this, it's probably an indication that
54 the grammar is excessively redundant.")
55
56 (defun wisi-parse (automaton lexer)
57 "Parse input using the automaton specified in AUTOMATON.
58
59 - AUTOMATON is the parse table generated by `wisi-compile-grammar'.
60
61 - LEXER is a function with no argument called by the parser to
62 obtain the next token in input, as a list (symbol text start
63 . end), where `symbol' is the terminal symbol, `text' is the
64 token string, `start . end' is the range in the buffer."
65 (let* ((actions (aref automaton 0))
66 (gotos (aref automaton 1))
67 (parser-states ;; vector of parallel parser states
68 (vector
69 (make-wisi-parser-state
70 :label 0
71 :active 'shift
72 :stack (make-vector wisent-parse-max-stack-size nil)
73 ;; FIXME: better error message when stack overflows, so
74 ;; user can set wisent-parse-max-stack-size in file-local
75 ;; vars.
76 :sp 0
77 :pending nil)))
78 (active-parser-count 1)
79 active-parser-count-prev
80 (active 'shift)
81 (token (funcall lexer))
82 some-pending)
83
84 (aset (wisi-parser-state-stack (aref parser-states 0)) 0 0) ;; Initial state
85
86 (while (not (eq active 'accept))
87 (setq active-parser-count-prev active-parser-count)
88 (setq some-pending nil)
89 (dotimes (parser-index (length parser-states))
90 (when (eq active (wisi-parser-state-active (aref parser-states parser-index)))
91 (let* ((parser-state (aref parser-states parser-index))
92 (result (wisi-parse-1 token parser-state (> active-parser-count 1) actions gotos)))
93 (when result
94 ;; spawn a new parser
95 (when (= active-parser-count wisi-parse-max-parallel)
96 (signal 'wisi-parse-error
97 (wisi-error-msg (concat "too many parallel parsers required;"
98 " simplify grammar, or increase `wisi-parse-max-parallel'"))))
99 (let ((j (wisi-free-parser parser-states)))
100 (cond
101 ((= j -1)
102 ;; add to parser-states; the new parser won't be executed again in this parser-index loop
103 (setq parser-states (vconcat parser-states (vector nil)))
104 (setq j (1- (length parser-states))))
105 ((< j parser-index)
106 ;; the new parser won't be executed again in this parser-index loop; nothing to do
107 )
108 (t
109 ;; don't let the new parser execute again in this parser-index loop
110 (setq some-pending t)
111 (setf (wisi-parser-state-active result)
112 (cl-case (wisi-parser-state-active result)
113 (shift 'pending-shift)
114 (reduce 'pending-reduce)
115 )))
116 )
117 (setq active-parser-count (1+ active-parser-count))
118 (setf (wisi-parser-state-label result) j)
119 (aset parser-states j result))
120 (when (> wisi-debug 1) (message "spawn parser (%d active)" active-parser-count)))
121
122 (when (eq 'error (wisi-parser-state-active parser-state))
123 (setq active-parser-count (1- active-parser-count))
124 (when (> wisi-debug 1) (message "terminate parser (%d active)" active-parser-count))
125 (cl-case active-parser-count
126 (0
127 (cond
128 ((= active-parser-count-prev 1)
129 ;; we were not in a parallel parse; report the error
130 (let ((state (aref (wisi-parser-state-stack parser-state) (wisi-parser-state-sp parser-state))))
131 (signal 'wisi-parse-error
132 (wisi-error-msg "syntax error in grammar state %d; unexpected %s, expecting one of %s"
133 state
134 (nth 1 token)
135 (mapcar 'car (aref actions state))))
136 ))
137 (t
138 ;; report errors from all parsers that failed on this token
139 (let ((msg))
140 (dotimes (index (length parser-states))
141 (let* ((parser-state (aref parser-states parser-index))
142 (state (aref (wisi-parser-state-stack parser-state)
143 (wisi-parser-state-sp parser-state))))
144 (when (eq 'error (wisi-parser-state-active parser-state))
145 (setq msg
146 (concat msg
147 (when msg "\n")
148 (wisi-error-msg
149 "syntax error in grammar state %d; unexpected %s, expecting one of %s"
150 state
151 (nth 1 token)
152 (mapcar 'car (aref actions state)))))
153 )))
154 (signal 'wisi-parse-error msg)))
155 ))
156
157 (1
158 (setf (wisi-parser-state-active parser-state) nil); don't save error for later
159 (wisi-execute-pending (wisi-parser-state-pending
160 (aref parser-states (wisi-active-parser parser-states))))
161 (setf (wisi-parser-state-pending
162 (aref parser-states (wisi-active-parser parser-states)))
163 nil))
164 (t
165 ;; we were in a parallel parse, and this parser
166 ;; failed; mark it inactive, don't save error for
167 ;; later
168 (setf (wisi-parser-state-active parser-state) nil)
169 )))
170 )));; end dotimes
171
172 (when some-pending
173 ;; change pending-* parsers to *
174 (dotimes (parser-index (length parser-states))
175 (cond
176 ((eq (wisi-parser-state-active (aref parser-states parser-index)) 'pending-shift)
177 (setf (wisi-parser-state-active (aref parser-states parser-index)) 'shift))
178 ((eq (wisi-parser-state-active (aref parser-states parser-index)) 'pending-reduce)
179 (setf (wisi-parser-state-active (aref parser-states parser-index)) 'reduce))
180 )))
181
182 (setq active (wisi-parsers-active parser-states active-parser-count))
183 (when (eq active 'shift)
184 (when (> active-parser-count 1)
185 (setq active-parser-count (wisi-parse-elim-identical parser-states active-parser-count)))
186 (setq token (funcall lexer)))
187 )
188 (when (> active-parser-count 1)
189 (error "ambiguous parse result"))))
190
191 (defun wisi-parsers-active (parser-states active-count)
192 "Return the type of parser cycle to execute.
193 PARSER-STATES[*].active is the last action a parser took. If it
194 was 'shift, that parser used the input token, and should not be
195 executed again until another input token is available, after all
196 parsers have shifted the current token or terminated.
197
198 'accept : all PARSER-STATES have active set to nil or 'accept -
199 done parsing
200
201 'shift : all PARSER-STATES have active set to nil, 'accept, or
202 'shift - get a new token, execute 'shift parsers.
203
204 'reduce : some PARSER-STATES have active set to 'reduce - no new
205 token, execute 'reduce parsers."
206 (let ((result nil)
207 (i 0)
208 (shift-count 0)
209 (accept-count 0)
210 active)
211 (while (and (not result)
212 (< i (length parser-states)))
213 (setq active (wisi-parser-state-active (aref parser-states i)))
214 (cond
215 ((eq active 'shift) (setq shift-count (1+ shift-count)))
216 ((eq active 'reduce) (setq result 'reduce))
217 ((eq active 'accept) (setq accept-count (1+ accept-count)))
218 )
219 (setq i (1+ i)))
220
221 (cond
222 (result )
223 ((= accept-count active-count)
224 'accept)
225 ((= (+ shift-count accept-count) active-count)
226 'shift)
227 (t (error "unexpected result in wisi-parsers-active"))
228 )))
229
230 (defun wisi-free-parser (parser-states)
231 "Return index to a non-active parser in PARSER-STATES, -1 if there is none."
232 (let ((result nil)
233 (i 0))
234 (while (and (not result)
235 (< i (length parser-states)))
236 (when (not (wisi-parser-state-active (aref parser-states i)))
237 (setq result i))
238 (setq i (1+ i)))
239 (if result result -1)))
240
241 (defun wisi-active-parser (parser-states)
242 "Return index to the first active parser in PARSER-STATES."
243 (let ((result nil)
244 (i 0))
245 (while (and (not result)
246 (< i (length parser-states)))
247 (when (wisi-parser-state-active (aref parser-states i))
248 (setq result i))
249 (setq i (1+ i)))
250 (unless result
251 (error "no active parsers"))
252 result))
253
254 (defun wisi-parse-elim-identical (parser-states active-parser-count)
255 "Check for parsers in PARSER-STATES that have reached identical states eliminate one.
256 Return new ACTIVE-PARSER-COUNT. Assumes all parsers have active
257 nil, 'shift, or 'accept."
258 ;; parser-states passed by reference; active-parser-count by copy
259 ;; see test/ada_mode-slices.adb for example
260 (dotimes (parser-i (1- (length parser-states)))
261 (when (wisi-parser-state-active (aref parser-states parser-i))
262 (dotimes (parser-j (- (length parser-states) parser-i 1))
263 (when (wisi-parser-state-active (aref parser-states (+ parser-i parser-j 1)))
264 (when (eq (wisi-parser-state-sp (aref parser-states parser-i))
265 (wisi-parser-state-sp (aref parser-states (+ parser-i parser-j 1))))
266 (let ((compare t))
267 (dotimes (stack-i (wisi-parser-state-sp (aref parser-states parser-i)))
268 (setq
269 compare
270 (and compare
271 (equal (aref (wisi-parser-state-stack (aref parser-states parser-i)) stack-i)
272 (aref (wisi-parser-state-stack (aref parser-states (+ parser-i parser-j 1))) stack-i)))))
273 (when compare
274 ;; parser stacks are identical
275 (setq active-parser-count (1- active-parser-count))
276 (when (> wisi-debug 1)
277 (message "terminate identical parser %d (%d active)"
278 (+ parser-i parser-j 1) active-parser-count))
279 (when (= active-parser-count 1)
280 ;; the actions for the two parsers are not
281 ;; identical, but either is good enough for
282 ;; indentation and navigation, so we just do one.
283 (when (> wisi-debug 1) (message "executing actions for %d" (+ parser-i parser-j 1)))
284 (wisi-execute-pending (wisi-parser-state-pending (aref parser-states (+ parser-i parser-j 1))))
285 (setf (wisi-parser-state-pending (aref parser-states (+ parser-i parser-j 1))) nil)
286
287 ;; clear pending of other parser so it can be reused
288 (setf (wisi-parser-state-pending (aref parser-states parser-i)) nil))
289
290 (setf (wisi-parser-state-active (aref parser-states (+ parser-i parser-j 1))) nil))
291 )))
292 )))
293 active-parser-count)
294
295 (defun wisi-execute-pending (pending)
296 (while pending
297 (when (> wisi-debug 1) (message "%s" (car pending)))
298 (apply (pop pending))))
299
300 (defun wisi-parse-1 (token parser-state pendingp actions gotos)
301 "Perform one shift or reduce on PARSER-STATE.
302 If PENDINGP, push actions onto PARSER-STATE.pending; otherwise execute them.
303 See `wisi-parse' for full details.
304 Return nil or new parser (a wisi-parse-state struct)."
305 (let* ((state (aref (wisi-parser-state-stack parser-state)
306 (wisi-parser-state-sp parser-state)))
307 (parse-action (wisent-parse-action (car token) (aref actions state)))
308 new-parser-state)
309
310 (when (> wisi-debug 1)
311 ;; output trace info
312 (if (> wisi-debug 2)
313 (progn
314 ;; put top 10 stack items
315 (let* ((count (min 20 (wisi-parser-state-sp parser-state)))
316 (msg (make-vector (+ 1 count) nil)))
317 (dotimes (i count)
318 (aset msg (- count i)
319 (aref (wisi-parser-state-stack parser-state) (- (wisi-parser-state-sp parser-state) i)))
320 )
321 (message "%d: %s: %d: %s"
322 (wisi-parser-state-label parser-state)
323 (wisi-parser-state-active parser-state)
324 (wisi-parser-state-sp parser-state)
325 msg))
326 (message " %d: %s: %s" state token parse-action))
327 (message "%d: %d: %s: %s" (wisi-parser-state-label parser-state) state token parse-action)))
328
329 (when (and (listp parse-action)
330 (not (symbolp (car parse-action))))
331 ;; Conflict; spawn a new parser.
332 (setq new-parser-state
333 (make-wisi-parser-state
334 :active nil
335 :stack (vconcat (wisi-parser-state-stack parser-state))
336 :sp (wisi-parser-state-sp parser-state)
337 :pending (wisi-parser-state-pending parser-state)))
338
339 (wisi-parse-2 (cadr parse-action) token new-parser-state t gotos)
340 (setq pendingp t)
341 (setq parse-action (car parse-action))
342 );; when
343
344 ;; current parser
345 (wisi-parse-2 parse-action token parser-state pendingp gotos)
346
347 new-parser-state))
348
349 (defun wisi-parse-2 (action token parser-state pendingp gotos)
350 "Execute parser ACTION (must not be a conflict).
351 Return nil."
352 (cond
353 ((eq action 'accept)
354 (setf (wisi-parser-state-active parser-state) 'accept))
355
356 ((eq action 'error)
357 (setf (wisi-parser-state-active parser-state) 'error))
358
359 ((natnump action)
360 ;; Shift token and new state (= action) onto stack
361 (let ((stack (wisi-parser-state-stack parser-state)); reference
362 (sp (wisi-parser-state-sp parser-state))); copy
363 (setq sp (+ sp 2))
364 (aset stack (1- sp) token)
365 (aset stack sp action)
366 (setf (wisi-parser-state-sp parser-state) sp))
367 (setf (wisi-parser-state-active parser-state) 'shift))
368
369 (t
370 (wisi-parse-reduce action parser-state pendingp gotos)
371 (setf (wisi-parser-state-active parser-state) 'reduce))
372 ))
373
374 (defun wisi-nonterm-bounds (stack i j)
375 "Return a pair (START . END), the buffer region for a nonterminal.
376 STACK is the parser stack. I and J are the indices in STACK of
377 the first and last tokens of the nonterminal."
378 (let ((start (cl-caddr (aref stack i)))
379 (end (cl-cdddr (aref stack j))))
380 (while (and (or (not start) (not end))
381 (/= i j))
382 (cond
383 ((not start)
384 ;; item i is an empty production
385 (setq start (cl-caddr (aref stack (setq i (+ i 2))))))
386
387 ((not end)
388 ;; item j is an empty production
389 (setq end (cl-cdddr (aref stack (setq j (- j 2))))))
390
391 (t (setq i j))))
392 (and start end (cons start end))))
393
394 (defun wisi-parse-reduce (action parser-state pendingp gotos)
395 "Reduce PARSER-STATE.stack, and execute or pend ACTION."
396 (let* ((stack (wisi-parser-state-stack parser-state)); reference
397 (sp (wisi-parser-state-sp parser-state)); copy
398 (token-count (or (nth 2 action) 0))
399 (nonterm (nth 0 action))
400 (nonterm-region (when (> token-count 0)
401 (wisi-nonterm-bounds stack (- sp (* 2 (1- token-count)) 1) (1- sp))))
402 (post-reduce-state (aref stack (- sp (* 2 token-count))))
403 (new-state (cdr (assoc nonterm (aref gotos post-reduce-state))))
404 tokens)
405 (when (not new-state)
406 (error "no goto for %s %d" nonterm post-reduce-state))
407 (if (= 1 token-count)
408 (setq tokens (list (aref stack (1- sp))))
409 (dotimes (i token-count)
410 (push (aref stack (- sp (* 2 i) 1)) tokens)))
411 (setq sp (+ 2 (- sp (* 2 token-count))))
412 (aset stack (1- sp) (cons nonterm (cons nil nonterm-region)))
413 (aset stack sp new-state)
414 (setf (wisi-parser-state-sp parser-state) sp)
415 (if pendingp
416 (if (wisi-parser-state-pending parser-state)
417 (setf (wisi-parser-state-pending parser-state)
418 (append (wisi-parser-state-pending parser-state)
419 (list (list (nth 1 action) tokens))))
420 (setf (wisi-parser-state-pending parser-state)
421 (list (list (nth 1 action) tokens))))
422 (funcall (nth 1 action) tokens))
423 ))
424
425 (provide 'wisi-parse)
426 ;; end of file