]> code.delx.au - gnu-emacs-elpa/blob - packages/wisi/wisi.el
509e6baacf299ebd18fdfc8fd39dc905f964a910
[gnu-emacs-elpa] / packages / wisi / wisi.el
1 ;;; wisi.el --- Utilities for implementing an indentation/navigation engine using a generalized LALR parser
2 ;;
3 ;; Copyright (C) 2012 - 2014 Free Software Foundation, Inc.
4 ;;
5 ;; Author: Stephen Leake <stephen_leake@member.fsf.org>
6 ;; Maintainer: Stephen Leake <stephen_leake@member.fsf.org>
7 ;; Keywords: parser
8 ;; indentation
9 ;; navigation
10 ;; Version: 1.0.6
11 ;; package-requires: ((cl-lib "0.4") (emacs "24.2"))
12 ;; URL: http://stephe-leake.org/emacs/ada-mode/emacs-ada-mode.html
13 ;;
14 ;; This file is part of GNU Emacs.
15 ;;
16 ;; GNU Emacs is free software: you can redistribute it and/or modify
17 ;; it under the terms of the GNU General Public License as published by
18 ;; the Free Software Foundation, either version 3 of the License, or
19 ;; (at your option) any later version.
20 ;;
21 ;; GNU Emacs is distributed in the hope that it will be useful,
22 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
23 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 ;; GNU General Public License for more details.
25 ;;
26 ;; You should have received a copy of the GNU General Public License
27 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
28 ;;
29
30 ;;; Commentary:
31
32 ;;;; History: see NEWS-wisi.text
33 ;;
34 ;;;; indentation algorithm overview
35 ;;
36 ;; This design is inspired in part by experience writing a SMIE
37 ;; indentation engine for Ada, and the wisent parser.
38 ;;
39 ;; The general approach to indenting a given token is to find the
40 ;; start of the statement it is part of, or some other relevant point
41 ;; in the statement, and indent relative to that. So we need a parser
42 ;; that lets us find statement indent points from arbitrary places in
43 ;; the code.
44 ;;
45 ;; For example, the grammar for Ada as represented by the EBNF in LRM
46 ;; Annex P is not LALR(1), so we use a generalized LALR(1) parser (see
47 ;; wisi-parse, wisi-compile).
48 ;;
49 ;; The parser actions cache indentation and other information as text
50 ;; properties of tokens in statements.
51 ;;
52 ;; An indentation engine moves text in the buffer, as does user
53 ;; editing, so we can't rely on character positions remaining
54 ;; constant. So the parser actions use markers to store
55 ;; positions. Text properties also move with the text.
56 ;;
57 ;; The stored information includes a marker at each statement indent
58 ;; point. Thus, the indentation algorithm is: find the previous token
59 ;; with cached information, and either indent from it, or fetch from
60 ;; it the marker for a previous statement indent point, and indent
61 ;; relative to that.
62 ;;
63 ;; Since we have a cache (the text properties), we need to consider
64 ;; when to invalidate it. Ideally, we invalidate only when a change to
65 ;; the buffer would change the result of a parse that crosses that
66 ;; change, or starts after that change. Changes in whitespace
67 ;; (indentation and newlines) do not affect an Ada parse. Other
68 ;; languages are sensitive to newlines (Bash for example) or
69 ;; indentation (Python). Adding comments does not change a parse,
70 ;; unless code is commented out. For now we invalidate the cache after
71 ;; the edit point if the change involves anything other than
72 ;; whitespace.
73 ;;
74 ;;;; comparison to the SMIE parser
75 ;;
76 ;; The central problem to be solved in building the SMIE parser is
77 ;; grammar precedence conflicts; the general solution is refining
78 ;; keywords so that each new keyword can be assigned a unique
79 ;; precedence. This means ad hoc code must be written to determine the
80 ;; correct refinement for each language keyword from the surrounding
81 ;; tokens. In effect, for a complex language like Ada, the knowledge
82 ;; of the language grammar is mostly embedded in the refinement code;
83 ;; only a small amount is in the refined grammar. Implementing a SMIE
84 ;; parser for a new language involves the same amount of work as the
85 ;; first language.
86 ;;
87 ;; Using a generalized LALR parser avoids that particular problem;
88 ;; assuming the language is already defined by a grammar, it is only a
89 ;; matter of a format change to teach the wisi parser the
90 ;; language. The problem in a wisi indentation engine is caching the
91 ;; output of the parser in a useful way, since we can't start the
92 ;; parser from arbitrary places in the code (as we can with the SMIE
93 ;; parser). A second problem is determining when to invalidate the
94 ;; cache. But these problems are independent of the language being
95 ;; parsed, so once we have one wisi indentation engine working,
96 ;; adapting it to new languages should be quite simple.
97 ;;
98 ;; The SMIE parser does not find the start of each statement, only the
99 ;; first language keyword in each statement; additional code must be
100 ;; written to find the statement start and indent points. The wisi
101 ;; parser finds the statement start and indent points directly.
102 ;;
103 ;; In SMIE, it is best if each grammar rule is a complete statement,
104 ;; so forward-sexp will traverse the entire statement. If nested
105 ;; non-terminals are used, forward-sexp may stop inside one of the
106 ;; nested non-terminals. This problem does not occur with the wisi
107 ;; parser.
108 ;;
109 ;; A downside of the wisi parser is conflicts in the grammar; they can
110 ;; be much more difficult to resolve than in the SMIE parser. The
111 ;; generalized parser helps by handling conflicts, but it does so by
112 ;; running multiple parsers in parallel, persuing each choice in the
113 ;; conflict. If the conflict is due to a genuine ambiguity, both paths
114 ;; will succeed, which causes the parse to fail, since it is not clear
115 ;; which set of text properties to store. Even if one branch
116 ;; ultimately fails, running parallel parsers over large sections of
117 ;; code is slow. Finally, this approach can lead to exponential growth
118 ;; in the number of parsers. So grammar conflicts must still be
119 ;; analyzed and minimized.
120 ;;
121 ;; In addition, the complete grammar must be specified; in smie, it is
122 ;; often possible to specify a subset of the grammar.
123 ;;
124 ;;;; grammar compiler and parser
125 ;;
126 ;; Since we are using a generalized LALR(1) parser, we cannot use any
127 ;; of the wisent grammar functions. We use OpenToken wisi-generate
128 ;; to compile BNF to Elisp source (similar to
129 ;; semantic-grammar-create-package), and wisi-compile-grammar to
130 ;; compile that to the parser table.
131 ;;
132 ;; Semantic provides a complex lexer, more complicated than we need
133 ;; for indentation. So we use the elisp lexer, which consists of
134 ;; `forward-comment', `skip-syntax-forward', and `scan-sexp'. We wrap
135 ;; that in functions that return tokens in the form wisi-parse
136 ;; expects.
137 ;;
138 ;;;; lexer
139 ;;
140 ;; The lexer is `wisi-forward-token'. It relies on syntax properties,
141 ;; so syntax-propertize must be called on the text to be lexed before
142 ;; wisi-forward-token is called. In general, it is hard to determine
143 ;; an appropriate end-point for syntax-propertize, other than
144 ;; point-max. So we call (syntax-propertize point-max) in wisi-setup,
145 ;; and also call syntax-propertize in wisi-after-change.
146 ;;
147 ;;;; code style
148 ;;
149 ;; 'wisi' was originally short for "wisent indentation engine", but
150 ;; now is just a name.
151 ;;
152 ;; not using lexical-binding because we support Emacs 23
153 ;;
154 ;;;;;
155
156 ;;; Code:
157
158 (require 'cl-lib)
159 (require 'wisi-parse)
160
161 ;; WORKAROUND: for some reason, this condition doesn't work in batch mode!
162 ;; (when (and (= emacs-major-version 24)
163 ;; (= emacs-minor-version 2))
164 (require 'wisi-compat-24.2)
165 ;;)
166
167 ;;;; lexer
168
169 (defvar-local wisi-class-list nil)
170 (defvar-local wisi-keyword-table nil)
171 (defvar-local wisi-punctuation-table nil)
172 (defvar-local wisi-punctuation-table-max-length 0)
173 (defvar-local wisi-string-double-term nil);; string delimited by double quotes
174 (defvar-local wisi-string-quote-escape-doubled nil
175 "Non-nil if a string delimiter is escaped by doubling it (as in Ada).")
176 (defvar-local wisi-string-quote-escape nil
177 "Cons '(delim . character) where 'character' escapes quotes in strings delimited by 'delim'.")
178 (defvar-local wisi-string-single-term nil) ;; string delimited by single quotes
179 (defvar-local wisi-symbol-term nil)
180
181 (defun wisi-forward-token (&optional text-only)
182 "Move point forward across one token, skipping leading whitespace and comments.
183 Return the corresponding token, in a format determined by TEXT-ONLY:
184 TEXT-ONLY t: text
185 TEXT-ONLY nil: (token text start . end)
186 where:
187 `token' is a token symbol (not string) from `wisi-punctuation-table',
188 `wisi-keyword-table', `wisi-string-double-term', `wisi-string-double-term' or `wisi-symbol-term'.
189
190 `text' is the token text from the buffer
191
192 `start, end' are the character positions in the buffer of the start
193 and end of the token text.
194
195 If at end of buffer, returns `wisent-eoi-term'."
196 (forward-comment (point-max))
197 ;; skips leading whitespace, comment, trailing whitespace.
198
199 (let ((start (point))
200 ;; (info "(elisp)Syntax Table Internals" "*info elisp syntax*")
201 (syntax (syntax-class (syntax-after (point))))
202 token-id token-text)
203 (cond
204 ((eobp)
205 (setq token-text "")
206 (setq token-id wisent-eoi-term))
207
208 ((eq syntax 1)
209 ;; punctuation. Find the longest matching string in wisi-punctuation-table
210 (forward-char 1)
211 (let ((next-point (point))
212 temp-text temp-id done)
213 (while (not done)
214 (setq temp-text (buffer-substring-no-properties start (point)))
215 (setq temp-id (car (rassoc temp-text wisi-punctuation-table)))
216 (when temp-id
217 (setq token-text temp-text
218 token-id temp-id
219 next-point (point)))
220 (if (or
221 (eobp)
222 (= (- (point) start) wisi-punctuation-table-max-length))
223 (setq done t)
224 (forward-char 1))
225 )
226 (goto-char next-point)))
227
228 ((memq syntax '(4 5)) ;; open, close parenthesis
229 (forward-char 1)
230 (setq token-text (buffer-substring-no-properties start (point)))
231 (setq token-id (symbol-value (intern-soft token-text wisi-keyword-table))))
232
233 ((eq syntax 7)
234 ;; string quote, either single or double. we assume point is
235 ;; before the start quote, not the end quote
236 (let ((delim (char-after (point)))
237 (forward-sexp-function nil))
238 (condition-case err
239 (progn
240 (forward-sexp)
241
242 ;; point is now after the end quote; check for an escaped quote
243 (while (or
244 (and wisi-string-quote-escape-doubled
245 (eq (char-after (point)) delim))
246 (and (eq delim (car wisi-string-quote-escape))
247 (eq (char-before (1- (point))) (cdr wisi-string-quote-escape))))
248 (forward-sexp))
249 (setq token-text (buffer-substring-no-properties start (point)))
250 (setq token-id (if (= delim ?\") wisi-string-double-term wisi-string-single-term)))
251 (scan-error
252 ;; Something screwed up; we should not get here if
253 ;; syntax-propertize works properly.
254 (error "wisi-forward-token: forward-sexp failed %s" err)
255 ))))
256
257 (t ;; assuming word syntax
258 (skip-syntax-forward "w_'")
259 (setq token-text (buffer-substring-no-properties start (point)))
260 (setq token-id
261 (or (symbol-value (intern-soft (downcase token-text) wisi-keyword-table))
262 wisi-symbol-term)))
263 );; cond
264
265 (unless token-id
266 (signal 'wisi-parse-error
267 (wisi-error-msg "unrecognized token '%s'" (buffer-substring-no-properties start (point)))))
268
269 (if text-only
270 token-text
271 (cons token-id (cons token-text (cons start (point)))))
272 ))
273
274 (defun wisi-backward-token ()
275 "Move point backward across one token, skipping whitespace and comments.
276 Return (nil text start . end) - same structure as
277 wisi-forward-token, but does not look up symbol."
278 (forward-comment (- (point)))
279 ;; skips leading whitespace, comment, trailing whitespace.
280
281 ;; (info "(elisp)Syntax Table Internals" "*info elisp syntax*")
282 (let ((end (point))
283 (syntax (syntax-class (syntax-after (1- (point))))))
284 (cond
285 ((bobp) nil)
286
287 ((memq syntax '(4 5)) ;; open, close parenthesis
288 (backward-char 1))
289
290 ((eq syntax 7)
291 ;; a string quote. we assume we are after the end quote, not the start quote
292 (let ((forward-sexp-function nil))
293 (forward-sexp -1)))
294
295 (t
296 (if (zerop (skip-syntax-backward "."))
297 (skip-syntax-backward "w_'")))
298 )
299 (cons nil (cons (buffer-substring-no-properties (point) end) (cons (point) end)))
300 ))
301
302 ;;;; token info cache
303 ;;
304 ;; the cache stores the results of parsing as text properties on
305 ;; keywords, for use by the indention and motion engines.
306
307 (cl-defstruct
308 (wisi-cache
309 (:constructor wisi-cache-create)
310 (:copier nil))
311 nonterm;; nonterminal from parse (set by wisi-statement-action)
312
313 token
314 ;; terminal symbol from wisi-keyword-table or
315 ;; wisi-punctuation-table, or lower-level nonterminal from parse
316 ;; (set by wisi-statement-action)
317
318 last ;; pos of last char in token, relative to first (0 indexed)
319
320 class
321 ;; arbitrary lisp symbol, used for indentation and navigation.
322 ;; some classes are defined by wisi:
323 ;;
324 ;; 'block-middle - a block keyword (ie: if then else end), not at the start of a statement
325 ;;
326 ;; 'block-start - a block keyword at the start of a statement
327 ;;
328 ;; 'statement-start - the start of a statement
329 ;;
330 ;; 'open-paren
331 ;;
332 ;; others are language-specific
333
334 containing
335 ;; Marker at the containing keyword for this token.
336 ;; A containing keyword is an indent point; the start of a
337 ;; statement, or 'begin', 'then' or 'else' for a block of
338 ;; statements, etc.
339 ;; nil only for first token in buffer
340
341 prev ;; marker at previous motion token in statement; nil if none
342 next ;; marker at next motion token in statement; nil if none
343 end ;; marker at token at end of current statement
344 face ;; for font-lock. only set when regexp font-lock can't handle it
345 )
346
347 (defvar-local wisi-parse-table nil)
348
349 (defvar-local wisi-parse-failed nil
350 "Non-nil when a recent parse has failed - cleared when parse succeeds.")
351
352 (defvar-local wisi-parse-try nil
353 "Non-nil when parse is needed - cleared when parse succeeds.")
354
355 (defvar-local wisi-change-need-invalidate nil
356 "When non-nil, buffer position to invalidate from.
357 Used in before/after change functions.")
358
359 (defvar-local wisi-end-caches nil
360 "List of buffer positions of caches in current statement that need wisi-cache-end set.")
361
362 (defun wisi-invalidate-cache(&optional after)
363 "Invalidate parsing caches for the current buffer from AFTER to end of buffer.
364 Caches are the Emacs syntax cache, the wisi token cache, and the wisi parser cache."
365 (interactive)
366 (if (not after)
367 (setq after (point-min))
368 (setq after
369 (save-excursion
370 (goto-char after)
371 (line-beginning-position))))
372 (when (> wisi-debug 0) (message "wisi-invalidate %s:%d" (current-buffer) after))
373 (setq wisi-cache-max after)
374 (setq wisi-parse-try t)
375 (syntax-ppss-flush-cache after)
376 (with-silent-modifications
377 (remove-text-properties after (point-max) '(wisi-cache nil))))
378
379 (defun wisi-before-change (begin end)
380 "For `before-change-functions'."
381 ;; begin . end is range of text being deleted
382
383 ;; If jit-lock-after-change is before wisi-after-change in
384 ;; after-change-functions, it might use any invalid caches in the
385 ;; inserted text.
386 ;;
387 ;; So we check for that here, and ensure it is after
388 ;; wisi-after-change, which deletes the invalid caches
389 (when (boundp 'jit-lock-mode)
390 (when (memq 'wisi-after-change (memq 'jit-lock-after-change after-change-functions))
391 (setq after-change-functions (delete 'wisi-after-change after-change-functions))
392 (add-hook 'after-change-functions 'wisi-after-change nil t))
393 )
394
395 (setq wisi-change-need-invalidate nil)
396
397 (when (and (> end begin)
398 (>= wisi-cache-max begin))
399
400 (when wisi-parse-failed
401 ;; The parse was failing, probably due to bad syntax; this change
402 ;; may have fixed it, so try reparse.
403 (setq wisi-parse-try t))
404
405 (save-excursion
406 ;; don't invalidate parse for whitespace, string, or comment changes
407 (let (;; (info "(elisp)Parser State")
408 (state (syntax-ppss begin)))
409 ;; syntax-ppss has moved point to "begin".
410 (cond
411 ((or
412 (nth 3 state); in string
413 (nth 4 state)); in comment
414 ;; FIXME: check that entire range is in comment or string
415 )
416
417 ((progn
418 (skip-syntax-forward " " end);; does not skip newline
419 (eq (point) end)))
420
421 (t
422 (setq wisi-change-need-invalidate
423 (progn
424 (wisi-goto-statement-start)
425 (point))))
426 ))))
427 )
428
429 (defun wisi-after-change (begin end length)
430 "For `after-change-functions'."
431 ;; begin . end is range of text being inserted (empty if equal)
432
433 ;; (syntax-ppss-flush-cache begin) is in before-change-functions
434
435 (syntax-propertize end) ;; see comments above on "lexer" re syntax-propertize
436
437 ;; The parse was failing, probably due to bad syntax; this change
438 ;; may have fixed it, so try reparse.
439 (setq wisi-parse-try t)
440
441 ;; remove 'wisi-cache on inserted text, which could have caches
442 ;; from before the failed parse (or another buffer), and are in
443 ;; any case invalid.
444 (with-silent-modifications
445 (remove-text-properties begin end '(wisi-cache)))
446
447 (cond
448 ((>= wisi-cache-max begin)
449 ;; The parse had succeeded past the start of the inserted
450 ;; text.
451 (save-excursion
452 (let (need-invalidate
453 ;; (info "(elisp)Parser State")
454 (state (syntax-ppss begin)))
455 ;; syntax-ppss has moved point to "begin".
456 (cond
457 (wisi-change-need-invalidate
458 ;; wisi-before change determined the removed text alters the
459 ;; parse
460 (setq need-invalidate wisi-change-need-invalidate))
461
462 ((= end begin)
463 (setq need-invalidate nil))
464
465 ((or
466 (nth 3 state); in string
467 (nth 4 state)); in comment
468 ;; FIXME: insert newline in comment to create non-comment!?
469 ;; or paste a chunk of code
470 ;; => check that all of change region is comment or string
471 (setq need-invalidate nil)
472 ;; no caches to remove
473 )
474
475 ((progn
476 (skip-syntax-forward " " end);; does not skip newlines
477 (eq (point) end))
478 (setq need-invalidate nil))
479
480 (t
481 (setq need-invalidate begin))
482 )
483
484 (if need-invalidate
485 ;; The inserted or deleted text could alter the parse;
486 ;; wisi-invalidate-cache removes all 'wisi-cache.
487 (wisi-invalidate-cache need-invalidate)
488
489 ;; else move cache-max by the net change length.
490 (setq wisi-cache-max
491 (+ wisi-cache-max (- end begin length))))
492 )
493 ))
494
495 (t
496 ;; parse never attempted, or only done to before BEGIN. Just
497 ;; remove caches
498 (with-silent-modifications
499 (remove-text-properties begin end '(wisi-cache)))
500 )
501 ))
502
503 (defun wisi-get-cache (pos)
504 "Return `wisi-cache' struct from the `wisi-cache' text property at POS.
505 If accessing cache at a marker for a token as set by `wisi-cache-tokens', POS must be (1- mark)."
506 (get-text-property pos 'wisi-cache))
507
508 (defvar-local wisi-parse-error-msg nil)
509
510 (defun wisi-goto-error ()
511 "Move point to position in last error message (if any)."
512 (when (string-match ":\\([0-9]+\\):\\([0-9]+\\):" wisi-parse-error-msg)
513 (let ((line (string-to-number (match-string 1 wisi-parse-error-msg)))
514 (col (string-to-number (match-string 2 wisi-parse-error-msg))))
515 (goto-char (point-min))
516 (forward-line (1- line))
517 (forward-char col))))
518
519 (defun wisi-show-parse-error ()
520 "Show last wisi-parse error."
521 (interactive)
522 (cond
523 (wisi-parse-failed
524 (message wisi-parse-error-msg)
525 (wisi-goto-error))
526
527 (wisi-parse-try
528 (message "need parse"))
529
530 (t
531 (message "parse succeeded"))
532 ))
533
534 (defvar wisi-post-parse-succeed-hook nil
535 "Hook run after parse succeeds.")
536
537 (defun wisi-validate-cache (pos)
538 "Ensure cached data is valid at least up to POS in current buffer."
539 (let ((msg (when (> wisi-debug 0) (format "wisi: parsing %s:%d ..." (buffer-name) (line-number-at-pos pos)))))
540 (when (and wisi-parse-try
541 (< wisi-cache-max pos))
542 (when (> wisi-debug 0)
543 (message msg))
544
545 (setq wisi-parse-try nil)
546 (setq wisi-parse-error-msg nil)
547 (setq wisi-end-caches nil)
548
549 (save-excursion
550 (if (> wisi-debug 1)
551 ;; let debugger stop in wisi-parse
552 (progn
553 (wisi-parse wisi-parse-table 'wisi-forward-token)
554 (setq wisi-cache-max (point))
555 (setq wisi-parse-failed nil)
556 (run-hooks 'wisi-post-parse-succeed-hook))
557
558 ;; else capture errors from bad syntax, so higher level
559 ;; functions can try to continue and/or we don't bother the
560 ;; user.
561 (condition-case err
562 (progn
563 (wisi-parse wisi-parse-table 'wisi-forward-token)
564 (setq wisi-cache-max (point))
565 (setq wisi-parse-failed nil)
566 (run-hooks 'wisi-post-parse-succeed-hook))
567 (wisi-parse-error
568 (setq wisi-parse-failed t)
569 (setq wisi-parse-error-msg (cdr err)))
570 )))
571 (if wisi-parse-error-msg
572 ;; error
573 (when (> wisi-debug 0)
574 (message "%s error" msg)
575 (wisi-goto-error)
576 (error wisi-parse-error-msg))
577 ;; no msg; success
578 (when (> wisi-debug 0)
579 (message "%s done" msg)))
580 )))
581
582 (defun wisi-get-containing-cache (cache)
583 "Return cache from (wisi-cache-containing CACHE)."
584 (let ((containing (wisi-cache-containing cache)))
585 (and containing
586 (wisi-get-cache (1- containing)))))
587
588 (defun wisi-cache-region (cache)
589 "Return region designated by cache.
590 Point must be at cache."
591 (cons (point) (+ (point) (wisi-cache-last cache))))
592
593 (defun wisi-cache-text (cache)
594 "Return property-less buffer substring designated by cache.
595 Point must be at cache."
596 (buffer-substring-no-properties (point) (+ (point) (wisi-cache-last cache))))
597
598 ;;;; parse actions
599
600 (defun wisi-set-end (start-mark end-mark)
601 "Set END-MARK on all caches in `wisi-end-caches' in range START-MARK END-MARK,
602 delete from `wisi-end-caches'."
603 (let ((i 0)
604 pos cache)
605 (while (< i (length wisi-end-caches))
606 (setq pos (nth i wisi-end-caches))
607 (setq cache (wisi-get-cache pos))
608
609 (if (and (>= pos start-mark)
610 (< pos end-mark))
611 (progn
612 (setf (wisi-cache-end cache) end-mark)
613 (setq wisi-end-caches (delq pos wisi-end-caches)))
614
615 ;; else not in range
616 (setq i (1+ i)))
617 )))
618
619 (defvar wisi-tokens nil)
620 ;; keep byte-compiler happy; `wisi-tokens' is bound in action created
621 ;; by wisi-semantic-action
622
623 (defun wisi-statement-action (&rest pairs)
624 "Cache information in text properties of tokens.
625 Intended as a grammar non-terminal action.
626
627 PAIRS is of the form [TOKEN-NUMBER CLASS] ... where TOKEN-NUMBER
628 is the (1 indexed) token number in the production, CLASS is the wisi class of
629 that token. Use in a grammar action as:
630 (wisi-statement-action 1 'statement-start 7 'statement-end)"
631 (save-excursion
632 (let ((first-item t)
633 first-keyword-mark
634 (override-start nil))
635 (while pairs
636 (let* ((number (1- (pop pairs)))
637 (region (cddr (nth number wisi-tokens)));; wisi-tokens is let-bound in wisi-parse-reduce
638 (token (car (nth number wisi-tokens)))
639 (class (pop pairs))
640 (mark
641 ;; Marker one char into token, so indent-line-to
642 ;; inserts space before the mark, not after
643 (when region (copy-marker (1+ (car region)))))
644 cache)
645
646 (unless (memq class wisi-class-list)
647 (error "%s not in wisi-class-list" class))
648
649 (if region
650 (progn
651 (if (setq cache (wisi-get-cache (car region)))
652 ;; We are processing a previously set non-terminal; ie generic_formal_part in
653 ;;
654 ;; generic_package_declaration : generic_formal_part package_specification SEMICOLON
655 ;; (wisi-statement-action 1 'block-start 2 'block-middle 3 'statement-end)
656 ;;
657 ;; or simple_statement in
658 ;;
659 ;; statement : label_opt simple_statement
660 ;;
661 ;; override nonterm, class, containing
662 ;; set end only if not set yet (due to failed parse)
663 (progn
664 (cl-case (wisi-cache-class cache)
665 (block-start
666 (setf (wisi-cache-class cache)
667 (cond
668 ((eq override-start nil)
669 (cond
670 ((memq class '(block-start statement-start)) 'block-start)
671 (t 'block-middle)))
672
673 ((memq override-start '(block-start statement-start)) 'block-start)
674
675 (t (error "unexpected override-start"))
676 )))
677 (t
678 (setf (wisi-cache-class cache) (or override-start class)))
679 )
680 (setf (wisi-cache-nonterm cache) $nterm)
681 (setf (wisi-cache-containing cache) first-keyword-mark)
682 (unless (wisi-cache-end cache)
683 (if wisi-end-caches
684 (push (car region) wisi-end-caches)
685 (setq wisi-end-caches (list (car region)))
686 ))
687 )
688
689 ;; else create new cache
690 (with-silent-modifications
691 (put-text-property
692 (car region)
693 (1+ (car region))
694 'wisi-cache
695 (wisi-cache-create
696 :nonterm $nterm;; $nterm defined in wisi-semantic-action
697 :token token
698 :last (- (cdr region) (car region))
699 :class (or override-start class)
700 :containing first-keyword-mark)
701 ))
702 (if wisi-end-caches
703 (push (car region) wisi-end-caches)
704 (setq wisi-end-caches (list (car region)))
705 ))
706
707 (when first-item
708 (setq first-item nil)
709 (when (or override-start
710 ;; FIXME: why block-middle here?
711 (memq class '(block-middle block-start statement-start)))
712 (setq override-start nil)
713 (setq first-keyword-mark mark)))
714
715 (when (eq class 'statement-end)
716 (wisi-set-end (1- first-keyword-mark) (copy-marker (1+ (car region)))))
717 )
718
719 ;; region is nil when a production is empty; if the first
720 ;; token is a start, override the class on the next token.
721 (when (and first-item
722 (memq class '(block-middle block-start statement-start)))
723 (setq override-start class)))
724 ))
725 )))
726
727 (defun wisi-containing-action (containing-token contained-token)
728 "Set containing marks in all tokens in CONTAINED-TOKEN with null containing mark to marker pointing to CONTAINING-TOKEN.
729 If CONTAINING-TOKEN is empty, the next token number is used."
730 ;; wisi-tokens is is bound in action created by wisi-semantic-action
731 (let* ((containing-region (cddr (nth (1- containing-token) wisi-tokens)))
732 (contained-region (cddr (nth (1- contained-token) wisi-tokens))))
733
734 (unless containing-region ;;
735 (signal 'wisi-parse-error
736 (wisi-error-msg
737 "wisi-containing-action: containing-region '%s' is empty. grammar error; bad action"
738 (nth 1 (nth (1- containing-token) wisi-tokens)))))
739
740 (unless (or (not contained-region) ;; contained-token is empty
741 (wisi-get-cache (car containing-region)))
742 (signal 'wisi-parse-error
743 (wisi-error-msg
744 "wisi-containing-action: containing-token '%s' has no cache. grammar error; missing action"
745 (nth 1 (nth (1- containing-token) wisi-tokens)))))
746
747 (while (not containing-region)
748 ;; containing-token is empty; use next
749 (setq containing-region (cddr (nth containing-token wisi-tokens))))
750
751 (when contained-region
752 ;; nil when empty production, may not contain any caches
753 (save-excursion
754 (goto-char (cdr contained-region))
755 (let ((cache (wisi-backward-cache))
756 (mark (copy-marker (1+ (car containing-region)))))
757 (while cache
758
759 ;; skip blocks that are already marked
760 (while (and (>= (point) (car contained-region))
761 (markerp (wisi-cache-containing cache)))
762 (goto-char (1- (wisi-cache-containing cache)))
763 (setq cache (wisi-get-cache (point))))
764
765 (if (or (and (= (car containing-region) (car contained-region))
766 (<= (point) (car contained-region)))
767 (< (point) (car contained-region)))
768 ;; done
769 (setq cache nil)
770
771 ;; else set mark, loop
772 (setf (wisi-cache-containing cache) mark)
773 (setq cache (wisi-backward-cache)))
774 ))))))
775
776 (defun wisi-motion-action (&rest token-numbers)
777 "Set prev/next marks in all tokens given by TOKEN-NUMBERS.
778 Each TOKEN-NUMBERS is one of:
779
780 number: the token number; mark that token
781
782 list (number class token_id):
783 list (number class token_id class token_id ...):
784 mark all tokens in number nonterminal matching (class token_id) with nil prev/next."
785 (save-excursion
786 (let (prev-keyword-mark
787 prev-cache
788 cache
789 mark)
790 (while token-numbers
791 (let ((token-number (pop token-numbers))
792 class-tokens target-class target-token
793 region)
794 (cond
795 ((numberp token-number)
796 (setq region (cddr (nth (1- token-number) wisi-tokens)))
797 (when region
798 (setq cache (wisi-get-cache (car region)))
799 (setq mark (copy-marker (1+ (car region))))
800
801 (when (and prev-keyword-mark
802 cache
803 (null (wisi-cache-prev cache)))
804 (setf (wisi-cache-prev cache) prev-keyword-mark)
805 (setf (wisi-cache-next prev-cache) mark))
806
807 (setq prev-keyword-mark mark)
808 (setq prev-cache cache)
809 ))
810
811 ((listp token-number)
812 ;; token-number may contain 0, 1, or more 'class token_id' pairs
813 ;; the corresponding region may be empty
814 ;; there must have been a prev keyword
815 (setq class-tokens (cdr token-number))
816 (setq token-number (car token-number))
817 (setq region (cddr (nth (1- token-number) wisi-tokens)))
818 (when region ;; not an empty token
819 (while class-tokens
820 (setq target-class (pop class-tokens))
821 (setq target-token (list (pop class-tokens)))
822 (goto-char (car region))
823 (while (setq cache (wisi-forward-find-token target-token (cdr region) t))
824 (when (eq target-class (wisi-cache-class cache))
825 (when (null (wisi-cache-prev cache))
826 (setf (wisi-cache-prev cache) prev-keyword-mark))
827 (when (null (wisi-cache-next cache))
828 (setq mark (copy-marker (1+ (point))))
829 (setf (wisi-cache-next prev-cache) mark)
830 (setq prev-keyword-mark mark)
831 (setq prev-cache cache)))
832
833 (wisi-forward-token);; don't find same token again
834 ))
835 ))
836
837 (t
838 (error "unexpected token-number %s" token-number))
839 )
840
841 ))
842 )))
843
844 (defun wisi-extend-action (number)
845 "Extend text of cache at token NUMBER to cover all of token NUMBER.
846 Also override token with new token."
847 (let* ((token-region (nth (1- number) wisi-tokens));; wisi-tokens is let-bound in wisi-parse-reduce
848 (token (car token-region))
849 (region (cddr token-region))
850 cache)
851
852 (when region
853 (setq cache (wisi-get-cache (car region)))
854 (setf (wisi-cache-last cache) (- (cdr region) (car region)))
855 (setf (wisi-cache-token cache) token)
856 )
857 ))
858
859 (defun wisi-face-action (&rest pairs)
860 "Cache face information in text properties of tokens.
861 Intended as a grammar non-terminal action.
862
863 PAIRS is of the form [TOKEN-NUMBER fase] ..."
864 (while pairs
865 (let* ((number (1- (pop pairs)))
866 (region (cddr (nth number wisi-tokens)));; wisi-tokens is let-bound in wisi-parse-reduce
867 (face (pop pairs))
868 cache)
869
870 (when region
871 (setq cache (wisi-get-cache (car region)))
872 (unless cache
873 (error "wisi-face-action on non-cache"))
874 (setf (wisi-cache-face cache) face)
875 (when (boundp 'jit-lock-mode)
876 (jit-lock-refontify (car region) (cdr region))))
877 )))
878
879 ;;;; motion
880 (defun wisi-backward-cache ()
881 "Move point backward to the beginning of the first token preceding point that has a cache.
882 Returns cache, or nil if at beginning of buffer."
883 (let (cache pos)
884 (setq pos (previous-single-property-change (point) 'wisi-cache))
885 ;; There are three cases:
886 ;;
887 ;; 1) caches separated by non-cache chars: 'if ... then'
888 ;; pos is before 'f', cache is on 'i'
889 ;;
890 ;; 2) caches not separated: ');'
891 ;; pos is before ';', cache is on ';'
892 ;;
893 ;; 3) at bob; pos is nil
894 ;;
895 (if pos
896 (progn
897 (setq cache (get-text-property pos 'wisi-cache))
898 (if cache
899 ;; case 2
900 (goto-char pos)
901 ;; case 1
902 (setq cache (get-text-property (1- pos) 'wisi-cache))
903 (goto-char (1- pos))))
904 ;; at bob
905 (goto-char (point-min))
906 (setq cache nil))
907 cache
908 ))
909
910 (defun wisi-forward-cache ()
911 "Move point forward to the beginning of the first token after point that has a cache.
912 Returns cache, or nil if at end of buffer."
913 (let (cache pos)
914 (when (get-text-property (point) 'wisi-cache)
915 ;; on a cache; get past it
916 (goto-char (1+ (point))))
917
918 (setq cache (get-text-property (point) 'wisi-cache))
919 (if cache
920 nil
921
922 (setq pos (next-single-property-change (point) 'wisi-cache))
923 (if pos
924 (progn
925 (goto-char pos)
926 (setq cache (get-text-property pos 'wisi-cache)))
927 ;; at eob
928 (goto-char (point-max))
929 (setq cache nil))
930 )
931 cache
932 ))
933
934 (defun wisi-forward-find-class (class limit)
935 "Search forward for a token that has a cache with CLASS.
936 Return cache, or nil if at end of buffer.
937 If LIMIT (a buffer position) is reached, throw an error."
938 (let ((cache (wisi-forward-cache)))
939 (while (not (eq class (wisi-cache-class cache)))
940 (setq cache (wisi-forward-cache))
941 (when (>= (point) limit)
942 (error "cache with class %s not found" class)))
943 cache))
944
945 (defun wisi-forward-find-token (token limit &optional noerror)
946 "Search forward for a token that has a cache with TOKEN.
947 If point is at a matching token, return that token.
948 TOKEN may be a list; stop on any cache that has a member of the list.
949 Return cache, or nil if at end of buffer.
950 If LIMIT (a buffer position) is reached, then if NOERROR is nil, throw an
951 error, if non-nil, return nil."
952 (let ((token-list (cond
953 ((listp token) token)
954 (t (list token))))
955 (cache (wisi-get-cache (point)))
956 (done nil))
957 (while (not (or done
958 (and cache
959 (memq (wisi-cache-token cache) token-list))))
960 (setq cache (wisi-forward-cache))
961 (when (>= (point) limit)
962 (if noerror
963 (progn
964 (setq done t)
965 (setq cache nil))
966 (error "cache with token %s not found" token))))
967 cache))
968
969 (defun wisi-forward-find-nonterm (nonterm limit)
970 "Search forward for a token that has a cache with NONTERM.
971 NONTERM may be a list; stop on any cache that has a member of the list.
972 Return cache, or nil if at end of buffer.
973 If LIMIT (a buffer position) is reached, throw an error."
974 (let ((nonterm-list (cond
975 ((listp nonterm) nonterm)
976 (t (list nonterm))))
977 (cache (wisi-forward-cache)))
978 (while (not (memq (wisi-cache-nonterm cache) nonterm-list))
979 (setq cache (wisi-forward-cache))
980 (when (>= (point) limit)
981 (error "cache with nonterm %s not found" nonterm)))
982 cache))
983
984 (defun wisi-goto-cache-next (cache)
985 (goto-char (1- (wisi-cache-next cache)))
986 (wisi-get-cache (point))
987 )
988
989 (defun wisi-forward-statement-keyword ()
990 "If not at a cached token, move forward to next
991 cache. Otherwise move to cache-next, or next cache if nil.
992 Return cache found."
993 (wisi-validate-cache (point-max)) ;; ensure there is a next cache to move to
994 (let ((cache (wisi-get-cache (point))))
995 (if cache
996 (let ((next (wisi-cache-next cache)))
997 (if next
998 (goto-char (1- next))
999 (wisi-forward-token)
1000 (wisi-forward-cache)))
1001 (wisi-forward-cache))
1002 )
1003 (wisi-get-cache (point))
1004 )
1005
1006 (defun wisi-backward-statement-keyword ()
1007 "If not at a cached token, move backward to prev
1008 cache. Otherwise move to cache-prev, or prev cache if nil."
1009 (wisi-validate-cache (point))
1010 (let ((cache (wisi-get-cache (point))))
1011 (if cache
1012 (let ((prev (wisi-cache-prev cache)))
1013 (if prev
1014 (goto-char (1- prev))
1015 (wisi-backward-cache)))
1016 (wisi-backward-cache))
1017 ))
1018
1019 (defun wisi-goto-containing (cache &optional error)
1020 "Move point to containing token for CACHE, return cache at that point.
1021 If ERROR, throw error when CACHE has no container; else return nil."
1022 (cond
1023 ((markerp (wisi-cache-containing cache))
1024 (goto-char (1- (wisi-cache-containing cache)))
1025 (wisi-get-cache (point)))
1026 (t
1027 (when error
1028 (error "already at outermost containing token")))
1029 ))
1030
1031 (defun wisi-goto-containing-paren (cache)
1032 "Move point to just after the open-paren containing CACHE.
1033 Return cache for paren, or nil if no containing paren."
1034 (while (and cache
1035 (not (eq (wisi-cache-class cache) 'open-paren)))
1036 (setq cache (wisi-goto-containing cache)))
1037 (when cache
1038 (forward-char 1))
1039 cache)
1040
1041 (defun wisi-goto-start (cache)
1042 "Move point to containing ancestor of CACHE that has class block-start or statement-start.
1043 Return start cache."
1044 (when
1045 ;; cache nil at bob
1046 (while (and cache
1047 (not (memq (wisi-cache-class cache) '(block-start statement-start))))
1048 (setq cache (wisi-goto-containing cache)))
1049 )
1050 cache)
1051
1052 (defun wisi-goto-end-1 (cache)
1053 (goto-char (1- (wisi-cache-end cache))))
1054
1055 (defun wisi-goto-statement-start ()
1056 "Move point to token at start of statement point is in or after.
1057 Return start cache."
1058 (interactive)
1059 (wisi-validate-cache (point))
1060 (let ((cache (wisi-get-cache (point))))
1061 (unless cache
1062 (setq cache (wisi-backward-cache)))
1063 (wisi-goto-start cache)))
1064
1065 (defun wisi-goto-statement-end ()
1066 "Move point to token at end of statement point is in or before."
1067 (interactive)
1068 (wisi-validate-cache (point))
1069 (let ((cache (or (wisi-get-cache (point))
1070 (wisi-forward-cache))))
1071 (when (wisi-cache-end cache)
1072 ;; nil when cache is statement-end
1073 (wisi-goto-end-1 cache))
1074 ))
1075
1076 (defun wisi-next-statement-cache (cache)
1077 "Move point to CACHE-next, return cache; error if nil."
1078 (when (not (markerp (wisi-cache-next cache)))
1079 (error "no next statement cache"))
1080 (goto-char (1- (wisi-cache-next cache)))
1081 (wisi-get-cache (point)))
1082
1083 (defun wisi-prev-statement-cache (cache)
1084 "Move point to CACHE-next, return cache; error if nil."
1085 (when (not (markerp (wisi-cache-prev cache)))
1086 (error "no prev statement cache"))
1087 (goto-char (1- (wisi-cache-prev cache)))
1088 (wisi-get-cache (point)))
1089
1090 ;;;; indentation
1091
1092 (defun wisi-comment-indent ()
1093 "For `comment-indent-function'. Indent single line comment to
1094 the comment on the previous line."
1095 ;; This should only be called by comment-indent-new-line or
1096 ;; fill-comment-paragraph, so there will be a preceding comment line
1097 ;; that we can trust.
1098 (save-excursion
1099 (forward-comment -1)
1100 (if (looking-at comment-start)
1101 (current-column)
1102 (error "wisi-comment-indent called after non-comment"))))
1103
1104 (defun wisi-indent-current (offset)
1105 "Return indentation OFFSET relative to indentation of current line."
1106 (+ (current-indentation) offset)
1107 )
1108
1109 (defun wisi-indent-paren (offset)
1110 "Return indentation OFFSET relative to preceding open paren."
1111 (save-excursion
1112 (goto-char (nth 1 (syntax-ppss)))
1113 (+ (current-column) offset)))
1114
1115 (defun wisi-indent-start (offset cache)
1116 "Return indentation of OFFSET relative to containing ancestor
1117 of CACHE with class statement-start or block-start."
1118 (wisi-goto-start cache)
1119 (+ (current-indentation) offset))
1120
1121 (defun wisi-indent-statement ()
1122 "Indent region given by `wisi-goto-start' on cache at or before point, then wisi-cache-end."
1123 (wisi-validate-cache (point))
1124
1125 (save-excursion
1126 (let ((cache (or (wisi-get-cache (point))
1127 (wisi-backward-cache))))
1128 (when cache
1129 ;; can be nil if in header comment
1130 (let ((start (progn (wisi-goto-start cache) (point)))
1131 (end (progn
1132 (when (wisi-cache-end cache)
1133 ;; nil when cache is statement-end
1134 (goto-char (1- (wisi-cache-end cache))))
1135 (point))))
1136 (indent-region start end)
1137 ))
1138 )))
1139
1140 (defvar-local wisi-indent-calculate-functions nil
1141 "Functions to calculate indentation. Each called with point
1142 before a token at the beginning of a line (at current
1143 indentation); return indentation column for that token, or
1144 nil. May move point. Calling stops when first function returns
1145 non-nil.")
1146
1147 (defvar-local wisi-post-parse-fail-hook
1148 "Function to reindent portion of buffer.
1149 Called from `wisi-indent-line' when a parse succeeds after
1150 failing; assumes user was editing code that is now syntactically
1151 correct. Must leave point at indentation of current line.")
1152
1153 (defvar-local wisi-indent-failed nil
1154 "Non-nil when wisi-indent-line fails due to parse failing; cleared when indent succeeds.")
1155
1156 (defun wisi-indent-line ()
1157 "Indent current line using the wisi indentation engine."
1158 (interactive)
1159
1160 (let ((savep (point))
1161 indent)
1162 (save-excursion
1163 (back-to-indentation)
1164 (when (>= (point) savep) (setq savep nil))
1165
1166 (when (> (point) wisi-cache-max)
1167 (wisi-validate-cache (point))
1168 (when (and (not wisi-parse-failed)
1169 wisi-indent-failed)
1170 (setq wisi-indent-failed nil)
1171 (run-hooks 'wisi-post-parse-fail-hook)))
1172
1173 (if (> (point) wisi-cache-max)
1174 (progn
1175 ;; no indent info at point. Assume user is
1176 ;; editing; indent to previous line, fix it
1177 ;; after parse succeeds
1178 (setq wisi-indent-failed t)
1179 (forward-line -1);; safe at bob
1180 (back-to-indentation)
1181 (setq indent (current-column)))
1182
1183 (setq indent
1184 (with-demoted-errors
1185 (or (run-hook-with-args-until-success 'wisi-indent-calculate-functions) 0))
1186 )))
1187
1188 (if savep
1189 ;; point was inside line text; leave it there
1190 (save-excursion (indent-line-to indent))
1191 ;; point was before line text; move to start of text
1192 (indent-line-to indent))
1193 ))
1194
1195 ;;;; debug
1196 (defun wisi-parse-buffer ()
1197 (interactive)
1198 (syntax-propertize (point-max))
1199 (wisi-invalidate-cache)
1200 (wisi-validate-cache (point-max)))
1201
1202 (defun wisi-show-cache ()
1203 "Show cache at point."
1204 (interactive)
1205 (message "%s" (wisi-get-cache (point))))
1206
1207 (defun wisi-show-token ()
1208 "Move forward across one keyword, show token_id."
1209 (interactive)
1210 (let ((token (wisi-forward-token)))
1211 (message "%s" (car token))))
1212
1213 (defun wisi-show-containing-or-previous-cache ()
1214 (interactive)
1215 (let ((cache (wisi-get-cache (point))))
1216 (if cache
1217 (message "containing %s" (wisi-goto-containing cache t))
1218 (message "previous %s" (wisi-backward-cache)))
1219 ))
1220
1221 ;;;;; setup
1222
1223 (defun wisi-setup (indent-calculate post-parse-fail class-list keyword-table token-table parse-table)
1224 "Set up a buffer for parsing files with wisi."
1225 (setq wisi-class-list class-list)
1226 (setq wisi-string-double-term (car (symbol-value (intern-soft "string-double" token-table))))
1227 (setq wisi-string-single-term (car (symbol-value (intern-soft "string-single" token-table))))
1228 (setq wisi-symbol-term (car (symbol-value (intern-soft "symbol" token-table))))
1229
1230 (setq wisi-punctuation-table (symbol-value (intern-soft "punctuation" token-table)))
1231 (setq wisi-punctuation-table-max-length 0)
1232 (let (fail)
1233 (dolist (item wisi-punctuation-table)
1234 (when item ;; default matcher can be nil
1235
1236 ;; check that all chars used in punctuation tokens have punctuation syntax
1237 (mapc (lambda (char)
1238 (when (not (= ?. (char-syntax char)))
1239 (setq fail t)
1240 (message "in %s, %c does not have punctuation syntax"
1241 (car item) char)))
1242 (cdr item))
1243
1244 (when (< wisi-punctuation-table-max-length (length (cdr item)))
1245 (setq wisi-punctuation-table-max-length (length (cdr item)))))
1246 )
1247 (when fail
1248 (error "aborting due to punctuation errors")))
1249
1250 (setq wisi-keyword-table keyword-table)
1251 (setq wisi-parse-table parse-table)
1252
1253 (setq wisi-indent-calculate-functions indent-calculate)
1254 (set (make-local-variable 'indent-line-function) 'wisi-indent-line)
1255
1256 (setq wisi-post-parse-fail-hook post-parse-fail)
1257 (setq wisi-indent-failed nil)
1258
1259 (add-hook 'before-change-functions 'wisi-before-change nil t)
1260 (add-hook 'after-change-functions 'wisi-after-change nil t)
1261
1262 ;; see comments on "lexer" above re syntax-propertize
1263 (syntax-propertize (point-max))
1264
1265 (wisi-invalidate-cache)
1266 )
1267
1268 (provide 'wisi)
1269 ;;; wisi.el ends here