1 ;;; wisi.el --- Utilities for implementing an indentation/navigation engine using a generalized LALR parser
3 ;; Copyright (C) 2012 - 2014 Free Software Foundation, Inc.
5 ;; Author: Stephen Leake <stephen_leake@member.fsf.org>
6 ;; Maintainer: Stephen Leake <stephen_leake@member.fsf.org>
11 ;; package-requires: ((cl-lib "0.4") (emacs "24.2"))
12 ;; URL: http://stephe-leake.org/emacs/ada-mode/emacs-ada-mode.html
14 ;; This file is part of GNU Emacs.
16 ;; GNU Emacs is free software: you can redistribute it and/or modify
17 ;; it under the terms of the GNU General Public License as published by
18 ;; the Free Software Foundation, either version 3 of the License, or
19 ;; (at your option) any later version.
21 ;; GNU Emacs is distributed in the hope that it will be useful,
22 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
23 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 ;; GNU General Public License for more details.
26 ;; You should have received a copy of the GNU General Public License
27 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
32 ;;;; History: see NEWS-wisi.text
34 ;;;; indentation algorithm overview
36 ;; This design is inspired in part by experience writing a SMIE
37 ;; indentation engine for Ada, and the wisent parser.
39 ;; The general approach to indenting a given token is to find the
40 ;; start of the statement it is part of, or some other relevant point
41 ;; in the statement, and indent relative to that. So we need a parser
42 ;; that lets us find statement indent points from arbitrary places in
45 ;; For example, the grammar for Ada as represented by the EBNF in LRM
46 ;; Annex P is not LALR(1), so we use a generalized LALR(1) parser (see
47 ;; wisi-parse, wisi-compile).
49 ;; The parser actions cache indentation and other information as text
50 ;; properties of tokens in statements.
52 ;; An indentation engine moves text in the buffer, as does user
53 ;; editing, so we can't rely on character positions remaining
54 ;; constant. So the parser actions use markers to store
55 ;; positions. Text properties also move with the text.
57 ;; The stored information includes a marker at each statement indent
58 ;; point. Thus, the indentation algorithm is: find the previous token
59 ;; with cached information, and either indent from it, or fetch from
60 ;; it the marker for a previous statement indent point, and indent
63 ;; Since we have a cache (the text properties), we need to consider
64 ;; when to invalidate it. Ideally, we invalidate only when a change to
65 ;; the buffer would change the result of a parse that crosses that
66 ;; change, or starts after that change. Changes in whitespace
67 ;; (indentation and newlines) do not affect an Ada parse. Other
68 ;; languages are sensitive to newlines (Bash for example) or
69 ;; indentation (Python). Adding comments does not change a parse,
70 ;; unless code is commented out. For now we invalidate the cache after
71 ;; the edit point if the change involves anything other than
74 ;;;; comparison to the SMIE parser
76 ;; The central problem to be solved in building the SMIE parser is
77 ;; grammar precedence conflicts; the general solution is refining
78 ;; keywords so that each new keyword can be assigned a unique
79 ;; precedence. This means ad hoc code must be written to determine the
80 ;; correct refinement for each language keyword from the surrounding
81 ;; tokens. In effect, for a complex language like Ada, the knowledge
82 ;; of the language grammar is mostly embedded in the refinement code;
83 ;; only a small amount is in the refined grammar. Implementing a SMIE
84 ;; parser for a new language involves the same amount of work as the
87 ;; Using a generalized LALR parser avoids that particular problem;
88 ;; assuming the language is already defined by a grammar, it is only a
89 ;; matter of a format change to teach the wisi parser the
90 ;; language. The problem in a wisi indentation engine is caching the
91 ;; output of the parser in a useful way, since we can't start the
92 ;; parser from arbitrary places in the code (as we can with the SMIE
93 ;; parser). A second problem is determining when to invalidate the
94 ;; cache. But these problems are independent of the language being
95 ;; parsed, so once we have one wisi indentation engine working,
96 ;; adapting it to new languages should be quite simple.
98 ;; The SMIE parser does not find the start of each statement, only the
99 ;; first language keyword in each statement; additional code must be
100 ;; written to find the statement start and indent points. The wisi
101 ;; parser finds the statement start and indent points directly.
103 ;; In SMIE, it is best if each grammar rule is a complete statement,
104 ;; so forward-sexp will traverse the entire statement. If nested
105 ;; non-terminals are used, forward-sexp may stop inside one of the
106 ;; nested non-terminals. This problem does not occur with the wisi
109 ;; A downside of the wisi parser is conflicts in the grammar; they can
110 ;; be much more difficult to resolve than in the SMIE parser. The
111 ;; generalized parser helps by handling conflicts, but it does so by
112 ;; running multiple parsers in parallel, persuing each choice in the
113 ;; conflict. If the conflict is due to a genuine ambiguity, both paths
114 ;; will succeed, which causes the parse to fail, since it is not clear
115 ;; which set of text properties to store. Even if one branch
116 ;; ultimately fails, running parallel parsers over large sections of
117 ;; code is slow. Finally, this approach can lead to exponential growth
118 ;; in the number of parsers. So grammar conflicts must still be
119 ;; analyzed and minimized.
121 ;; In addition, the complete grammar must be specified; in smie, it is
122 ;; often possible to specify a subset of the grammar.
124 ;;;; grammar compiler and parser
126 ;; Since we are using a generalized LALR(1) parser, we cannot use any
127 ;; of the wisent grammar functions. We use OpenToken wisi-generate
128 ;; to compile BNF to Elisp source (similar to
129 ;; semantic-grammar-create-package), and wisi-compile-grammar to
130 ;; compile that to the parser table.
132 ;; Semantic provides a complex lexer, more complicated than we need
133 ;; for indentation. So we use the elisp lexer, which consists of
134 ;; `forward-comment', `skip-syntax-forward', and `scan-sexp'. We wrap
135 ;; that in functions that return tokens in the form wisi-parse
140 ;; The lexer is `wisi-forward-token'. It relies on syntax properties,
141 ;; so syntax-propertize must be called on the text to be lexed before
142 ;; wisi-forward-token is called. In general, it is hard to determine
143 ;; an appropriate end-point for syntax-propertize, other than
144 ;; point-max. So we call (syntax-propertize point-max) in wisi-setup,
145 ;; and also call syntax-propertize in wisi-after-change.
149 ;; 'wisi' was originally short for "wisent indentation engine", but
150 ;; now is just a name.
152 ;; not using lexical-binding because we support Emacs 23
159 (require 'wisi-parse)
161 ;; WORKAROUND: for some reason, this condition doesn't work in batch mode!
162 ;; (when (and (= emacs-major-version 24)
163 ;; (= emacs-minor-version 2))
164 (require 'wisi-compat-24.2)
169 (defvar-local wisi-class-list nil)
170 (defvar-local wisi-keyword-table nil)
171 (defvar-local wisi-punctuation-table nil)
172 (defvar-local wisi-punctuation-table-max-length 0)
173 (defvar-local wisi-string-double-term nil);; string delimited by double quotes
174 (defvar-local wisi-string-quote-escape-doubled nil
175 "Non-nil if a string delimiter is escaped by doubling it (as in Ada).")
176 (defvar-local wisi-string-quote-escape nil
177 "Cons '(delim . character) where 'character' escapes quotes in strings delimited by 'delim'.")
178 (defvar-local wisi-string-single-term nil) ;; string delimited by single quotes
179 (defvar-local wisi-symbol-term nil)
181 (defun wisi-forward-token (&optional text-only)
182 "Move point forward across one token, skipping leading whitespace and comments.
183 Return the corresponding token, in a format determined by TEXT-ONLY:
185 TEXT-ONLY nil: (token text start . end)
187 `token' is a token symbol (not string) from `wisi-punctuation-table',
188 `wisi-keyword-table', `wisi-string-double-term', `wisi-string-double-term' or `wisi-symbol-term'.
190 `text' is the token text from the buffer
192 `start, end' are the character positions in the buffer of the start
193 and end of the token text.
195 If at end of buffer, returns `wisent-eoi-term'."
196 (forward-comment (point-max))
197 ;; skips leading whitespace, comment, trailing whitespace.
199 (let ((start (point))
200 ;; (info "(elisp)Syntax Table Internals" "*info elisp syntax*")
201 (syntax (syntax-class (syntax-after (point))))
206 (setq token-id wisent-eoi-term))
209 ;; punctuation. Find the longest matching string in wisi-punctuation-table
211 (let ((next-point (point))
212 temp-text temp-id done)
214 (setq temp-text (buffer-substring-no-properties start (point)))
215 (setq temp-id (car (rassoc temp-text wisi-punctuation-table)))
217 (setq token-text temp-text
222 (= (- (point) start) wisi-punctuation-table-max-length))
226 (goto-char next-point)))
228 ((memq syntax '(4 5)) ;; open, close parenthesis
230 (setq token-text (buffer-substring-no-properties start (point)))
231 (setq token-id (symbol-value (intern-soft token-text wisi-keyword-table))))
234 ;; string quote, either single or double. we assume point is
235 ;; before the start quote, not the end quote
236 (let ((delim (char-after (point)))
237 (forward-sexp-function nil))
242 ;; point is now after the end quote; check for an escaped quote
244 (and wisi-string-quote-escape-doubled
245 (eq (char-after (point)) delim))
246 (and (eq delim (car wisi-string-quote-escape))
247 (eq (char-before (1- (point))) (cdr wisi-string-quote-escape))))
249 (setq token-text (buffer-substring-no-properties start (point)))
250 (setq token-id (if (= delim ?\") wisi-string-double-term wisi-string-single-term)))
252 ;; Something screwed up; we should not get here if
253 ;; syntax-propertize works properly.
254 (error "wisi-forward-token: forward-sexp failed %s" err)
257 (t ;; assuming word syntax
258 (skip-syntax-forward "w_'")
259 (setq token-text (buffer-substring-no-properties start (point)))
261 (or (symbol-value (intern-soft (downcase token-text) wisi-keyword-table))
266 (signal 'wisi-parse-error
267 (wisi-error-msg "unrecognized token '%s'" (buffer-substring-no-properties start (point)))))
271 (cons token-id (cons token-text (cons start (point)))))
274 (defun wisi-backward-token ()
275 "Move point backward across one token, skipping whitespace and comments.
276 Return (nil text start . end) - same structure as
277 wisi-forward-token, but does not look up symbol."
278 (forward-comment (- (point)))
279 ;; skips leading whitespace, comment, trailing whitespace.
281 ;; (info "(elisp)Syntax Table Internals" "*info elisp syntax*")
283 (syntax (syntax-class (syntax-after (1- (point))))))
287 ((memq syntax '(4 5)) ;; open, close parenthesis
291 ;; a string quote. we assume we are after the end quote, not the start quote
292 (let ((forward-sexp-function nil))
296 (if (zerop (skip-syntax-backward "."))
297 (skip-syntax-backward "w_'")))
299 (cons nil (cons (buffer-substring-no-properties (point) end) (cons (point) end)))
302 ;;;; token info cache
304 ;; the cache stores the results of parsing as text properties on
305 ;; keywords, for use by the indention and motion engines.
309 (:constructor wisi-cache-create)
311 nonterm;; nonterminal from parse (set by wisi-statement-action)
314 ;; terminal symbol from wisi-keyword-table or
315 ;; wisi-punctuation-table, or lower-level nonterminal from parse
316 ;; (set by wisi-statement-action)
318 last ;; pos of last char in token, relative to first (0 indexed)
321 ;; arbitrary lisp symbol, used for indentation and navigation.
322 ;; some classes are defined by wisi:
324 ;; 'block-middle - a block keyword (ie: if then else end), not at the start of a statement
326 ;; 'block-start - a block keyword at the start of a statement
328 ;; 'statement-start - the start of a statement
332 ;; others are language-specific
335 ;; Marker at the containing keyword for this token.
336 ;; A containing keyword is an indent point; the start of a
337 ;; statement, or 'begin', 'then' or 'else' for a block of
339 ;; nil only for first token in buffer
341 prev ;; marker at previous motion token in statement; nil if none
342 next ;; marker at next motion token in statement; nil if none
343 end ;; marker at token at end of current statement
344 face ;; for font-lock. only set when regexp font-lock can't handle it
347 (defvar-local wisi-parse-table nil)
349 (defvar-local wisi-parse-failed nil
350 "Non-nil when a recent parse has failed - cleared when parse succeeds.")
352 (defvar-local wisi-parse-try nil
353 "Non-nil when parse is needed - cleared when parse succeeds.")
355 (defvar-local wisi-change-need-invalidate nil
356 "When non-nil, buffer position to invalidate from.
357 Used in before/after change functions.")
359 (defvar-local wisi-end-caches nil
360 "List of buffer positions of caches in current statement that need wisi-cache-end set.")
362 (defun wisi-invalidate-cache(&optional after)
363 "Invalidate parsing caches for the current buffer from AFTER to end of buffer.
364 Caches are the Emacs syntax cache, the wisi token cache, and the wisi parser cache."
367 (setq after (point-min))
371 (line-beginning-position))))
372 (when (> wisi-debug 0) (message "wisi-invalidate %s:%d" (current-buffer) after))
373 (setq wisi-cache-max after)
374 (setq wisi-parse-try t)
375 (syntax-ppss-flush-cache after)
376 (with-silent-modifications
377 (remove-text-properties after (point-max) '(wisi-cache nil))))
379 (defun wisi-before-change (begin end)
380 "For `before-change-functions'."
381 ;; begin . end is range of text being deleted
383 ;; If jit-lock-after-change is before wisi-after-change in
384 ;; after-change-functions, it might use any invalid caches in the
387 ;; So we check for that here, and ensure it is after
388 ;; wisi-after-change, which deletes the invalid caches
389 (when (boundp 'jit-lock-mode)
390 (when (memq 'wisi-after-change (memq 'jit-lock-after-change after-change-functions))
391 (setq after-change-functions (delete 'wisi-after-change after-change-functions))
392 (add-hook 'after-change-functions 'wisi-after-change nil t))
395 (setq wisi-change-need-invalidate nil)
397 (when (and (> end begin)
398 (>= wisi-cache-max begin))
400 (when wisi-parse-failed
401 ;; The parse was failing, probably due to bad syntax; this change
402 ;; may have fixed it, so try reparse.
403 (setq wisi-parse-try t))
406 ;; don't invalidate parse for whitespace, string, or comment changes
407 (let (;; (info "(elisp)Parser State")
408 (state (syntax-ppss begin)))
409 ;; syntax-ppss has moved point to "begin".
412 (nth 3 state); in string
413 (nth 4 state)); in comment
414 ;; FIXME: check that entire range is in comment or string
418 (skip-syntax-forward " " end);; does not skip newline
422 (setq wisi-change-need-invalidate
424 (wisi-goto-statement-start)
429 (defun wisi-after-change (begin end length)
430 "For `after-change-functions'."
431 ;; begin . end is range of text being inserted (empty if equal)
433 ;; (syntax-ppss-flush-cache begin) is in before-change-functions
435 (syntax-propertize end) ;; see comments above on "lexer" re syntax-propertize
437 ;; The parse was failing, probably due to bad syntax; this change
438 ;; may have fixed it, so try reparse.
439 (setq wisi-parse-try t)
441 ;; remove 'wisi-cache on inserted text, which could have caches
442 ;; from before the failed parse (or another buffer), and are in
444 (with-silent-modifications
445 (remove-text-properties begin end '(wisi-cache)))
448 ((>= wisi-cache-max begin)
449 ;; The parse had succeeded past the start of the inserted
452 (let (need-invalidate
453 ;; (info "(elisp)Parser State")
454 (state (syntax-ppss begin)))
455 ;; syntax-ppss has moved point to "begin".
457 (wisi-change-need-invalidate
458 ;; wisi-before change determined the removed text alters the
460 (setq need-invalidate wisi-change-need-invalidate))
463 (setq need-invalidate nil))
466 (nth 3 state); in string
467 (nth 4 state)); in comment
468 ;; FIXME: insert newline in comment to create non-comment!?
469 ;; or paste a chunk of code
470 ;; => check that all of change region is comment or string
471 (setq need-invalidate nil)
472 ;; no caches to remove
476 (skip-syntax-forward " " end);; does not skip newlines
478 (setq need-invalidate nil))
481 (setq need-invalidate begin))
485 ;; The inserted or deleted text could alter the parse;
486 ;; wisi-invalidate-cache removes all 'wisi-cache.
487 (wisi-invalidate-cache need-invalidate)
489 ;; else move cache-max by the net change length.
491 (+ wisi-cache-max (- end begin length))))
496 ;; parse never attempted, or only done to before BEGIN. Just
498 (with-silent-modifications
499 (remove-text-properties begin end '(wisi-cache)))
503 (defun wisi-get-cache (pos)
504 "Return `wisi-cache' struct from the `wisi-cache' text property at POS.
505 If accessing cache at a marker for a token as set by `wisi-cache-tokens', POS must be (1- mark)."
506 (get-text-property pos 'wisi-cache))
508 (defvar-local wisi-parse-error-msg nil)
510 (defun wisi-goto-error ()
511 "Move point to position in last error message (if any)."
512 (when (string-match ":\\([0-9]+\\):\\([0-9]+\\):" wisi-parse-error-msg)
513 (let ((line (string-to-number (match-string 1 wisi-parse-error-msg)))
514 (col (string-to-number (match-string 2 wisi-parse-error-msg))))
515 (goto-char (point-min))
516 (forward-line (1- line))
517 (forward-char col))))
519 (defun wisi-show-parse-error ()
520 "Show last wisi-parse error."
524 (message wisi-parse-error-msg)
528 (message "need parse"))
531 (message "parse succeeded"))
534 (defvar wisi-post-parse-succeed-hook nil
535 "Hook run after parse succeeds.")
537 (defun wisi-validate-cache (pos)
538 "Ensure cached data is valid at least up to POS in current buffer."
539 (let ((msg (when (> wisi-debug 0) (format "wisi: parsing %s:%d ..." (buffer-name) (line-number-at-pos pos)))))
540 (when (and wisi-parse-try
541 (< wisi-cache-max pos))
542 (when (> wisi-debug 0)
545 (setq wisi-parse-try nil)
546 (setq wisi-parse-error-msg nil)
547 (setq wisi-end-caches nil)
551 ;; let debugger stop in wisi-parse
553 (wisi-parse wisi-parse-table 'wisi-forward-token)
554 (setq wisi-cache-max (point))
555 (setq wisi-parse-failed nil)
556 (run-hooks 'wisi-post-parse-succeed-hook))
558 ;; else capture errors from bad syntax, so higher level
559 ;; functions can try to continue and/or we don't bother the
563 (wisi-parse wisi-parse-table 'wisi-forward-token)
564 (setq wisi-cache-max (point))
565 (setq wisi-parse-failed nil)
566 (run-hooks 'wisi-post-parse-succeed-hook))
568 (setq wisi-parse-failed t)
569 (setq wisi-parse-error-msg (cdr err)))
571 (if wisi-parse-error-msg
573 (when (> wisi-debug 0)
574 (message "%s error" msg)
576 (error wisi-parse-error-msg))
578 (when (> wisi-debug 0)
579 (message "%s done" msg)))
582 (defun wisi-get-containing-cache (cache)
583 "Return cache from (wisi-cache-containing CACHE)."
584 (let ((containing (wisi-cache-containing cache)))
586 (wisi-get-cache (1- containing)))))
588 (defun wisi-cache-region (cache)
589 "Return region designated by cache.
590 Point must be at cache."
591 (cons (point) (+ (point) (wisi-cache-last cache))))
593 (defun wisi-cache-text (cache)
594 "Return property-less buffer substring designated by cache.
595 Point must be at cache."
596 (buffer-substring-no-properties (point) (+ (point) (wisi-cache-last cache))))
600 (defun wisi-set-end (start-mark end-mark)
601 "Set END-MARK on all caches in `wisi-end-caches' in range START-MARK END-MARK,
602 delete from `wisi-end-caches'."
605 (while (< i (length wisi-end-caches))
606 (setq pos (nth i wisi-end-caches))
607 (setq cache (wisi-get-cache pos))
609 (if (and (>= pos start-mark)
612 (setf (wisi-cache-end cache) end-mark)
613 (setq wisi-end-caches (delq pos wisi-end-caches)))
619 (defvar wisi-tokens nil)
620 ;; keep byte-compiler happy; `wisi-tokens' is bound in action created
621 ;; by wisi-semantic-action
623 (defun wisi-statement-action (&rest pairs)
624 "Cache information in text properties of tokens.
625 Intended as a grammar non-terminal action.
627 PAIRS is of the form [TOKEN-NUMBER CLASS] ... where TOKEN-NUMBER
628 is the (1 indexed) token number in the production, CLASS is the wisi class of
629 that token. Use in a grammar action as:
630 (wisi-statement-action 1 'statement-start 7 'statement-end)"
634 (override-start nil))
636 (let* ((number (1- (pop pairs)))
637 (region (cddr (nth number wisi-tokens)));; wisi-tokens is let-bound in wisi-parse-reduce
638 (token (car (nth number wisi-tokens)))
641 ;; Marker one char into token, so indent-line-to
642 ;; inserts space before the mark, not after
643 (when region (copy-marker (1+ (car region)))))
646 (unless (memq class wisi-class-list)
647 (error "%s not in wisi-class-list" class))
651 (if (setq cache (wisi-get-cache (car region)))
652 ;; We are processing a previously set non-terminal; ie generic_formal_part in
654 ;; generic_package_declaration : generic_formal_part package_specification SEMICOLON
655 ;; (wisi-statement-action 1 'block-start 2 'block-middle 3 'statement-end)
657 ;; or simple_statement in
659 ;; statement : label_opt simple_statement
661 ;; override nonterm, class, containing
662 ;; set end only if not set yet (due to failed parse)
664 (cl-case (wisi-cache-class cache)
666 (setf (wisi-cache-class cache)
668 ((eq override-start nil)
670 ((memq class '(block-start statement-start)) 'block-start)
673 ((memq override-start '(block-start statement-start)) 'block-start)
675 (t (error "unexpected override-start"))
678 (setf (wisi-cache-class cache) (or override-start class)))
680 (setf (wisi-cache-nonterm cache) $nterm)
681 (setf (wisi-cache-containing cache) first-keyword-mark)
682 (unless (wisi-cache-end cache)
684 (push (car region) wisi-end-caches)
685 (setq wisi-end-caches (list (car region)))
689 ;; else create new cache
690 (with-silent-modifications
696 :nonterm $nterm;; $nterm defined in wisi-semantic-action
698 :last (- (cdr region) (car region))
699 :class (or override-start class)
700 :containing first-keyword-mark)
703 (push (car region) wisi-end-caches)
704 (setq wisi-end-caches (list (car region)))
708 (setq first-item nil)
709 (when (or override-start
710 ;; FIXME: why block-middle here?
711 (memq class '(block-middle block-start statement-start)))
712 (setq override-start nil)
713 (setq first-keyword-mark mark)))
715 (when (eq class 'statement-end)
716 (wisi-set-end (1- first-keyword-mark) (copy-marker (1+ (car region)))))
719 ;; region is nil when a production is empty; if the first
720 ;; token is a start, override the class on the next token.
721 (when (and first-item
722 (memq class '(block-middle block-start statement-start)))
723 (setq override-start class)))
727 (defun wisi-containing-action (containing-token contained-token)
728 "Set containing marks in all tokens in CONTAINED-TOKEN with null containing mark to marker pointing to CONTAINING-TOKEN.
729 If CONTAINING-TOKEN is empty, the next token number is used."
730 ;; wisi-tokens is is bound in action created by wisi-semantic-action
731 (let* ((containing-region (cddr (nth (1- containing-token) wisi-tokens)))
732 (contained-region (cddr (nth (1- contained-token) wisi-tokens))))
734 (unless containing-region ;;
735 (signal 'wisi-parse-error
737 "wisi-containing-action: containing-region '%s' is empty. grammar error; bad action"
738 (nth 1 (nth (1- containing-token) wisi-tokens)))))
740 (unless (or (not contained-region) ;; contained-token is empty
741 (wisi-get-cache (car containing-region)))
742 (signal 'wisi-parse-error
744 "wisi-containing-action: containing-token '%s' has no cache. grammar error; missing action"
745 (nth 1 (nth (1- containing-token) wisi-tokens)))))
747 (while (not containing-region)
748 ;; containing-token is empty; use next
749 (setq containing-region (cddr (nth containing-token wisi-tokens))))
751 (when contained-region
752 ;; nil when empty production, may not contain any caches
754 (goto-char (cdr contained-region))
755 (let ((cache (wisi-backward-cache))
756 (mark (copy-marker (1+ (car containing-region)))))
759 ;; skip blocks that are already marked
760 (while (and (>= (point) (car contained-region))
761 (markerp (wisi-cache-containing cache)))
762 (goto-char (1- (wisi-cache-containing cache)))
763 (setq cache (wisi-get-cache (point))))
765 (if (or (and (= (car containing-region) (car contained-region))
766 (<= (point) (car contained-region)))
767 (< (point) (car contained-region)))
771 ;; else set mark, loop
772 (setf (wisi-cache-containing cache) mark)
773 (setq cache (wisi-backward-cache)))
776 (defun wisi-motion-action (&rest token-numbers)
777 "Set prev/next marks in all tokens given by TOKEN-NUMBERS.
778 Each TOKEN-NUMBERS is one of:
780 number: the token number; mark that token
782 list (number class token_id):
783 list (number class token_id class token_id ...):
784 mark all tokens in number nonterminal matching (class token_id) with nil prev/next."
786 (let (prev-keyword-mark
791 (let ((token-number (pop token-numbers))
792 class-tokens target-class target-token
795 ((numberp token-number)
796 (setq region (cddr (nth (1- token-number) wisi-tokens)))
798 (setq cache (wisi-get-cache (car region)))
799 (setq mark (copy-marker (1+ (car region))))
801 (when (and prev-keyword-mark
803 (null (wisi-cache-prev cache)))
804 (setf (wisi-cache-prev cache) prev-keyword-mark)
805 (setf (wisi-cache-next prev-cache) mark))
807 (setq prev-keyword-mark mark)
808 (setq prev-cache cache)
811 ((listp token-number)
812 ;; token-number may contain 0, 1, or more 'class token_id' pairs
813 ;; the corresponding region may be empty
814 ;; there must have been a prev keyword
815 (setq class-tokens (cdr token-number))
816 (setq token-number (car token-number))
817 (setq region (cddr (nth (1- token-number) wisi-tokens)))
818 (when region ;; not an empty token
820 (setq target-class (pop class-tokens))
821 (setq target-token (list (pop class-tokens)))
822 (goto-char (car region))
823 (while (setq cache (wisi-forward-find-token target-token (cdr region) t))
824 (when (eq target-class (wisi-cache-class cache))
825 (when (null (wisi-cache-prev cache))
826 (setf (wisi-cache-prev cache) prev-keyword-mark))
827 (when (null (wisi-cache-next cache))
828 (setq mark (copy-marker (1+ (point))))
829 (setf (wisi-cache-next prev-cache) mark)
830 (setq prev-keyword-mark mark)
831 (setq prev-cache cache)))
833 (wisi-forward-token);; don't find same token again
838 (error "unexpected token-number %s" token-number))
844 (defun wisi-extend-action (number)
845 "Extend text of cache at token NUMBER to cover all of token NUMBER.
846 Also override token with new token."
847 (let* ((token-region (nth (1- number) wisi-tokens));; wisi-tokens is let-bound in wisi-parse-reduce
848 (token (car token-region))
849 (region (cddr token-region))
853 (setq cache (wisi-get-cache (car region)))
854 (setf (wisi-cache-last cache) (- (cdr region) (car region)))
855 (setf (wisi-cache-token cache) token)
859 (defun wisi-face-action (&rest pairs)
860 "Cache face information in text properties of tokens.
861 Intended as a grammar non-terminal action.
863 PAIRS is of the form [TOKEN-NUMBER fase] ..."
865 (let* ((number (1- (pop pairs)))
866 (region (cddr (nth number wisi-tokens)));; wisi-tokens is let-bound in wisi-parse-reduce
871 (setq cache (wisi-get-cache (car region)))
873 (error "wisi-face-action on non-cache"))
874 (setf (wisi-cache-face cache) face)
875 (when (boundp 'jit-lock-mode)
876 (jit-lock-refontify (car region) (cdr region))))
880 (defun wisi-backward-cache ()
881 "Move point backward to the beginning of the first token preceding point that has a cache.
882 Returns cache, or nil if at beginning of buffer."
884 (setq pos (previous-single-property-change (point) 'wisi-cache))
885 ;; There are three cases:
887 ;; 1) caches separated by non-cache chars: 'if ... then'
888 ;; pos is before 'f', cache is on 'i'
890 ;; 2) caches not separated: ');'
891 ;; pos is before ';', cache is on ';'
893 ;; 3) at bob; pos is nil
897 (setq cache (get-text-property pos 'wisi-cache))
902 (setq cache (get-text-property (1- pos) 'wisi-cache))
903 (goto-char (1- pos))))
905 (goto-char (point-min))
910 (defun wisi-forward-cache ()
911 "Move point forward to the beginning of the first token after point that has a cache.
912 Returns cache, or nil if at end of buffer."
914 (when (get-text-property (point) 'wisi-cache)
915 ;; on a cache; get past it
916 (goto-char (1+ (point))))
918 (setq cache (get-text-property (point) 'wisi-cache))
922 (setq pos (next-single-property-change (point) 'wisi-cache))
926 (setq cache (get-text-property pos 'wisi-cache)))
928 (goto-char (point-max))
934 (defun wisi-forward-find-class (class limit)
935 "Search forward for a token that has a cache with CLASS.
936 Return cache, or nil if at end of buffer.
937 If LIMIT (a buffer position) is reached, throw an error."
938 (let ((cache (wisi-forward-cache)))
939 (while (not (eq class (wisi-cache-class cache)))
940 (setq cache (wisi-forward-cache))
941 (when (>= (point) limit)
942 (error "cache with class %s not found" class)))
945 (defun wisi-forward-find-token (token limit &optional noerror)
946 "Search forward for a token that has a cache with TOKEN.
947 If point is at a matching token, return that token.
948 TOKEN may be a list; stop on any cache that has a member of the list.
949 Return cache, or nil if at end of buffer.
950 If LIMIT (a buffer position) is reached, then if NOERROR is nil, throw an
951 error, if non-nil, return nil."
952 (let ((token-list (cond
953 ((listp token) token)
955 (cache (wisi-get-cache (point)))
959 (memq (wisi-cache-token cache) token-list))))
960 (setq cache (wisi-forward-cache))
961 (when (>= (point) limit)
966 (error "cache with token %s not found" token))))
969 (defun wisi-forward-find-nonterm (nonterm limit)
970 "Search forward for a token that has a cache with NONTERM.
971 NONTERM may be a list; stop on any cache that has a member of the list.
972 Return cache, or nil if at end of buffer.
973 If LIMIT (a buffer position) is reached, throw an error."
974 (let ((nonterm-list (cond
975 ((listp nonterm) nonterm)
977 (cache (wisi-forward-cache)))
978 (while (not (memq (wisi-cache-nonterm cache) nonterm-list))
979 (setq cache (wisi-forward-cache))
980 (when (>= (point) limit)
981 (error "cache with nonterm %s not found" nonterm)))
984 (defun wisi-goto-cache-next (cache)
985 (goto-char (1- (wisi-cache-next cache)))
986 (wisi-get-cache (point))
989 (defun wisi-forward-statement-keyword ()
990 "If not at a cached token, move forward to next
991 cache. Otherwise move to cache-next, or next cache if nil.
993 (wisi-validate-cache (point-max)) ;; ensure there is a next cache to move to
994 (let ((cache (wisi-get-cache (point))))
996 (let ((next (wisi-cache-next cache)))
998 (goto-char (1- next))
1000 (wisi-forward-cache)))
1001 (wisi-forward-cache))
1003 (wisi-get-cache (point))
1006 (defun wisi-backward-statement-keyword ()
1007 "If not at a cached token, move backward to prev
1008 cache. Otherwise move to cache-prev, or prev cache if nil."
1009 (wisi-validate-cache (point))
1010 (let ((cache (wisi-get-cache (point))))
1012 (let ((prev (wisi-cache-prev cache)))
1014 (goto-char (1- prev))
1015 (wisi-backward-cache)))
1016 (wisi-backward-cache))
1019 (defun wisi-goto-containing (cache &optional error)
1020 "Move point to containing token for CACHE, return cache at that point.
1021 If ERROR, throw error when CACHE has no container; else return nil."
1023 ((markerp (wisi-cache-containing cache))
1024 (goto-char (1- (wisi-cache-containing cache)))
1025 (wisi-get-cache (point)))
1028 (error "already at outermost containing token")))
1031 (defun wisi-goto-containing-paren (cache)
1032 "Move point to just after the open-paren containing CACHE.
1033 Return cache for paren, or nil if no containing paren."
1035 (not (eq (wisi-cache-class cache) 'open-paren)))
1036 (setq cache (wisi-goto-containing cache)))
1041 (defun wisi-goto-start (cache)
1042 "Move point to containing ancestor of CACHE that has class block-start or statement-start.
1043 Return start cache."
1047 (not (memq (wisi-cache-class cache) '(block-start statement-start))))
1048 (setq cache (wisi-goto-containing cache)))
1052 (defun wisi-goto-end-1 (cache)
1053 (goto-char (1- (wisi-cache-end cache))))
1055 (defun wisi-goto-statement-start ()
1056 "Move point to token at start of statement point is in or after.
1057 Return start cache."
1059 (wisi-validate-cache (point))
1060 (let ((cache (wisi-get-cache (point))))
1062 (setq cache (wisi-backward-cache)))
1063 (wisi-goto-start cache)))
1065 (defun wisi-goto-statement-end ()
1066 "Move point to token at end of statement point is in or before."
1068 (wisi-validate-cache (point))
1069 (let ((cache (or (wisi-get-cache (point))
1070 (wisi-forward-cache))))
1071 (when (wisi-cache-end cache)
1072 ;; nil when cache is statement-end
1073 (wisi-goto-end-1 cache))
1076 (defun wisi-next-statement-cache (cache)
1077 "Move point to CACHE-next, return cache; error if nil."
1078 (when (not (markerp (wisi-cache-next cache)))
1079 (error "no next statement cache"))
1080 (goto-char (1- (wisi-cache-next cache)))
1081 (wisi-get-cache (point)))
1083 (defun wisi-prev-statement-cache (cache)
1084 "Move point to CACHE-next, return cache; error if nil."
1085 (when (not (markerp (wisi-cache-prev cache)))
1086 (error "no prev statement cache"))
1087 (goto-char (1- (wisi-cache-prev cache)))
1088 (wisi-get-cache (point)))
1092 (defun wisi-comment-indent ()
1093 "For `comment-indent-function'. Indent single line comment to
1094 the comment on the previous line."
1095 ;; This should only be called by comment-indent-new-line or
1096 ;; fill-comment-paragraph, so there will be a preceding comment line
1097 ;; that we can trust.
1099 (forward-comment -1)
1100 (if (looking-at comment-start)
1102 (error "wisi-comment-indent called after non-comment"))))
1104 (defun wisi-indent-current (offset)
1105 "Return indentation OFFSET relative to indentation of current line."
1106 (+ (current-indentation) offset)
1109 (defun wisi-indent-paren (offset)
1110 "Return indentation OFFSET relative to preceding open paren."
1112 (goto-char (nth 1 (syntax-ppss)))
1113 (+ (current-column) offset)))
1115 (defun wisi-indent-start (offset cache)
1116 "Return indentation of OFFSET relative to containing ancestor
1117 of CACHE with class statement-start or block-start."
1118 (wisi-goto-start cache)
1119 (+ (current-indentation) offset))
1121 (defun wisi-indent-statement ()
1122 "Indent region given by `wisi-goto-start' on cache at or before point, then wisi-cache-end."
1123 (wisi-validate-cache (point))
1126 (let ((cache (or (wisi-get-cache (point))
1127 (wisi-backward-cache))))
1129 ;; can be nil if in header comment
1130 (let ((start (progn (wisi-goto-start cache) (point)))
1132 (when (wisi-cache-end cache)
1133 ;; nil when cache is statement-end
1134 (goto-char (1- (wisi-cache-end cache))))
1136 (indent-region start end)
1140 (defvar-local wisi-indent-calculate-functions nil
1141 "Functions to calculate indentation. Each called with point
1142 before a token at the beginning of a line (at current
1143 indentation); return indentation column for that token, or
1144 nil. May move point. Calling stops when first function returns
1147 (defvar-local wisi-post-parse-fail-hook
1148 "Function to reindent portion of buffer.
1149 Called from `wisi-indent-line' when a parse succeeds after
1150 failing; assumes user was editing code that is now syntactically
1151 correct. Must leave point at indentation of current line.")
1153 (defvar-local wisi-indent-failed nil
1154 "Non-nil when wisi-indent-line fails due to parse failing; cleared when indent succeeds.")
1156 (defun wisi-indent-line ()
1157 "Indent current line using the wisi indentation engine."
1160 (let ((savep (point))
1163 (back-to-indentation)
1164 (when (>= (point) savep) (setq savep nil))
1166 (when (> (point) wisi-cache-max)
1167 (wisi-validate-cache (point))
1168 (when (and (not wisi-parse-failed)
1170 (setq wisi-indent-failed nil)
1171 (run-hooks 'wisi-post-parse-fail-hook)))
1173 (if (> (point) wisi-cache-max)
1175 ;; no indent info at point. Assume user is
1176 ;; editing; indent to previous line, fix it
1177 ;; after parse succeeds
1178 (setq wisi-indent-failed t)
1179 (forward-line -1);; safe at bob
1180 (back-to-indentation)
1181 (setq indent (current-column)))
1184 (with-demoted-errors
1185 (or (run-hook-with-args-until-success 'wisi-indent-calculate-functions) 0))
1189 ;; point was inside line text; leave it there
1190 (save-excursion (indent-line-to indent))
1191 ;; point was before line text; move to start of text
1192 (indent-line-to indent))
1196 (defun wisi-parse-buffer ()
1198 (syntax-propertize (point-max))
1199 (wisi-invalidate-cache)
1200 (wisi-validate-cache (point-max)))
1202 (defun wisi-show-cache ()
1203 "Show cache at point."
1205 (message "%s" (wisi-get-cache (point))))
1207 (defun wisi-show-token ()
1208 "Move forward across one keyword, show token_id."
1210 (let ((token (wisi-forward-token)))
1211 (message "%s" (car token))))
1213 (defun wisi-show-containing-or-previous-cache ()
1215 (let ((cache (wisi-get-cache (point))))
1217 (message "containing %s" (wisi-goto-containing cache t))
1218 (message "previous %s" (wisi-backward-cache)))
1223 (defun wisi-setup (indent-calculate post-parse-fail class-list keyword-table token-table parse-table)
1224 "Set up a buffer for parsing files with wisi."
1225 (setq wisi-class-list class-list)
1226 (setq wisi-string-double-term (car (symbol-value (intern-soft "string-double" token-table))))
1227 (setq wisi-string-single-term (car (symbol-value (intern-soft "string-single" token-table))))
1228 (setq wisi-symbol-term (car (symbol-value (intern-soft "symbol" token-table))))
1230 (setq wisi-punctuation-table (symbol-value (intern-soft "punctuation" token-table)))
1231 (setq wisi-punctuation-table-max-length 0)
1233 (dolist (item wisi-punctuation-table)
1234 (when item ;; default matcher can be nil
1236 ;; check that all chars used in punctuation tokens have punctuation syntax
1237 (mapc (lambda (char)
1238 (when (not (= ?. (char-syntax char)))
1240 (message "in %s, %c does not have punctuation syntax"
1244 (when (< wisi-punctuation-table-max-length (length (cdr item)))
1245 (setq wisi-punctuation-table-max-length (length (cdr item)))))
1248 (error "aborting due to punctuation errors")))
1250 (setq wisi-keyword-table keyword-table)
1251 (setq wisi-parse-table parse-table)
1253 (setq wisi-indent-calculate-functions indent-calculate)
1254 (set (make-local-variable 'indent-line-function) 'wisi-indent-line)
1256 (setq wisi-post-parse-fail-hook post-parse-fail)
1257 (setq wisi-indent-failed nil)
1259 (add-hook 'before-change-functions 'wisi-before-change nil t)
1260 (add-hook 'after-change-functions 'wisi-after-change nil t)
1262 ;; see comments on "lexer" above re syntax-propertize
1263 (syntax-propertize (point-max))
1265 (wisi-invalidate-cache)
1269 ;;; wisi.el ends here