1 ;;; wisi.el --- Utilities for implementing an indentation/navigation engine using a generalized LALR parser
3 ;; Copyright (C) 2012, 2013, 2014 Free Software Foundation, Inc.
5 ;; Author: Stephen Leake <stephen_leake@member.fsf.org>
7 ;; package-requires: ((cl-lib "0.4") (emacs "24.2"))
8 ;; URL: http://stephe-leake.org/emacs/ada-mode/emacs-ada-mode.html
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
28 ;;;; History: first experimental version Oct 2012
30 ;;;; indentation algorithm overview
32 ;; This design is inspired in part by experience writing a SMIE
33 ;; indentation engine for Ada, and the wisent parser.
35 ;; The general approach to indenting a given token is to find the
36 ;; start of the statement it is part of, or some other relevant point
37 ;; in the statement, and indent relative to that. So we need a parser
38 ;; that lets us find statement indent points from arbitrary places in
41 ;; For example, the grammar for Ada as represented by the EBNF in LRM
42 ;; Annex P is not LALR(1), so we use a generalized LALR(1) parser (see
43 ;; wisi-parse, wisi-compile).
45 ;; The parser actions cache indentation and other information as text
46 ;; properties of tokens in statements.
48 ;; An indentation engine moves text in the buffer, as does user
49 ;; editing, so we can't rely on character positions remaining
50 ;; constant. So the parser actions use markers to store
51 ;; positions. Text properties also move with the text.
53 ;; The stored information includes a marker at each statement indent
54 ;; point. Thus, the indentation algorithm is: find the previous token
55 ;; with cached information, and either indent from it, or fetch from
56 ;; it the marker for a previous statement indent point, and indent
59 ;; Since we have a cache (the text properties), we need to consider
60 ;; when to invalidate it. Ideally, we invalidate only when a change to
61 ;; the buffer would change the result of a parse that crosses that
62 ;; change, or starts after that change. Changes in whitespace
63 ;; (indentation and newlines) do not affect an Ada parse. Other
64 ;; languages are sensitive to newlines (Bash for example) or
65 ;; indentation (Python). Adding comments does not change a parse,
66 ;; unless code is commented out. For now we invalidate the cache after
67 ;; the edit point if the change involves anything other than
70 ;;;; comparison to the SMIE parser
72 ;; The central problem to be solved in building the SMIE parser is
73 ;; grammar precedence conflicts; the general solution is refining
74 ;; keywords so that each new keyword can be assigned a unique
75 ;; precedence. This means ad hoc code must be written to determine the
76 ;; correct refinement for each language keyword from the surrounding
77 ;; tokens. In effect, for a complex language like Ada, the knowledge
78 ;; of the language grammar is mostly embedded in the refinement code;
79 ;; only a small amount is in the refined grammar. Implementing a SMIE
80 ;; parser for a new language involves the same amount of work as the
83 ;; Using a generalized LALR parser avoids that particular problem;
84 ;; assuming the language is already defined by a grammar, it is only a
85 ;; matter of a format change to teach the wisi parser the
86 ;; language. The problem in a wisi indentation engine is caching the
87 ;; output of the parser in a useful way, since we can't start the
88 ;; parser from arbitrary places in the code (as we can with the SMIE
89 ;; parser). A second problem is determining when to invalidate the
90 ;; cache. But these problems are independent of the language being
91 ;; parsed, so once we have one wisi indentation engine working,
92 ;; adapting it to new languages should be quite simple.
94 ;; The SMIE parser does not find the start of each statement, only the
95 ;; first language keyword in each statement; additional code must be
96 ;; written to find the statement start and indent points. The wisi
97 ;; parser finds the statement start and indent points directly.
99 ;; In SMIE, it is best if each grammar rule is a complete statement,
100 ;; so forward-sexp will traverse the entire statement. If nested
101 ;; non-terminals are used, forward-sexp may stop inside one of the
102 ;; nested non-terminals. This problem does not occur with the wisi
105 ;; A downside of the wisi parser is conflicts in the grammar; they can
106 ;; be much more difficult to resolve than in the SMIE parser. The
107 ;; generalized parser helps by handling conflicts, but it does so by
108 ;; running multiple parsers in parallel, pursuing each choice in the
109 ;; conflict. If the conflict is due to a genuine ambiguity, both paths
110 ;; will succeed, which causes the parse to fail, since it is not clear
111 ;; which set of text properties to store. Even if one branch
112 ;; ultimately fails, running parallel parsers over large sections of
113 ;; code is slow. Finally, this approach can lead to exponential growth
114 ;; in the number of parsers. So grammar conflicts must still be
115 ;; analyzed and minimized.
117 ;; In addition, the complete grammar must be specified; in smie, it is
118 ;; often possible to specify a subset of the grammar.
120 ;;;; grammar compiler and parser
122 ;; Since we are using a generalized LALR(1) parser, we cannot use any
123 ;; of the wisent grammar functions. We use OpenToken wisi-generate
124 ;; to compile BNF to Elisp source (similar to
125 ;; semantic-grammar-create-package), and wisi-compile-grammar to
126 ;; compile that to the parser table.
128 ;; Semantic provides a complex lexer, more complicated than we need
129 ;; for indentation. So we use the elisp lexer, which consists of
130 ;; `forward-comment', `skip-syntax-forward', and `scan-sexp'. We wrap
131 ;; that in functions that return tokens in the form wisi-parse
136 ;; 'wisi' was originally short for "wisent indentation engine", but
137 ;; now is just a name.
139 ;; not using lexical-binding because we support Emacs 23
146 (require 'wisi-parse)
148 ;; WORKAROUND: for some reason, this condition doesn't work in batch mode!
149 ;; (when (and (= emacs-major-version 24)
150 ;; (= emacs-minor-version 2))
151 (require 'wisi-compat-24.2)
;; Buffer-local lexer configuration.  All of these except
;; `wisi-string-quote-escape-doubled' are assigned by `wisi-setup'.

;; Classes accepted by `wisi-statement-action'; it signals an error for
;; any class that is not a member of this list.
156 (defvar-local wisi-class-list nil)
;; Keyword lookup table, queried with `intern-soft' on the token text;
;; the symbol's value is the token id.
157 (defvar-local wisi-keyword-table nil)
;; Punctuation tokens; `wisi-forward-token' searches it with `rassoc' on
;; the token text and takes the car of the match as the token id.
158 (defvar-local wisi-punctuation-table nil)
;; Length of the longest punctuation text in `wisi-punctuation-table',
;; computed in `wisi-setup'.
159 (defvar-local wisi-punctuation-table-max-length 0)
;; Token id `wisi-forward-token' returns for a string whose delimiter is `"'.
160 (defvar-local wisi-string-double-term nil) ;; string delimited by double quotes
;; When non-nil, `wisi-forward-token' checks whether the character after a
;; closing quote is the same delimiter (a doubled, i.e. escaped, quote).
161 (defvar-local wisi-string-quote-escape-doubled nil)
;; Token id `wisi-forward-token' returns for a string with any other delimiter.
162 (defvar-local wisi-string-single-term nil) ;; string delimited by single quotes
;; Taken from the "symbol" entry of the token table in `wisi-setup';
;; presumably the token id for non-keyword symbols -- TODO confirm.
163 (defvar-local wisi-symbol-term nil)
165 (defun wisi-forward-token (&optional text-only)
166 "Move point forward across one token, skipping leading whitespace and comments.
167 Return the corresponding token, in a format determined by TEXT-ONLY:
169 TEXT-ONLY nil: (token text start . end)
171 `token' is a token symbol (not string) from `wisi-punctuation-table',
172 `wisi-keyword-table', `wisi-string-double-term', `wisi-string-single-term' or `wisi-symbol-term'.
174 `text' is the token text from the buffer
176 `start, end' are the character positions in the buffer of the start
177 and end of the token text.
179 If at end of buffer, returns `wisent-eoi-term'."
180 (forward-comment (point-max))
181 ;; skips leading whitespace, comment, trailing whitespace.
183 (let ((start (point))
184 ;; (info "(elisp)Syntax Table Internals" "*info elisp syntax*")
185 (syntax (syntax-class (syntax-after (point))))
190 (setq token-id wisent-eoi-term))
193 ;; punctuation. Find the longest matching string in wisi-punctuation-table
195 (let ((next-point (point))
196 temp-text temp-id done)
198 (setq temp-text (buffer-substring-no-properties start (point)))
199 (setq temp-id (car (rassoc temp-text wisi-punctuation-table)))
201 (setq token-text temp-text
206 (= (- (point) start) wisi-punctuation-table-max-length))
210 (goto-char next-point)))
212 ((memq syntax '(4 5)) ;; open, close parenthesis
214 (setq token-text (buffer-substring-no-properties start (point)))
215 (setq token-id (symbol-value (intern-soft token-text wisi-keyword-table))))
218 ;; string quote, either single or double. we assume point is before the start quote, not the end quote
219 (let ((delim (char-after (point)))
220 (forward-sexp-function nil))
222 ;; point is now after the end quote; check for a doubled quote
223 (while (and wisi-string-quote-escape-doubled
224 (eq (char-after (point)) delim))
226 (setq token-text (buffer-substring-no-properties start (point)))
227 (setq token-id (if (= delim ?\") wisi-string-double-term wisi-string-single-term))))
229 (t ;; assuming word syntax
230 (skip-syntax-forward "w_'")
231 (setq token-text (buffer-substring-no-properties start (point)))
233 (or (symbol-value (intern-soft (downcase token-text) wisi-keyword-table))
238 (error (wisi-error-msg "unrecognized token '%s'" (buffer-substring-no-properties start (point)))))
242 (cons token-id (cons token-text (cons start (point)))))
245 (defun wisi-backward-token ()
246 "Move point backward across one token, skipping whitespace and comments.
247 Return (nil text start . end) - same structure as
248 wisi-forward-token, but does not look up symbol."
249 (forward-comment (- (point)))
250 ;; skips leading whitespace, comment, trailing whitespace.
252 ;; (info "(elisp)Syntax Table Internals" "*info elisp syntax*")
254 (syntax (syntax-class (syntax-after (1- (point))))))
258 ((memq syntax '(4 5)) ;; open, close parenthesis
262 ;; a string quote. we assume we are after the end quote, not the start quote
263 (let ((forward-sexp-function nil))
267 (if (zerop (skip-syntax-backward "."))
268 (skip-syntax-backward "w_'")))
270 (cons nil (cons (buffer-substring-no-properties (point) end) (cons (point) end)))
273 ;;;; token info cache
275 ;; the cache stores the results of parsing as text properties on
276 ;; keywords, for use by the indentation and motion engines.
280 (:constructor wisi-cache-create)
282 nonterm;; nonterminal from parse (set by wisi-statement-action)
285 ;; terminal symbol from wisi-keyword-table or
286 ;; wisi-punctuation-table, or lower-level nonterminal from parse
287 ;; (set by wisi-statement-action)
289 last ;; pos of last char in token, relative to first (0 indexed)
292 ;; arbitrary lisp symbol, used for indentation and navigation.
293 ;; some classes are defined by wisi:
295 ;; 'block-middle - a block keyword (ie: if then else end), not at the start of a statement
297 ;; 'block-start - a block keyword at the start of a statement
299 ;; 'statement-start - the start of a statement
303 ;; others are language-specific
306 ;; Marker at the containing keyword for this token.
307 ;; A containing keyword is an indent point; the start of a
308 ;; statement, or 'begin', 'then' or 'else' for a block of
310 ;; nil only for first token in buffer
312 prev ;; marker at previous motion token in statement; nil if none
313 next ;; marker at next motion token in statement; nil if none
314 end ;; marker at token at end of current statement
;; Buffer-local state of the token cache and of the parser that fills it.
317 (defvar-local wisi-cache-max 0
318 "Maximum position in buffer where wisi token cache is valid.")
;; Parse table passed to `wisi-parse' by `wisi-validate-cache';
;; assigned in `wisi-setup'.
320 (defvar-local wisi-parse-table nil)
322 (defvar-local wisi-parse-failed nil
323 "Non-nil when a recent parse has failed - cleared when parse succeeds.")
325 (defvar-local wisi-parse-try nil
326 "Non-nil when parse is needed - cleared when parse succeeds.")
;; Written by `wisi-before-change' and consulted by `wisi-after-change':
;; non-nil means the text being removed could alter the parse.
328 (defvar-local wisi-change-need-invalidate nil)
330 (defun wisi-invalidate-cache()
331 "Invalidate the wisi token cache for the current buffer.
332 Also invalidate the Emacs syntax cache."
334 (setq wisi-cache-max 0)
335 (setq wisi-parse-try t)
336 (syntax-ppss-flush-cache (point-min))
337 (with-silent-modifications
338 (remove-text-properties (point-min) (point-max) '(wisi-cache))))
340 (defun wisi-before-change (begin end)
341 "For `before-change-functions'."
342 ;; begin . end is range of text being deleted
344 ;; If jit-lock-after-change is before wisi-after-change in
345 ;; after-change-functions, it might use any invalid caches in the
348 ;; So we check for that here, and ensure it is after
349 ;; wisi-after-change, which deletes the invalid caches
350 (when (boundp 'jit-lock-mode)
351 (when (memq 'wisi-after-change (memq 'jit-lock-after-change after-change-functions))
352 (setq after-change-functions (delete 'wisi-after-change after-change-functions))
353 (add-hook 'after-change-functions 'wisi-after-change nil t))
357 ;; don't invalidate parse for whitespace, string, or comment changes
358 (let (;; (info "(elisp)Parser State")
359 (state (syntax-ppss begin)))
360 ;; syntax-ppss has moved point to "begin".
363 (nth 3 state); in string
364 (nth 4 state)); in comment
365 ;; FIXME: check that entire range is in comment or string
366 (setq wisi-change-need-invalidate nil))
369 (skip-syntax-forward " " end);; does not skip newline
371 (setq wisi-change-need-invalidate nil))
373 (t (setq wisi-change-need-invalidate t))
376 (defun wisi-after-change (begin end length)
377 "For `after-change-functions'."
378 ;; begin . end is range of text being inserted (may be empty)
379 ;; (syntax-ppss-flush-cache begin) is in before-change-functions
381 (syntax-ppss-flush-cache begin) ;; IMPROVEME: could check for whitespace
385 ;; The parse was failing, probably due to bad syntax; this change
386 ;; may have fixed it, so try reparse.
387 (setq wisi-parse-try t)
389 ;; remove 'wisi-cache on inserted text, which could have caches
390 ;; from before the failed parse, and are in any case invalid.
391 (with-silent-modifications
392 (remove-text-properties begin end '(wisi-cache)))
395 ((>= wisi-cache-max begin)
396 ;; The parse had succeeded past the start of the inserted
399 (let ((need-invalidate t)
400 ;; (info "(elisp)Parser State")
401 (state (syntax-ppss begin)))
402 ;; syntax-ppss has moved point to "begin".
404 (wisi-change-need-invalidate
405 ;; wisi-before change determined the removed text alters the
410 (nth 3 state); in string
411 (nth 4 state)); in comment
412 ;; FIXME: insert newline in comment to create non-comment!?
413 ;; or paste a chunk of code
414 ;; => check that all of change region is comment or string
415 (setq need-invalidate nil))
418 (skip-syntax-forward " " end);; does not skip newlines
420 (setq need-invalidate nil))
426 ;; The inserted or deleted text could alter the parse
427 (wisi-invalidate-cache)
429 ;; else move cache-max by the net change length. We don't
430 ;; need to delete 'wisi-cache in the inserted text, because
431 ;; if there were any it would not pass the above.
433 (+ wisi-cache-max (- end begin length))))
438 ;; parse never attempted, or only done to before BEGIN. Just
439 ;; remove 'wisi-cache
440 (with-silent-modifications
441 (remove-text-properties begin end '(wisi-cache)))
(defun wisi-get-cache (pos)
  "Return the value of the `wisi-cache' text property at POS, or nil if none.
The value is the `wisi-cache' struct stored on the first character
of a token.  Markers kept inside a cache point one character past
the start of their token, so to fetch the cache at such a marker
pass (1- MARKER) as POS."
  (get-text-property pos 'wisi-cache))
(defvar-local wisi-parse-error-msg nil
  "Error message from the most recent failed parse, or nil if none.
Cleared by `wisi-validate-cache' before each parse attempt.")

(defun wisi-goto-error ()
  "Move point to position in last error message (if any).
The position is taken from the first \":LINE:COLUMN:\" found in
`wisi-parse-error-msg'.  Do nothing and return nil when there is no
error message or it contains no such position.  A column that runs
past the end of the accessible buffer leaves point at the end
instead of signaling an error."
  ;; `wisi-parse-error-msg' is nil until a parse fails; `string-match'
  ;; would signal `wrong-type-argument' on nil.
  (when (and (stringp wisi-parse-error-msg)
             (string-match ":\\([0-9]+\\):\\([0-9]+\\):" wisi-parse-error-msg))
    (let ((line (string-to-number (match-string 1 wisi-parse-error-msg)))
          (col (string-to-number (match-string 2 wisi-parse-error-msg))))
      (goto-char (point-min))
      (forward-line (1- line))
      ;; Clamp rather than `forward-char', which signals `end-of-buffer'
      ;; when the buffer has been edited since the message was produced.
      (goto-char (min (point-max) (+ (point) col))))))
;; Report the outcome of the last parse in the echo area: the stored
;; error text when `wisi-parse-failed' is set, a success notice otherwise.
461 (defun wisi-show-parse-error ()
462 "Show last wisi-parse error."
464 (if wisi-parse-failed
;; NOTE(review): `wisi-parse-error-msg' is handed to `message' as the
;; format string, so a `%' in the parser's text is read as a format
;; directive; (message "%s" wisi-parse-error-msg) would show it verbatim.
466 (message wisi-parse-error-msg)
468 (message "parse succeeded")))
;; Run the parser when a parse is wanted (`wisi-parse-try') and the cache
;; does not yet reach POS.  In the visible code a successful parse sets
;; `wisi-cache-max' from point and clears `wisi-parse-failed'; a failed
;; one sets `wisi-parse-failed' and stores (cdr err) in
;; `wisi-parse-error-msg'.
470 (defun wisi-validate-cache (pos)
471 "Ensure cached data is valid at least up to POS in current buffer."
472 (when (and wisi-parse-try
473 (< wisi-cache-max pos))
474 (when (> wisi-debug 0)
475 (message "wisi: parsing %s ..." (buffer-name)))
;; `wisi-parse-try' is cleared before parsing, so a failed parse is not
;; retried until `wisi-after-change' or `wisi-invalidate-cache' sets it
;; again.
477 (setq wisi-parse-try nil)
478 (setq wisi-parse-error-msg nil)
;; Parsing starts at the end of the valid cache; `wisi-invalidate-cache'
;; resets `wisi-cache-max' to 0.
480 (goto-char wisi-cache-max)
482 ;; let debugger stop in wisi-parse
484 (wisi-parse wisi-parse-table 'wisi-forward-token)
485 (setq wisi-cache-max (point))
486 (setq wisi-parse-failed nil))
487 ;; else capture errors from bad syntax, so higher level functions can try to continue
490 (wisi-parse wisi-parse-table 'wisi-forward-token)
491 (setq wisi-cache-max (point))
492 (setq wisi-parse-failed nil))
494 (setq wisi-parse-failed t)
495 (setq wisi-parse-error-msg (cdr err)))
497 (if wisi-parse-error-msg
499 (when (> wisi-debug 0)
500 (message "wisi: parsing ... error")
;; NOTE(review): the stored message is used as the format string of
;; `error'; a `%' in it would be treated as a format directive.
;; (error "%s" wisi-parse-error-msg) avoids that.
502 (error wisi-parse-error-msg))
504 (when (> wisi-debug 0)
505 (message "wisi: parsing ... done")))
508 (defun wisi-get-containing-cache (cache)
509 "Return cache from (wisi-cache-containing CACHE)."
510 (let ((containing (wisi-cache-containing cache)))
512 (wisi-get-cache (1- containing)))))
(defun wisi-cache-text (cache)
  "Return the buffer text covered by CACHE, without text properties.
Point must be at the start of the token CACHE belongs to; the text
extends from point for (wisi-cache-last CACHE) characters."
  (let* ((beg (point))
         (fin (+ beg (wisi-cache-last cache))))
    (buffer-substring-no-properties beg fin)))
521 (defun wisi-set-end (tokens end-mark)
522 "Set END-MARK on all unset caches in TOKENS."
523 (let ((tokens-t tokens))
525 (let* ((token (pop tokens-t))
526 (region (cddr token))
529 (goto-char (car region))
530 (setq cache (wisi-get-cache (car region)))
532 ;; token is non-terminal; first terminal doesn't have cache.
533 (setq cache (wisi-forward-cache)))
535 (< (point) (cdr region)))
536 (if (not (wisi-cache-end cache))
537 (setf (wisi-cache-end cache) end-mark)
538 (goto-char (wisi-cache-end cache))
540 (setq cache (wisi-forward-cache))
545 (defvar wisi-tokens nil);; keep byte-compiler happy; `wisi-tokens' is bound in action created by wisi-semantic-action
546 (defun wisi-statement-action (&rest pairs)
547 "Cache information in text properties of tokens.
548 Intended as a grammar non-terminal action.
550 PAIRS is of the form [TOKEN-NUMBER CLASS] ... where TOKEN-NUMBER
551 is the (1 indexed) token number in the production, CLASS is the wisi class of
552 that token. Use in a grammar action as:
553 (wisi-statement-action 1 'statement-start 7 'statement-end)"
557 (override-start nil))
559 (let* ((number (1- (pop pairs)))
560 (region (cddr (nth number wisi-tokens)));; wisi-tokens is let-bound in wisi-parse-reduce
561 (token (car (nth number wisi-tokens)))
564 ;; Marker one char into token, so indent-line-to
565 ;; inserts space before the mark, not after
566 (when region (copy-marker (1+ (car region)))))
569 (unless (memq class wisi-class-list)
570 (error "%s not in wisi-class-list" class))
574 (if (setq cache (wisi-get-cache (car region)))
575 ;; We are processing a previously set non-terminal; ie generic_formal_part in
577 ;; generic_package_declaration : generic_formal_part package_specification SEMICOLON
578 ;; (wisi-statement-action 1 'block-start 2 'block-middle 3 'statement-end)
580 ;; or simple_statement in
582 ;; statement : label_opt simple_statement
584 ;; override nonterm, class and containing
586 (cl-case (wisi-cache-class cache)
588 (setf (wisi-cache-class cache)
590 ((eq override-start nil)
592 ((memq class '(block-start statement-start)) 'block-start)
595 ((memq override-start '(block-start statement-start)) 'block-start)
597 (t (error "unexpected override-start"))
600 (setf (wisi-cache-class cache) (or override-start class)))
602 (setf (wisi-cache-nonterm cache) $nterm)
603 (setf (wisi-cache-containing cache) first-keyword-mark))
605 ;; else create new cache
606 (with-silent-modifications
612 :nonterm $nterm;; $nterm defined in wisi-semantic-action
614 :last (- (cdr region) (car region))
615 :class (or override-start class)
616 :containing first-keyword-mark)
620 (setq first-item nil)
621 (when (or override-start
622 (memq class '(block-middle block-start statement-start)))
623 (setq override-start nil)
624 (setq first-keyword-mark mark)))
626 (when (eq class 'statement-end)
627 (wisi-set-end wisi-tokens (copy-marker (1+ (car region)))))
630 ;; region is nil when a production is empty; if the first
631 ;; token is a start, override the class on the next token.
632 (when (and first-item
633 (memq class '(block-middle block-start statement-start)))
634 (setq override-start class)))
;; Grammar action.  CONTAINING-TOKEN and CONTAINED-TOKEN are 1-indexed
;; token numbers into `wisi-tokens'; each token's region is its cddr,
;; a (start . end) pair, or nil for an empty production.
638 (defun wisi-containing-action (containing-token contained-token)
639 "Set containing marks in all tokens in CONTAINED-TOKEN with null containing mark to marker pointing to CONTAINING-TOKEN.
640 If CONTAINING-TOKEN is empty, the next token number is used."
641 ;; wisi-tokens is bound in action created by wisi-semantic-action
642 (let* ((containing-region (cddr (nth (1- containing-token) wisi-tokens)))
643 (contained-region (cddr (nth (1- contained-token) wisi-tokens))))
;; NOTE(review): `containing-token' is never advanced inside this loop, so
;; every iteration re-reads the same (0-indexed) element.  If that token
;; is also empty, the loop does not terminate -- confirm the grammar
;; guarantees a non-empty next token, or increment the index here.
644 (while (not containing-region)
645 ;; containing-token is empty; use next
646 (setq containing-region (cddr (nth containing-token wisi-tokens))))
648 (when contained-region
649 ;; nil when empty production, may not contain any caches
;; Walk backward from the end of the contained region.
651 (goto-char (cdr contained-region))
652 (let ((cache (wisi-backward-cache))
;; The mark sits one char into the containing token; readers use
;; (1- mark) to get back to the token start.
653 (mark (copy-marker (1+ (car containing-region)))))
656 ;; skip blocks that are already marked
657 (while (and (>= (point) (car contained-region))
658 (markerp (wisi-cache-containing cache)))
659 (goto-char (1- (wisi-cache-containing cache)))
660 (setq cache (wisi-get-cache (point))))
662 (if (or (and (= (car containing-region) (car contained-region))
663 (<= (point) (car contained-region)))
664 (< (point) (car contained-region)))
668 ;; else set mark, loop
669 (setf (wisi-cache-containing cache) mark)
670 (setq cache (wisi-backward-cache)))
673 (defun wisi-motion-action (&rest token-numbers)
674 "Set prev/next marks in all tokens given by TOKEN-NUMBERS.
675 Each TOKEN-NUMBERS is one of:
677 number: the token number; mark that token
679 list (number token_id):
680 list (number (token_id token_id)):
681 mark all tokens with token_id in the nonterminal given by the number."
683 (let (prev-keyword-mark
688 (let ((token-number (pop token-numbers))
692 ((numberp token-number)
693 (setq target-token nil)
694 (setq region (cddr (nth (1- token-number) wisi-tokens)))
696 (setq cache (wisi-get-cache (car region)))
697 (setq mark (copy-marker (1+ (car region))))
699 (when (and prev-keyword-mark
701 (null (wisi-cache-prev cache)))
702 (setf (wisi-cache-prev cache) prev-keyword-mark)
703 (setf (wisi-cache-next prev-cache) mark))
705 (setq prev-keyword-mark mark)
706 (setq prev-cache cache)
709 ((listp token-number)
710 ;; token-number may contain 0, 1, or more token_id; token_id may be a list
711 ;; the corresponding region may be empty
712 ;; there must have been a prev keyword
713 (setq target-token (cadr token-number))
714 (when (not (listp target-token))
715 (setq target-token (list target-token)))
716 (setq token-number (car token-number))
717 (setq region (cddr (nth (1- token-number) wisi-tokens)))
718 (when region ;; not an empty token
719 (goto-char (car region))
720 (while (wisi-forward-find-token target-token (cdr region) t)
721 (setq cache (wisi-get-cache (point)))
722 (setq mark (copy-marker (1+ (point))))
724 (when (null (wisi-cache-prev cache))
725 (setf (wisi-cache-prev cache) prev-keyword-mark)
726 (setf (wisi-cache-next prev-cache) mark)
727 (setq prev-keyword-mark mark)
728 (setq prev-cache cache))
730 (wisi-forward-token);; don't find same token again
735 (error "unexpected token-number %s" token-number))
742 (defun wisi-backward-cache ()
743 "Move point backward to the beginning of the first token preceding point that has a cache.
744 Returns cache, or nil if at beginning of buffer."
746 (setq pos (previous-single-property-change (point) 'wisi-cache))
747 ;; There are three cases:
749 ;; 1) caches separated by non-cache chars: 'if ... then'
750 ;; pos is before 'f', cache is on 'i'
752 ;; 2) caches not separated: ');'
753 ;; pos is before ';', cache is on ';'
755 ;; 3) at bob; pos is nil
759 (setq cache (get-text-property pos 'wisi-cache))
764 (setq cache (get-text-property (1- pos) 'wisi-cache))
765 (goto-char (1- pos))))
767 (goto-char (point-min))
772 (defun wisi-forward-cache ()
773 "Move point forward to the beginning of the first token after point that has a cache.
774 Returns cache, or nil if at end of buffer."
776 (when (get-text-property (point) 'wisi-cache)
777 ;; on a cache; get past it
778 (goto-char (1+ (point))))
780 (setq cache (get-text-property (point) 'wisi-cache))
784 (setq pos (next-single-property-change (point) 'wisi-cache))
788 (setq cache (get-text-property pos 'wisi-cache)))
790 (goto-char (point-max))
796 (defun wisi-forward-find-class (class limit)
797 "Search forward for a token that has a cache with CLASS.
798 Return cache, or nil if at end of buffer.
799 If LIMIT (a buffer position) is reached, throw an error."
800 (let ((cache (wisi-forward-cache)))
801 (while (not (eq class (wisi-cache-class cache)))
802 (setq cache (wisi-forward-cache))
803 (when (>= (point) limit)
804 (error "cache with class %s not found" class)))
807 (defun wisi-forward-find-token (token limit &optional noerror)
808 "Search forward for a token that has a cache with TOKEN.
809 If point is at a matching token, return that token.
810 TOKEN may be a list; stop on any cache that has a member of the list.
811 Return cache, or nil if at end of buffer.
812 If LIMIT (a buffer position) is reached, then if NOERROR is nil, throw an
813 error, if non-nil, return nil."
814 (let ((token-list (cond
815 ((listp token) token)
817 (cache (wisi-get-cache (point)))
821 (memq (wisi-cache-token cache) token-list))))
822 (setq cache (wisi-forward-cache))
823 (when (>= (point) limit)
828 (error "cache with token %s not found" token))))
831 (defun wisi-forward-find-nonterm (nonterm limit)
832 "Search forward for a token that has a cache with NONTERM.
833 NONTERM may be a list; stop on any cache that has a member of the list.
834 Return cache, or nil if at end of buffer.
835 If LIMIT (a buffer position) is reached, throw an error."
836 (let ((nonterm-list (cond
837 ((listp nonterm) nonterm)
839 (cache (wisi-forward-cache)))
840 (while (not (memq (wisi-cache-nonterm cache) nonterm-list))
841 (setq cache (wisi-forward-cache))
842 (when (>= (point) limit)
843 (error "cache with nonterm %s not found" nonterm)))
846 (defun wisi-goto-cache-next (cache)
847 (goto-char (1- (wisi-cache-next cache)))
848 (wisi-get-cache (point))
851 (defun wisi-forward-statement-keyword ()
852 "If not at a cached token, move forward to next
853 cache. Otherwise move to cache-next, or next cache if nil.
855 (wisi-validate-cache (point-max))
856 (let ((cache (wisi-get-cache (point))))
858 (let ((next (wisi-cache-next cache)))
860 (goto-char (1- next))
862 (wisi-forward-cache)))
863 (wisi-forward-cache))
865 (wisi-get-cache (point))
868 (defun wisi-backward-statement-keyword ()
869 "If not at a cached token, move backward to prev
870 cache. Otherwise move to cache-prev, or prev cache if nil."
871 (wisi-validate-cache (point-max))
872 (let ((cache (wisi-get-cache (point))))
874 (let ((prev (wisi-cache-prev cache)))
876 (goto-char (1- prev))
877 (wisi-backward-cache)))
878 (wisi-backward-cache))
881 (defun wisi-goto-containing (cache &optional error)
882 "Move point to containing token for CACHE, return cache at that point."
884 ((markerp (wisi-cache-containing cache))
885 (goto-char (1- (wisi-cache-containing cache)))
886 (wisi-get-cache (point)))
889 (error "already at outermost containing token")))
892 (defun wisi-goto-containing-paren (cache)
893 "Move point to just after the open-paren containing CACHE.
894 Return cache for paren, or nil if no containing paren."
896 (not (eq (wisi-cache-class cache) 'open-paren)))
897 (setq cache (wisi-goto-containing cache)))
902 (defun wisi-goto-start (cache)
903 "Move point to containing ancestor of CACHE that has class block-start or statement-start.
908 (not (memq (wisi-cache-class cache) '(block-start statement-start))))
909 (setq cache (wisi-goto-containing cache)))
(defun wisi-goto-end-1 (cache)
  "Move point to the start of the token marked by (wisi-cache-end CACHE).
The end marker sits one character into its token, hence the `1-'.
CACHE must have a non-nil end marker."
  (let ((end-mark (wisi-cache-end cache)))
    (goto-char (1- end-mark))))
916 (defun wisi-goto-end ()
917 "Move point to token at end of statement point is in or before."
919 (wisi-validate-cache (point-max))
920 (let ((cache (or (wisi-get-cache (point))
921 (wisi-forward-cache))))
922 (when (wisi-cache-end cache)
923 ;; nil when cache is statement-end
924 (wisi-goto-end-1 cache))
(defun wisi-next-statement-cache (cache)
  "Move point to the token marked by CACHE's `next' slot; return its cache.
Signal an error if CACHE has no next marker."
  (let ((next-mark (wisi-cache-next cache)))
    (unless (markerp next-mark)
      (error "no next statement cache"))
    ;; Markers sit one char into the token; step back to the token start.
    (goto-char (1- next-mark))
    (wisi-get-cache (point))))
(defun wisi-prev-statement-cache (cache)
  "Move point to CACHE-prev, return cache; error if nil.
CACHE-prev is the marker in the `prev' slot of CACHE; it sits one
character into the previous motion token of the statement, so
point is left at the start of that token."
  (when (not (markerp (wisi-cache-prev cache)))
    (error "no prev statement cache"))
  (goto-char (1- (wisi-cache-prev cache)))
  (wisi-get-cache (point)))
943 (defun wisi-comment-indent ()
944 "For `comment-indent-function'. Indent single line comment to
945 the comment on the previous line."
946 ;; This should only be called by comment-indent-new-line or
947 ;; fill-comment-paragraph, so there will be a preceding comment line
948 ;; that we can trust.
951 (if (looking-at comment-start)
953 (error "wisi-comment-indent called after non-comment"))))
955 (defun wisi-indent-current (offset)
956 "Return indentation OFFSET relative to indentation of current line."
957 (+ (current-indentation) offset)
960 (defun wisi-indent-paren (offset)
961 "Return indentation OFFSET relative to preceding open paren."
963 (goto-char (nth 1 (syntax-ppss)))
964 (+ (current-column) offset)))
(defun wisi-indent-start (offset cache)
  "Return OFFSET added to the indentation of the line `wisi-goto-start' reaches.
`wisi-goto-start' is called on CACHE to move point to its containing
ancestor with class statement-start or block-start; point is left there."
  (wisi-goto-start cache)
  (let ((base (current-indentation)))
    (+ base offset)))
972 (defun wisi-indent-statement ()
973 "Indent region given by `wisi-goto-start' on cache at or before point, then wisi-cache-end."
974 ;; force reparse, in case parser got confused
975 (let ((wisi-parse-try t))
976 (wisi-validate-cache (point)))
979 (let ((cache (or (wisi-get-cache (point))
980 (wisi-backward-cache))))
982 ;; can be nil if in header comment
983 (let ((start (progn (wisi-goto-start cache) (point)))
985 (when (wisi-cache-end cache)
986 ;; nil when cache is statement-end
987 (goto-char (1- (wisi-cache-end cache))))
989 (indent-region start end)
993 (defvar-local wisi-indent-calculate-functions nil
994 "Functions to calculate indentation. Each called with point
995 before a token at the beginning of a line (at current
996 indentation); return indentation column for that token, or
997 nil. May move point. Calling stops when first function returns
;; The explicit nil initial value matters: without it the string below
;; is taken as the variable's default VALUE rather than its docstring,
;; and `run-hooks' would try to call that string as a function.
(defvar-local wisi-post-parse-fail-hook nil
  "Function to reindent portion of buffer.
Called from `wisi-indent-line' when a parse succeeds after
failing; assumes user was editing code that is now syntactically
correct. Must leave point at indentation of current line.
Assigned by `wisi-setup' from its POST-PARSE-FAIL argument.")

(defvar-local wisi-indent-failed nil
  "Non-nil when wisi-indent-line fails due to parse failing; cleared when indent succeeds.")
1009 (defun wisi-indent-line ()
1010 "Indent current line using the wisi indentation engine."
1013 (let* ((savep (point))
1016 (wisi-validate-cache (point))
1017 (back-to-indentation)
1018 (when (>= (point) savep) (setq savep nil))
1019 (if wisi-parse-failed
1021 ;; parse failed. Assume user is editing; indent to previous line, fix it after parse succeeds
1022 (setq wisi-indent-failed t)
1023 (forward-line -1);; safe at bob
1024 (back-to-indentation)
1027 ;; else parse succeeded
1028 (when wisi-indent-failed
1029 (setq wisi-indent-failed nil)
1030 (run-hooks 'wisi-post-parse-fail-hook))
1031 (with-demoted-errors
1032 (or (run-hook-with-args-until-success 'wisi-indent-calculate-functions) 0))
1035 ;; point was inside line text; leave it there
1036 (save-excursion (indent-line-to indent))
1037 ;; point was before line text; move to start of text
1038 (indent-line-to indent))
1042 (defun wisi-parse-buffer ()
1044 (syntax-propertize (point-max))
1045 (wisi-invalidate-cache)
1046 (wisi-validate-cache (point-max)))
1048 (defun wisi-show-cache ()
1049 "Show cache at point."
1051 (message "%s" (wisi-get-cache (point))))
1053 (defun wisi-show-token ()
1054 "Move forward across one keyword, show token_id."
1056 (let ((token (wisi-forward-token)))
1057 (message "%s" (car token))))
1059 (defun wisi-show-containing-or-previous-cache ()
1061 (let ((cache (wisi-get-cache (point))))
1063 (message "containing %s" (wisi-goto-containing cache t))
1064 (message "previous %s" (wisi-backward-cache)))
1069 (defun wisi-setup (indent-calculate post-parse-fail class-list keyword-table token-table parse-table)
1070 "Set up a buffer for parsing files with wisi."
1071 (setq wisi-class-list class-list)
1072 (setq wisi-string-double-term (car (symbol-value (intern-soft "string-double" token-table))))
1073 (setq wisi-string-single-term (car (symbol-value (intern-soft "string-single" token-table))))
1074 (setq wisi-symbol-term (car (symbol-value (intern-soft "symbol" token-table))))
1076 (setq wisi-punctuation-table (symbol-value (intern-soft "punctuation" token-table)))
1077 (setq wisi-punctuation-table-max-length 0)
1079 (dolist (item wisi-punctuation-table)
1080 (when item ;; default matcher can be nil
1082 ;; check that all chars used in punctuation tokens have punctuation syntax
1083 (mapc (lambda (char)
1084 (when (not (= ?. (char-syntax char)))
1086 (message "in %s, %c does not have punctuation syntax"
1090 (when (< wisi-punctuation-table-max-length (length (cdr item)))
1091 (setq wisi-punctuation-table-max-length (length (cdr item)))))
1094 (error "aborting due to punctuation errors")))
1096 (setq wisi-keyword-table keyword-table)
1097 (setq wisi-parse-table parse-table)
1099 (setq wisi-indent-calculate-functions indent-calculate)
1100 (set (make-local-variable 'indent-line-function) 'wisi-indent-line)
1102 (setq wisi-post-parse-fail-hook post-parse-fail)
1103 (setq wisi-indent-failed nil)
1105 (add-hook 'before-change-functions 'wisi-before-change nil t)
1106 (add-hook 'after-change-functions 'wisi-after-change nil t)
1108 ;; WORKAROUND: sometimes the first time font-lock is run,
1109 ;; syntax-propertize is not run properly, so we run it here
1110 (syntax-propertize (point-max))
1112 (wisi-invalidate-cache)
1116 ;;; wisi.el ends here