]> code.delx.au - gnu-emacs-elpa/blob - packages/wisi/wisi.el
Merge commit 'c82a552640134128df4485f258d40a609bd8a9dc'
[gnu-emacs-elpa] / packages / wisi / wisi.el
1 ;;; wisi.el --- Utilities for implementing an indentation/navigation engine using a generalized LALR parser -*- lexical-binding:t -*-
2 ;;
3 ;; Copyright (C) 2012 - 2015 Free Software Foundation, Inc.
4 ;;
5 ;; Author: Stephen Leake <stephen_leake@member.fsf.org>
6 ;; Maintainer: Stephen Leake <stephen_leake@member.fsf.org>
7 ;; Keywords: parser
8 ;; indentation
9 ;; navigation
10 ;; Version: 1.1.1
11 ;; package-requires: ((cl-lib "0.4") (emacs "24.2"))
12 ;; URL: http://stephe-leake.org/emacs/ada-mode/emacs-ada-mode.html
13 ;;
14 ;; This file is part of GNU Emacs.
15 ;;
16 ;; GNU Emacs is free software: you can redistribute it and/or modify
17 ;; it under the terms of the GNU General Public License as published by
18 ;; the Free Software Foundation, either version 3 of the License, or
19 ;; (at your option) any later version.
20 ;;
21 ;; GNU Emacs is distributed in the hope that it will be useful,
22 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
23 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 ;; GNU General Public License for more details.
25 ;;
26 ;; You should have received a copy of the GNU General Public License
27 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
28 ;;
29
30 ;;; Commentary:
31
32 ;;;; History: see NEWS-wisi.text
33 ;;
34 ;;;; indentation algorithm overview
35 ;;
36 ;; This design is inspired in part by experience writing a SMIE
37 ;; indentation engine for Ada, and the wisent parser.
38 ;;
39 ;; The general approach to indenting a given token is to find the
40 ;; start of the statement it is part of, or some other relevant point
41 ;; in the statement, and indent relative to that. So we need a parser
42 ;; that lets us find statement indent points from arbitrary places in
43 ;; the code.
44 ;;
45 ;; For example, the grammar for Ada as represented by the EBNF in LRM
46 ;; Annex P is not LALR(1), so we use a generalized LALR(1) parser (see
47 ;; wisi-parse, wisi-compile).
48 ;;
49 ;; The parser actions cache indentation and other information as text
50 ;; properties of tokens in statements.
51 ;;
52 ;; An indentation engine moves text in the buffer, as does user
53 ;; editing, so we can't rely on character positions remaining
54 ;; constant. So the parser actions use markers to store
55 ;; positions. Text properties also move with the text.
56 ;;
57 ;; The stored information includes a marker at each statement indent
58 ;; point. Thus, the indentation algorithm is: find the previous token
59 ;; with cached information, and either indent from it, or fetch from
60 ;; it the marker for a previous statement indent point, and indent
61 ;; relative to that.
62 ;;
63 ;; Since we have a cache (the text properties), we need to consider
64 ;; when to invalidate it. Ideally, we invalidate only when a change to
65 ;; the buffer would change the result of a parse that crosses that
66 ;; change, or starts after that change. Changes in whitespace
67 ;; (indentation and newlines) do not affect an Ada parse. Other
68 ;; languages are sensitive to newlines (Bash for example) or
69 ;; indentation (Python). Adding comments does not change a parse,
70 ;; unless code is commented out. For now we invalidate the cache after
71 ;; the edit point if the change involves anything other than
72 ;; whitespace.
73 ;;
74 ;;; Handling parse errors:
75 ;;
76 ;; When a parse fails, the cache information before the failure point
77 ;; is only partly correct, and there is no cache informaiton after the
78 ;; failure point.
79 ;;
80 ;; However, in the case where a parse previously succeeded, and the
81 ;; current parse fails due to editing, we keep the preceding cache
82 ;; information by setting wisi-cache-max to the edit point in
83 ;; wisi-before change; the parser does not apply actions before that
84 ;; point.
85 ;;
86 ;; This allows navigation and indentation in the text preceding the
87 ;; edit point, and saves some time.
88 ;;
89 ;;;; comparison to the SMIE parser
90 ;;
91 ;; The central problem to be solved in building the SMIE parser is
92 ;; grammar precedence conflicts; the general solution is refining
93 ;; keywords so that each new keyword can be assigned a unique
94 ;; precedence. This means ad hoc code must be written to determine the
95 ;; correct refinement for each language keyword from the surrounding
96 ;; tokens. In effect, for a complex language like Ada, the knowledge
97 ;; of the language grammar is mostly embedded in the refinement code;
98 ;; only a small amount is in the refined grammar. Implementing a SMIE
99 ;; parser for a new language involves the same amount of work as the
100 ;; first language.
101 ;;
102 ;; Using a generalized LALR parser avoids that particular problem;
103 ;; assuming the language is already defined by a grammar, it is only a
104 ;; matter of a format change to teach the wisi parser the
105 ;; language. The problem in a wisi indentation engine is caching the
106 ;; output of the parser in a useful way, since we can't start the
107 ;; parser from arbitrary places in the code (as we can with the SMIE
108 ;; parser). A second problem is determining when to invalidate the
109 ;; cache. But these problems are independent of the language being
110 ;; parsed, so once we have one wisi indentation engine working,
111 ;; adapting it to new languages should be quite simple.
112 ;;
113 ;; The SMIE parser does not find the start of each statement, only the
114 ;; first language keyword in each statement; additional code must be
115 ;; written to find the statement start and indent points. The wisi
116 ;; parser finds the statement start and indent points directly.
117 ;;
118 ;; In SMIE, it is best if each grammar rule is a complete statement,
119 ;; so forward-sexp will traverse the entire statement. If nested
120 ;; non-terminals are used, forward-sexp may stop inside one of the
121 ;; nested non-terminals. This problem does not occur with the wisi
122 ;; parser.
123 ;;
124 ;; A downside of the wisi parser is conflicts in the grammar; they can
125 ;; be much more difficult to resolve than in the SMIE parser. The
126 ;; generalized parser helps by handling conflicts, but it does so by
127 ;; running multiple parsers in parallel, persuing each choice in the
128 ;; conflict. If the conflict is due to a genuine ambiguity, both paths
129 ;; will succeed, which causes the parse to fail, since it is not clear
130 ;; which set of text properties to store. Even if one branch
131 ;; ultimately fails, running parallel parsers over large sections of
132 ;; code is slow. Finally, this approach can lead to exponential growth
133 ;; in the number of parsers. So grammar conflicts must still be
134 ;; analyzed and minimized.
135 ;;
136 ;; In addition, the complete grammar must be specified; in smie, it is
137 ;; often possible to specify a subset of the grammar.
138 ;;
139 ;;;; grammar compiler and parser
140 ;;
141 ;; Since we are using a generalized LALR(1) parser, we cannot use any
142 ;; of the wisent grammar functions. We use OpenToken wisi-generate
143 ;; to compile BNF to Elisp source (similar to
144 ;; semantic-grammar-create-package), and wisi-compile-grammar to
145 ;; compile that to the parser table.
146 ;;
147 ;; Semantic provides a complex lexer, more complicated than we need
148 ;; for indentation. So we use the elisp lexer, which consists of
149 ;; `forward-comment', `skip-syntax-forward', and `scan-sexp'. We wrap
150 ;; that in functions that return tokens in the form wisi-parse
151 ;; expects.
152 ;;
153 ;;;; lexer
154 ;;
155 ;; The lexer is `wisi-forward-token'. It relies on syntax properties,
156 ;; so syntax-propertize must be called on the text to be lexed before
157 ;; wisi-forward-token is called. In general, it is hard to determine
158 ;; an appropriate end-point for syntax-propertize, other than
159 ;; point-max. So we call (syntax-propertize point-max) in wisi-setup,
160 ;; and also call syntax-propertize in wisi-after-change.
161 ;;
162 ;;;; code style
163 ;;
164 ;; 'wisi' was originally short for "wisent indentation engine", but
165 ;; now is just a name.
166 ;;
167 ;; not using lexical-binding because we support Emacs 23
168 ;;
169 ;;;;;
170
171 ;;; Code:
172
173 (require 'cl-lib)
174 (require 'wisi-parse)
175
176 ;; WORKAROUND: for some reason, this condition doesn't work in batch mode!
177 ;; (when (and (= emacs-major-version 24)
178 ;; (= emacs-minor-version 2))
179 (require 'wisi-compat-24.2)
180 ;;)
181
182 (defcustom wisi-size-threshold 100000
183 "Max size (in characters) for using wisi parser results for syntax highlighting and file navigation."
184 :type 'integer
185 :group 'wisi
186 :safe 'integerp)
187 (make-variable-buffer-local 'wisi-size-threshold)
188
189 ;;;; lexer
190
191 (defvar-local wisi-class-list nil)
192 (defvar-local wisi-keyword-table nil)
193 (defvar-local wisi-punctuation-table nil)
194 (defvar-local wisi-punctuation-table-max-length 0)
195 (defvar-local wisi-string-double-term nil);; string delimited by double quotes
196 (defvar-local wisi-string-quote-escape-doubled nil
197 "Non-nil if a string delimiter is escaped by doubling it (as in Ada).")
198 (defvar-local wisi-string-quote-escape nil
199 "Cons '(delim . character) where 'character' escapes quotes in strings delimited by 'delim'.")
200 (defvar-local wisi-string-single-term nil) ;; string delimited by single quotes
201 (defvar-local wisi-symbol-term nil)
202 (defvar-local wisi-number-term nil)
203 (defvar-local wisi-number-p nil)
204
205 (defun wisi-number-p (token-text)
206 "Return t if TOKEN-TEXT plus text after point matches the
207 syntax for a real literal; otherwise nil. Point is after
208 TOKEN-TEXT; move point to just past token."
209 ;; Typical literals:
210 ;; 1234
211 ;; 1234.5678
212 ;; _not_ including non-decimal base, or underscores (see ada-wisi-number-p)
213 ;;
214 ;; Starts with a simple integer
215 (when (string-match "^[0-9]+$" token-text)
216 (when (looking-at "\\.[0-9]+")
217 ;; real number
218 (goto-char (match-end 0))
219 (when (looking-at "[Ee][+-][0-9]+")
220 ;; exponent
221 (goto-char (match-end 0))))
222
223 t
224 ))
225
226 (defun wisi-forward-token ()
227 "Move point forward across one token, skipping leading whitespace and comments.
228 Return the corresponding token, in format: (token start . end) where:
229
230 `token' is a token symbol (not string) from `wisi-punctuation-table',
231 `wisi-keyword-table', `wisi-string-double-term', `wisi-string-double-term' or `wisi-symbol-term'.
232
233 `start, end' are the character positions in the buffer of the start
234 and end of the token text.
235
236 If at end of buffer, returns `wisent-eoi-term'."
237 (forward-comment (point-max))
238 ;; skips leading whitespace, comment, trailing whitespace.
239
240 (let ((start (point))
241 ;; (info "(elisp)Syntax Table Internals" "*info elisp syntax*")
242 (syntax (syntax-class (syntax-after (point))))
243 token-id token-text)
244 (cond
245 ((eobp)
246 (setq token-id wisent-eoi-term))
247
248 ((eq syntax 1)
249 ;; punctuation. Find the longest matching string in wisi-punctuation-table
250 (forward-char 1)
251 (let ((next-point (point))
252 temp-text temp-id done)
253 (while (not done)
254 (setq temp-text (buffer-substring-no-properties start (point)))
255 (setq temp-id (car (rassoc temp-text wisi-punctuation-table)))
256 (when temp-id
257 (setq token-id temp-id
258 next-point (point)))
259 (if (or
260 (eobp)
261 (= (- (point) start) wisi-punctuation-table-max-length))
262 (setq done t)
263 (forward-char 1))
264 )
265 (goto-char next-point)))
266
267 ((memq syntax '(4 5)) ;; open, close parenthesis
268 (forward-char 1)
269 (setq token-text (buffer-substring-no-properties start (point)))
270 (setq token-id (symbol-value (intern-soft token-text wisi-keyword-table))))
271
272 ((eq syntax 7)
273 ;; string quote, either single or double. we assume point is
274 ;; before the start quote, not the end quote
275 (let ((delim (char-after (point)))
276 (forward-sexp-function nil))
277 (condition-case err
278 (progn
279 (forward-sexp)
280
281 ;; point is now after the end quote; check for an escaped quote
282 (while (or
283 (and wisi-string-quote-escape-doubled
284 (eq (char-after (point)) delim))
285 (and (eq delim (car wisi-string-quote-escape))
286 (eq (char-before (1- (point))) (cdr wisi-string-quote-escape))))
287 (forward-sexp))
288 (setq token-id (if (= delim ?\") wisi-string-double-term wisi-string-single-term)))
289 (scan-error
290 ;; Something screwed up; we should not get here if
291 ;; syntax-propertize works properly.
292 (error "wisi-forward-token: forward-sexp failed %s" err)
293 ))))
294
295 (t ;; assuming word or symbol syntax
296 (skip-syntax-forward "w_'")
297 (setq token-text (buffer-substring-no-properties start (point)))
298 (setq token-id
299 (or (symbol-value (intern-soft (downcase token-text) wisi-keyword-table))
300 (and (functionp wisi-number-p)
301 (funcall wisi-number-p token-text)
302 (setq token-text (buffer-substring-no-properties start (point)))
303 wisi-number-term)
304 wisi-symbol-term))
305 )
306 );; cond
307
308 (unless token-id
309 (signal 'wisi-parse-error
310 (wisi-error-msg "unrecognized token '%s'" (buffer-substring-no-properties start (point)))))
311
312 (cons token-id (cons start (point)))
313 ))
314
315 (defun wisi-backward-token ()
316 "Move point backward across one token, skipping whitespace and comments.
317 Does _not_ handle numbers with wisi-number-p; just sees lower-level syntax.
318 Return (nil start . end) - same structure as
319 wisi-forward-token, but does not look up symbol."
320 (forward-comment (- (point)))
321 ;; skips leading whitespace, comment, trailing whitespace.
322
323 ;; (info "(elisp)Syntax Table Internals" "*info elisp syntax*")
324 (let ((end (point))
325 (syntax (syntax-class (syntax-after (1- (point))))))
326 (cond
327 ((bobp) nil)
328
329 ((eq syntax 1)
330 ;; punctuation. Find the longest matching string in wisi-punctuation-table
331 (backward-char 1)
332 (let ((next-point (point))
333 temp-text done)
334 (while (not done)
335 (setq temp-text (buffer-substring-no-properties (point) end))
336 (when (car (rassoc temp-text wisi-punctuation-table))
337 (setq next-point (point)))
338 (if (or
339 (bobp)
340 (= (- end (point)) wisi-punctuation-table-max-length))
341 (setq done t)
342 (backward-char 1))
343 )
344 (goto-char next-point))
345 )
346
347 ((memq syntax '(4 5)) ;; open, close parenthesis
348 (backward-char 1))
349
350 ((eq syntax 7)
351 ;; a string quote. we assume we are after the end quote, not the start quote
352 (let ((forward-sexp-function nil))
353 (forward-sexp -1)))
354
355 (t ;; assuming word or symbol syntax
356 (if (zerop (skip-syntax-backward "."))
357 (skip-syntax-backward "w_'")))
358 )
359 (cons nil (cons (point) end))
360 ))
361
362 ;;;; token info cache
363 ;;
364 ;; the cache stores the results of parsing as text properties on
365 ;; keywords, for use by the indention and motion engines.
366
367 (cl-defstruct
368 (wisi-cache
369 (:constructor wisi-cache-create)
370 (:copier nil))
371 nonterm;; nonterminal from parse (set by wisi-statement-action)
372
373 token
374 ;; terminal symbol from wisi-keyword-table or
375 ;; wisi-punctuation-table, or lower-level nonterminal from parse
376 ;; (set by wisi-statement-action)
377
378 last ;; pos of last char in token, relative to first (0 indexed)
379
380 class
381 ;; arbitrary lisp symbol, used for indentation and navigation.
382 ;; some classes are defined by wisi:
383 ;;
384 ;; 'block-middle - a block keyword (ie: if then else end), not at the start of a statement
385 ;;
386 ;; 'block-start - a block keyword at the start of a statement
387 ;;
388 ;; 'statement-start - the start of a statement
389 ;;
390 ;; 'open-paren
391 ;;
392 ;; others are language-specific
393
394 containing
395 ;; Marker at the containing keyword for this token.
396 ;; A containing keyword is an indent point; the start of a
397 ;; statement, or 'begin', 'then' or 'else' for a block of
398 ;; statements, etc.
399 ;; nil only for first token in buffer
400
401 prev ;; marker at previous motion token in statement; nil if none
402 next ;; marker at next motion token in statement; nil if none
403 end ;; marker at token at end of current statement
404 )
405
406 (defvar-local wisi-parse-table nil)
407
408 (defvar-local wisi-parse-failed nil
409 "Non-nil when a recent parse has failed - cleared when parse succeeds.")
410
411 (defvar-local wisi-parse-try nil
412 "Non-nil when parse is needed - cleared when parse succeeds.")
413
414 (defvar-local wisi-change-need-invalidate nil
415 "When non-nil, buffer position to invalidate from.
416 Used in before/after change functions.")
417
418 (defvar-local wisi-end-caches nil
419 "List of buffer positions of caches in current statement that need wisi-cache-end set.")
420
421 (defun wisi-delete-cache (after)
422 (with-silent-modifications
423 (remove-text-properties after (point-max) '(wisi-cache nil))
424 ;; We don't remove 'font-lock-face; that's annoying to the user,
425 ;; since they won't be restored until a parse for some other
426 ;; reason, and they are likely to be right anyway.
427 ))
428
429 (defun wisi-invalidate-cache(&optional after)
430 "Invalidate parsing caches for the current buffer from AFTER to end of buffer."
431 (interactive)
432 (if (not after)
433 (setq after (point-min))
434 (setq after
435 (save-excursion
436 (goto-char after)
437 (line-beginning-position))))
438 (when (> wisi-debug 0) (message "wisi-invalidate %s:%d" (current-buffer) after))
439 (setq wisi-cache-max after)
440 (setq wisi-parse-try t)
441 (syntax-ppss-flush-cache after)
442 (wisi-delete-cache after)
443 )
444
445 (defun wisi-before-change (begin end)
446 "For `before-change-functions'."
447 ;; begin . end is range of text being deleted
448
449 ;; If jit-lock-after-change is before wisi-after-change in
450 ;; after-change-functions, it might use any invalid caches in the
451 ;; inserted text.
452 ;;
453 ;; So we check for that here, and ensure it is after
454 ;; wisi-after-change, which deletes the invalid caches
455 (when (boundp 'jit-lock-mode)
456 (when (memq 'wisi-after-change (memq 'jit-lock-after-change after-change-functions))
457 (setq after-change-functions (delete 'wisi-after-change after-change-functions))
458 (add-hook 'after-change-functions 'wisi-after-change nil t))
459 )
460
461 (setq wisi-change-need-invalidate nil)
462
463 (when (> end begin)
464 (save-excursion
465 ;; (info "(elisp)Parser State")
466 (let* ((begin-state (syntax-ppss begin))
467 (end-state (syntax-ppss end))
468 ;; syntax-ppss has moved point to "end".
469 (word-end (progn (skip-syntax-forward "w_")(point))))
470
471 ;; Remove grammar face from word(s) containing change region;
472 ;; might be changing to/from a keyword. See
473 ;; test/ada_mode-interactive_common.adb Obj_1
474 (goto-char begin)
475 (skip-syntax-backward "w_")
476 (with-silent-modifications
477 (remove-text-properties (point) word-end '(font-lock-face nil fontified nil)))
478
479 (if (<= wisi-cache-max begin)
480 ;; Change is in unvalidated region; either the parse was
481 ;; failing, or there is more than one top-level grammar
482 ;; symbol in buffer.
483 (when wisi-parse-failed
484 ;; The parse was failing, probably due to bad syntax; this
485 ;; change may have fixed it, so try reparse.
486 (setq wisi-parse-try t))
487
488 ;; else change is in validated region
489 ;;
490 ;; don't invalidate parse for whitespace, string, or comment changes
491 (cond
492 ((and
493 (nth 3 begin-state); in string
494 (nth 3 end-state)))
495 ;; no easy way to tell if there is intervening non-string
496
497 ((and
498 (nth 4 begin-state); in comment
499 (nth 4 end-state))
500 ;; too hard to detect case where there is intervening
501 ;; code; no easy way to go to end of comment if not
502 ;; newline
503 )
504
505 ;; Deleting whitespace generally does not require parse, but
506 ;; deleting all whitespace between two words does; check that
507 ;; there is whitespace on at least one side of the deleted
508 ;; text.
509 ;;
510 ;; We are not in a comment (checked above), so treat
511 ;; comment end as whitespace in case it is newline, except
512 ;; deleting a comment end at begin means commenting the
513 ;; current line; requires parse.
514 ((and
515 (eq (car (syntax-after begin)) 0) ; whitespace
516 (memq (car (syntax-after (1- end))) '(0 12)) ; whitespace, comment end
517 (or
518 (memq (car (syntax-after (1- begin))) '(0 12))
519 (memq (car (syntax-after end)) '(0 12)))
520 (progn
521 (goto-char begin)
522 (skip-syntax-forward " >" end)
523 (eq (point) end))))
524
525 (t
526 (setq wisi-change-need-invalidate
527 (progn
528 (goto-char begin)
529 ;; note that because of the checks above, this never
530 ;; triggers a parse, so it's fast
531 (wisi-goto-statement-start)
532 (point))))
533 )))
534 ))
535 )
536
537 (defun wisi-after-change (begin end length)
538 "For `after-change-functions'."
539 ;; begin . end is range of text being inserted (empty if equal);
540 ;; length is the size of the deleted text.
541
542 ;; (syntax-ppss-flush-cache begin) is in before-change-functions
543
544 (syntax-propertize end) ;; see comments above on "lexer" re syntax-propertize
545
546 ;; Remove caches on inserted text, which could have caches from
547 ;; before the failed parse (or another buffer), and are in any case
548 ;; invalid. No point in removing 'fontified; that's handled by
549 ;; jit-lock.
550
551 (with-silent-modifications
552 (remove-text-properties begin end '(wisi-cache nil font-lock-face nil)))
553
554 ;; Also remove grammar face from word(s) containing change region;
555 ;; might be changing to/from a keyword. See
556 ;; test/ada_mode-interactive_common.adb Obj_1
557 (save-excursion
558 ;; (info "(elisp)Parser State")
559 (let ((need-invalidate wisi-change-need-invalidate)
560 begin-state end-state word-end)
561 (when (> end begin)
562 (setq begin-state (syntax-ppss begin))
563 (setq end-state (syntax-ppss end))
564 ;; syntax-ppss has moved point to "end".
565 (skip-syntax-forward "w_")
566 (setq word-end (point))
567 (goto-char begin)
568 (skip-syntax-backward "w_")
569 (with-silent-modifications
570 (remove-text-properties (point) word-end '(font-lock-face nil fontified nil))))
571
572 (if (<= wisi-cache-max begin)
573 ;; Change is in unvalidated region
574 (when wisi-parse-failed
575 ;; The parse was failing, probably due to bad syntax; this
576 ;; change may have fixed it, so try reparse.
577 (setq wisi-parse-try t))
578
579 ;; Change is in validated region
580 (cond
581 (wisi-change-need-invalidate
582 ;; wisi-before change determined the removed text alters the
583 ;; parse
584 )
585
586 ((= end begin)
587 (setq need-invalidate nil))
588
589 ((and
590 (nth 3 begin-state); in string
591 (nth 3 end-state))
592 ;; no easy way to tell if there is intervening non-string
593 (setq need-invalidate nil))
594
595 ((or
596 (nth 4 begin-state)
597 (nth 4 end-state)); in comment
598 ;; no easy way to detect intervening code
599 (setq need-invalidate nil)
600 ;; no caches to remove
601 )
602
603 ;; Adding whitespace generally does not require parse, but in
604 ;; the middle of word it does; check that there was
605 ;; whitespace on at least one side of the inserted text.
606 ;;
607 ;; We are not in a comment (checked above), so treat
608 ;; comment end as whitespace in case it is newline
609 ((and
610 (or
611 (memq (car (syntax-after (1- begin))) '(0 12)); whitespace, comment end
612 (memq (car (syntax-after end)) '(0 12)))
613 (progn
614 (goto-char begin)
615 (skip-syntax-forward " >" end)
616 (eq (point) end)))
617 (setq need-invalidate nil))
618
619 (t
620 (setq need-invalidate
621 (progn
622 (goto-char begin)
623 ;; note that because of the checks above, this never
624 ;; triggers a parse, so it's fast
625 (wisi-goto-statement-start)
626 (point))))
627 )
628
629 (if need-invalidate
630 (wisi-invalidate-cache need-invalidate)
631
632 ;; else move cache-max by the net change length.
633 (setq wisi-cache-max
634 (+ wisi-cache-max (- end begin length))) )
635 ))
636 ))
637
638 (defun wisi-get-cache (pos)
639 "Return `wisi-cache' struct from the `wisi-cache' text property at POS.
640 If accessing cache at a marker for a token as set by `wisi-cache-tokens', POS must be (1- mark)."
641 (get-text-property pos 'wisi-cache))
642
643 (defvar-local wisi-parse-error-msg nil)
644
645 (defun wisi-goto-error ()
646 "Move point to position in last error message (if any)."
647 (when (string-match ":\\([0-9]+\\):\\([0-9]+\\):" wisi-parse-error-msg)
648 (let ((line (string-to-number (match-string 1 wisi-parse-error-msg)))
649 (col (string-to-number (match-string 2 wisi-parse-error-msg))))
650 (push-mark)
651 (goto-char (point-min))
652 (forward-line (1- line))
653 (forward-char col))))
654
655 (defun wisi-show-parse-error ()
656 "Show last wisi-parse error."
657 (interactive)
658 (cond
659 (wisi-parse-failed
660 (wisi-goto-error)
661 (message wisi-parse-error-msg))
662
663 (wisi-parse-try
664 (message "need parse"))
665
666 (t
667 (message "parse succeeded"))
668 ))
669
670 (defvar wisi-post-parse-succeed-hook nil
671 "Hook run after parse succeeds.")
672
673 (defun wisi-validate-cache (pos)
674 "Ensure cached data is valid at least up to POS in current buffer."
675 (let ((msg (when (> wisi-debug 0) (format "wisi: parsing %s:%d ..." (buffer-name) (line-number-at-pos pos)))))
676 ;; If wisi-cache-max = pos, then there is no cache at pos; need parse
677 (when (and wisi-parse-try
678 (<= wisi-cache-max pos))
679 (when (> wisi-debug 0)
680 (message msg))
681
682 (setq wisi-parse-try nil)
683 (setq wisi-parse-error-msg nil)
684 (setq wisi-end-caches nil)
685
686 (if (> wisi-debug 1)
687 ;; let debugger stop in wisi-parse
688 (progn
689 (save-excursion
690 (wisi-parse wisi-parse-table 'wisi-forward-token)
691 (setq wisi-cache-max (point))
692 (setq wisi-parse-failed nil))
693 (run-hooks 'wisi-post-parse-succeed-hook))
694
695 ;; else capture errors from bad syntax, so higher level
696 ;; functions can try to continue and/or we don't bother the
697 ;; user.
698 (condition-case err
699 (progn
700 (save-excursion
701 (wisi-parse wisi-parse-table 'wisi-forward-token)
702 (setq wisi-cache-max (point))
703 (setq wisi-parse-failed nil))
704 (run-hooks 'wisi-post-parse-succeed-hook))
705 (wisi-parse-error
706 ;; delete caches past wisi-cache-max added by failed parse
707 (wisi-delete-cache wisi-cache-max)
708 (setq wisi-parse-failed t)
709 (setq wisi-parse-error-msg (cdr err)))
710 ))
711 (if wisi-parse-error-msg
712 ;; error
713 (when (> wisi-debug 0)
714 (message "%s error" msg)
715 (wisi-goto-error)
716 (error wisi-parse-error-msg))
717 ;; no msg; success
718 (when (> wisi-debug 0)
719 (message "%s done" msg)))
720 )))
721
722 (defun wisi-fontify-region (_begin end)
723 "For `jit-lock-functions'."
724 (when (< (point-max) wisi-size-threshold)
725 (wisi-validate-cache end)))
726
727 (defun wisi-get-containing-cache (cache)
728 "Return cache from (wisi-cache-containing CACHE)."
729 (let ((containing (wisi-cache-containing cache)))
730 (and containing
731 (wisi-get-cache (1- containing)))))
732
733 (defun wisi-cache-region (cache)
734 "Return region designated by cache.
735 Point must be at cache."
736 (cons (point) (+ (point) (wisi-cache-last cache))))
737
738 (defun wisi-cache-text (cache)
739 "Return property-less buffer substring designated by cache.
740 Point must be at cache."
741 (buffer-substring-no-properties (point) (+ (point) (wisi-cache-last cache))))
742
743 ;;;; parse actions
744
745 (defun wisi-set-end (start-mark end-mark)
746 "Set END-MARK on all caches in `wisi-end-caches' in range START-MARK END-MARK,
747 delete from `wisi-end-caches'."
748 (let ((i 0)
749 pos cache)
750 (while (< i (length wisi-end-caches))
751 (setq pos (nth i wisi-end-caches))
752 (setq cache (wisi-get-cache pos))
753
754 (if (and (>= pos start-mark)
755 (< pos end-mark))
756 (progn
757 (setf (wisi-cache-end cache) end-mark)
758 (setq wisi-end-caches (delq pos wisi-end-caches)))
759
760 ;; else not in range
761 (setq i (1+ i)))
762 )))
763
764 (defvar wisi-tokens nil)
765 ;; keep byte-compiler happy; `wisi-tokens' is bound in action created
766 ;; by wisi-semantic-action
767
768 (defun wisi-statement-action (pairs)
769 "Cache information in text properties of tokens.
770 Intended as a grammar non-terminal action.
771
772 PAIRS is a vector of the form [TOKEN-NUMBER CLASS TOKEN-NUMBER
773 CLASS ...] where TOKEN-NUMBER is the (1 indexed) token number in
774 the production, CLASS is the wisi class of that token. Use in a
775 grammar action as:
776 (wisi-statement-action [1 'statement-start 7 'statement-end])"
777 (save-excursion
778 (let ((first-item t)
779 first-keyword-mark
780 (override-start nil)
781 (i 0))
782 (while (< i (length pairs))
783 (let* ((number (1- (aref pairs i)))
784 (region (cdr (aref wisi-tokens number)));; wisi-tokens is let-bound in wisi-parse-reduce
785 (token (car (aref wisi-tokens number)))
786 (class (aref pairs (setq i (1+ i))))
787 (mark
788 ;; Marker one char into token, so indent-line-to
789 ;; inserts space before the mark, not after
790 (when region (copy-marker (1+ (car region)))))
791 cache)
792
793 (setq i (1+ i))
794
795 (unless (memq class wisi-class-list)
796 (error "%s not in wisi-class-list" class))
797
798 (if region
799 (progn
800 (if (setq cache (wisi-get-cache (car region)))
801 ;; We are processing a previously set non-terminal; ie generic_formal_part in
802 ;;
803 ;; generic_package_declaration : generic_formal_part package_specification SEMICOLON
804 ;; (wisi-statement-action 1 'block-start 2 'block-middle 3 'statement-end)
805 ;;
806 ;; or simple_statement in
807 ;;
808 ;; statement : label_opt simple_statement
809 ;;
810 ;; override nonterm, class, containing
811 ;; set end only if not set yet (due to failed parse)
812 (progn
813 (cl-case (wisi-cache-class cache)
814 (block-start
815 (setf (wisi-cache-class cache)
816 (cond
817 ((eq override-start nil)
818 (cond
819 ((memq class '(block-start statement-start)) 'block-start)
820 (t 'block-middle)))
821
822 ((memq override-start '(block-start statement-start)) 'block-start)
823
824 (t (error "unexpected override-start"))
825 )))
826 (t
827 (setf (wisi-cache-class cache) (or override-start class)))
828 )
829 (setf (wisi-cache-nonterm cache) $nterm)
830 (setf (wisi-cache-containing cache) first-keyword-mark)
831 (unless (wisi-cache-end cache)
832 (if wisi-end-caches
833 (push (car region) wisi-end-caches)
834 (setq wisi-end-caches (list (car region)))
835 ))
836 )
837
838 ;; else create new cache
839 (with-silent-modifications
840 (put-text-property
841 (car region)
842 (1+ (car region))
843 'wisi-cache
844 (wisi-cache-create
845 :nonterm $nterm;; $nterm defined in wisi-semantic-action
846 :token token
847 :last (- (cdr region) (car region))
848 :class (or override-start class)
849 :containing first-keyword-mark)
850 ))
851 (if wisi-end-caches
852 (push (car region) wisi-end-caches)
853 (setq wisi-end-caches (list (car region)))
854 ))
855
856 (when first-item
857 (setq first-item nil)
858 (when (or override-start
859 ;; FIXME: why block-middle here?
860 (memq class '(block-middle block-start statement-start)))
861 (setq override-start nil)
862 (setq first-keyword-mark mark)))
863
864 (when (eq class 'statement-end)
865 (wisi-set-end (1- first-keyword-mark) (copy-marker (1+ (car region)))))
866 )
867
868 ;; region is nil when a production is empty; if the first
869 ;; token is a start, override the class on the next token.
870 (when (and first-item
871 (memq class '(block-middle block-start statement-start)))
872 (setq override-start class)))
873 ))
874 )))
875
876 (defun wisi-containing-action (containing-token contained-token)
877 "Set containing marks in all tokens in CONTAINED-TOKEN with null containing mark to marker pointing to CONTAINING-TOKEN.
878 If CONTAINING-TOKEN is empty, the next token number is used."
879 ;; wisi-tokens is is bound in action created by wisi-semantic-action
880 (let* ((containing-region (cdr (aref wisi-tokens (1- containing-token))))
881 (contained-region (cdr (aref wisi-tokens (1- contained-token)))))
882
883 (unless containing-region ;;
884 (signal 'wisi-parse-error
885 (wisi-error-msg
886 "wisi-containing-action: containing-region '%s' is empty. grammar error; bad action"
887 (wisi-token-text (aref wisi-tokens (1- containing-token))))))
888
889 (unless (or (not contained-region) ;; contained-token is empty
890 (wisi-get-cache (car containing-region)))
891 (signal 'wisi-parse-error
892 (wisi-error-msg
893 "wisi-containing-action: containing-token '%s' has no cache. grammar error; missing action"
894 (wisi-token-text (aref wisi-tokens (1- containing-token))))))
895
896 (while (not containing-region)
897 ;; containing-token is empty; use next
898 (setq containing-region (cdr (aref wisi-tokens containing-token))))
899
900 (when contained-region
901 ;; nil when empty production, may not contain any caches
902 (save-excursion
903 (goto-char (cdr contained-region))
904 (let ((cache (wisi-backward-cache))
905 (mark (copy-marker (1+ (car containing-region)))))
906 (while cache
907
908 ;; skip blocks that are already marked
909 (while (and (>= (point) (car contained-region))
910 (markerp (wisi-cache-containing cache)))
911 (goto-char (1- (wisi-cache-containing cache)))
912 (setq cache (wisi-get-cache (point))))
913
914 (if (or (and (= (car containing-region) (car contained-region))
915 (<= (point) (car contained-region)))
916 (< (point) (car contained-region)))
917 ;; done
918 (setq cache nil)
919
920 ;; else set mark, loop
921 (setf (wisi-cache-containing cache) mark)
922 (setq cache (wisi-backward-cache)))
923 ))))))
924
925 (defun wisi-match-class-token (cache class-tokens)
926 "Return t if CACHE matches CLASS-TOKENS.
927 CLASS-TOKENS is a vector [number class token_id class token_id ...].
928 number is ignored."
929 (let ((i 1)
930 (done nil)
931 (result nil)
932 class token)
933 (while (and (not done)
934 (< i (length class-tokens)))
935 (setq class (aref class-tokens i))
936 (setq token (aref class-tokens (setq i (1+ i))))
937 (setq i (1+ i))
938 (when (and (eq class (wisi-cache-class cache))
939 (eq token (wisi-cache-token cache)))
940 (setq result t
941 done t))
942 )
943 result))
944
945 (defun wisi-motion-action (token-numbers)
946 "Set prev/next marks in all tokens given by TOKEN-NUMBERS.
947 TOKEN-NUMBERS is a vector with each element one of:
948
949 number: the token number; mark that token
950
951 vector [number class token_id]:
952 vector [number class token_id class token_id ...]:
953 mark all tokens in number nonterminal matching (class token_id) with nil prev/next."
954 (save-excursion
955 (let (prev-keyword-mark
956 prev-cache
957 cache
958 mark
959 (i 0))
960 (while (< i (length token-numbers))
961 (let ((token-number (aref token-numbers i))
962 region)
963 (setq i (1+ i))
964 (cond
965 ((numberp token-number)
966 (setq region (cdr (aref wisi-tokens (1- token-number))))
967 (when region
968 (setq cache (wisi-get-cache (car region)))
969 (setq mark (copy-marker (1+ (car region))))
970
971 (when (and prev-keyword-mark
972 cache
973 (null (wisi-cache-prev cache)))
974 (setf (wisi-cache-prev cache) prev-keyword-mark)
975 (setf (wisi-cache-next prev-cache) mark))
976
977 (setq prev-keyword-mark mark)
978 (setq prev-cache cache)
979 ))
980
981 ((vectorp token-number)
982 ;; token-number may contain 0, 1, or more 'class token_id' pairs
983 ;; the corresponding region may be empty
984 ;; there must have been a prev keyword
985 (setq region (cdr (aref wisi-tokens (1- (aref token-number 0)))))
986 (when region ;; not an empty token
987 ;; We must search for all targets at the same time, to
988 ;; get the motion order right.
989 (goto-char (car region))
990 (setq cache (or (wisi-get-cache (point))
991 (wisi-forward-cache)))
992 (while (< (point) (cdr region))
993 (when (wisi-match-class-token cache token-number)
994 (when (null (wisi-cache-prev cache))
995 (setf (wisi-cache-prev cache) prev-keyword-mark))
996 (when (null (wisi-cache-next cache))
997 (setq mark (copy-marker (1+ (point))))
998 (setf (wisi-cache-next prev-cache) mark)
999 (setq prev-keyword-mark mark)
1000 (setq prev-cache cache)))
1001
1002 (setq cache (wisi-forward-cache))
1003 )))
1004
1005 (t
1006 (error "unexpected token-number %s" token-number))
1007 )
1008
1009 ))
1010 )))
1011
1012 (defun wisi-extend-action (number)
1013 "Extend text of cache at token NUMBER to cover all of token NUMBER.
1014 Also override token with new token."
1015 (let* ((token-region (aref wisi-tokens (1- number)));; wisi-tokens is let-bound in wisi-parse-reduce
1016 (token (car token-region))
1017 (region (cdr token-region))
1018 cache)
1019
1020 (when region
1021 (setq cache (wisi-get-cache (car region)))
1022 (setf (wisi-cache-last cache) (- (cdr region) (car region)))
1023 (setf (wisi-cache-token cache) token)
1024 )
1025 ))
1026
1027 (defun wisi-face-action-1 (face region &optional no-override)
1028 "Apply FACE to REGION. If NO-OVERRIDE is non-nil, don't override existing face."
1029 (when region
1030 ;; We allow overriding a face property, because we don't want to
1031 ;; delete them in wisi-invalidate (see comments there). On the
1032 ;; other hand, it can be an error, so keep this debug
1033 ;; code. However, note that font-lock-face properties must be
1034 ;; removed first, or the buffer must be fresh (never parsed).
1035 ;;
1036 ;; Grammar sets no-override when a higher-level production might
1037 ;; override a face in a lower-level production; that's not an
1038 ;; error.
1039 (let (cur-face
1040 (do-set t))
1041 (when (or no-override
1042 (> wisi-debug 1))
1043 (setq cur-face (get-text-property (car region) 'font-lock-face))
1044 (if cur-face
1045 (if no-override
1046 (setq do-set nil)
1047 (message "%s:%d overriding face %s with %s on '%s'"
1048 (buffer-file-name)
1049 (line-number-at-pos (car region))
1050 face
1051 cur-face
1052 (buffer-substring-no-properties (car region) (cdr region))))
1053
1054 ))
1055 (when do-set
1056 (with-silent-modifications
1057 (add-text-properties
1058 (car region) (cdr region)
1059 (list
1060 'font-lock-face face
1061 'fontified t))))
1062 )))
1063
1064 (defun wisi-face-action (pairs &optional no-override)
1065 "Cache face information in text properties of tokens.
1066 Intended as a grammar non-terminal action.
1067
1068 PAIRS is a vector of the form [token-number face token-number face ...]
1069 token-number may be an integer, or a vector [integer token_id token_id ...]
1070
1071 For an integer token-number, apply face to the first cached token
1072 in the range covered by wisi-tokens[token-number]. If there are
1073 no cached tokens, apply face to entire wisi-tokens[token-number]
1074 region.
1075
1076 For a vector token-number, apply face to the first cached token
1077 in the range matching one of token_id covered by
1078 wisi-tokens[token-number].
1079
1080 If NO-OVERRIDE is non-nil, don't override existing face."
1081 (let (number region face (tokens nil) cache (i 0) (j 1))
1082 (while (< i (length pairs))
1083 (setq number (aref pairs i))
1084 (setq face (aref pairs (setq i (1+ i))))
1085 (cond
1086 ((integerp number)
1087 (setq region (cdr (aref wisi-tokens (1- number))));; wisi-tokens is let-bound in wisi-parse-reduce
1088 (when region
1089 (save-excursion
1090 (goto-char (car region))
1091 (setq cache (or (wisi-get-cache (point))
1092 (wisi-forward-cache)))
1093 (if (< (point) (cdr region))
1094 (when cache
1095 (wisi-face-action-1 face (wisi-cache-region cache) no-override))
1096
1097 ;; no caches in region; just apply face to region
1098 (wisi-face-action-1 face region no-override))
1099 )))
1100
1101 ((vectorp number)
1102 (setq region (cdr (aref wisi-tokens (1- (aref number 0)))))
1103 (when region
1104 (while (< j (length number))
1105 (setq tokens (cons (aref number j) tokens))
1106 (setq j (1+ j)))
1107 (save-excursion
1108 (goto-char (car region))
1109 (setq cache (wisi-forward-find-token tokens (cdr region) t))
1110 ;; might be looking for IDENTIFIER in name, but only have "*".
1111 (when cache
1112 (wisi-face-action-1 face (wisi-cache-region cache) no-override))
1113 )))
1114 )
1115 (setq i (1+ i))
1116
1117 )))
1118
1119 (defun wisi-face-list-action (pairs &optional no-override)
1120 "Cache face information in text properties of tokens.
1121 Intended as a grammar non-terminal action.
1122
1123 PAIRS is a vector of the form [token-number face token-number face ...]
1124 token-number is an integer. Apply face to all cached tokens
1125 in the range covered by wisi-tokens[token-number].
1126
1127 If NO-OVERRIDE is non-nil, don't override existing face."
1128 (let (number region face cache (i 0))
1129 (while (< i (length pairs))
1130 (setq number (aref pairs i))
1131 (setq face (aref pairs (setq i (1+ i))))
1132 (setq region (cdr (aref wisi-tokens (1- number))));; wisi-tokens is let-bound in wisi-parse-reduce
1133 (when region
1134 (save-excursion
1135 (goto-char (car region))
1136 (setq cache (or (wisi-get-cache (point))
1137 (wisi-forward-cache)))
1138 (while (<= (point) (cdr region))
1139 (when cache
1140 (wisi-face-action-1 face (wisi-cache-region cache) no-override))
1141 (setq cache (wisi-forward-cache))
1142 )))
1143
1144 (setq i (1+ i))
1145
1146 )))
1147
1148 ;;;; motion
1149 (defun wisi-backward-cache ()
1150 "Move point backward to the beginning of the first token preceding point that has a cache.
1151 Returns cache, or nil if at beginning of buffer."
1152 (let (cache pos)
1153 (setq pos (previous-single-property-change (point) 'wisi-cache))
1154 ;; There are three cases:
1155 ;;
1156 ;; 1) caches separated by non-cache chars: 'if ... then'
1157 ;; pos is before 'f', cache is on 'i'
1158 ;;
1159 ;; 2) caches not separated: ');'
1160 ;; pos is before ';', cache is on ';'
1161 ;;
1162 ;; 3) at bob; pos is nil
1163 ;;
1164 (if pos
1165 (progn
1166 (setq cache (get-text-property pos 'wisi-cache))
1167 (if cache
1168 ;; case 2
1169 (goto-char pos)
1170 ;; case 1
1171 (setq cache (get-text-property (1- pos) 'wisi-cache))
1172 (goto-char (1- pos))))
1173 ;; at bob
1174 (goto-char (point-min))
1175 (setq cache nil))
1176 cache
1177 ))
1178
1179 (defun wisi-forward-cache ()
1180 "Move point forward to the beginning of the first token after point that has a cache.
1181 Returns cache, or nil if at end of buffer."
1182 (let (cache pos)
1183 (when (get-text-property (point) 'wisi-cache)
1184 ;; on a cache; get past it
1185 (goto-char (1+ (point))))
1186
1187 (setq cache (get-text-property (point) 'wisi-cache))
1188 (if cache
1189 nil
1190
1191 (setq pos (next-single-property-change (point) 'wisi-cache))
1192 (if pos
1193 (progn
1194 (goto-char pos)
1195 (setq cache (get-text-property pos 'wisi-cache)))
1196 ;; at eob
1197 (goto-char (point-max))
1198 (setq cache nil))
1199 )
1200 cache
1201 ))
1202
1203 (defun wisi-forward-find-class (class limit)
1204 "Search forward for a token that has a cache with CLASS.
1205 Return cache, or nil if at end of buffer.
1206 If LIMIT (a buffer position) is reached, throw an error."
1207 (let ((cache (wisi-forward-cache)))
1208 (while (not (eq class (wisi-cache-class cache)))
1209 (setq cache (wisi-forward-cache))
1210 (when (>= (point) limit)
1211 (error "cache with class %s not found" class)))
1212 cache))
1213
1214 (defun wisi-forward-find-token (token limit &optional noerror)
1215 "Search forward for a token that has a cache with TOKEN.
1216 If point is at a matching token, return that token.
1217 TOKEN may be a list; stop on any cache that has a member of the list.
1218 Return cache, or nil if at end of buffer.
1219 If LIMIT (a buffer position) is reached, then if NOERROR is nil, throw an
1220 error, if non-nil, return nil."
1221 (let ((token-list (cond
1222 ((listp token) token)
1223 (t (list token))))
1224 (cache (wisi-get-cache (point)))
1225 (done nil))
1226 (while (not (or done
1227 (and cache
1228 (memq (wisi-cache-token cache) token-list))))
1229 (setq cache (wisi-forward-cache))
1230 (when (>= (point) limit)
1231 (if noerror
1232 (progn
1233 (setq done t)
1234 (setq cache nil))
1235 (error "cache with token %s not found" token))))
1236 cache))
1237
1238 (defun wisi-forward-find-nonterm (nonterm limit)
1239 "Search forward for a token that has a cache with NONTERM.
1240 NONTERM may be a list; stop on any cache that has a member of the list.
1241 Return cache, or nil if at end of buffer.
1242 If LIMIT (a buffer position) is reached, throw an error."
1243 (let ((nonterm-list (cond
1244 ((listp nonterm) nonterm)
1245 (t (list nonterm))))
1246 (cache (wisi-forward-cache)))
1247 (while (not (memq (wisi-cache-nonterm cache) nonterm-list))
1248 (setq cache (wisi-forward-cache))
1249 (when (>= (point) limit)
1250 (error "cache with nonterm %s not found" nonterm)))
1251 cache))
1252
1253 (defun wisi-goto-cache-next (cache)
1254 (goto-char (1- (wisi-cache-next cache)))
1255 (wisi-get-cache (point))
1256 )
1257
1258 (defun wisi-forward-statement-keyword ()
1259 "If not at a cached token, move forward to next
1260 cache. Otherwise move to cache-next, or next cache if nil.
1261 Return cache found."
1262 (wisi-validate-cache (point-max)) ;; ensure there is a next cache to move to
1263 (let ((cache (wisi-get-cache (point))))
1264 (if cache
1265 (let ((next (wisi-cache-next cache)))
1266 (if next
1267 (goto-char (1- next))
1268 (wisi-forward-token)
1269 (wisi-forward-cache)))
1270 (wisi-forward-cache))
1271 )
1272 (wisi-get-cache (point))
1273 )
1274
1275 (defun wisi-backward-statement-keyword ()
1276 "If not at a cached token, move backward to prev
1277 cache. Otherwise move to cache-prev, or prev cache if nil."
1278 (wisi-validate-cache (point))
1279 (let ((cache (wisi-get-cache (point))))
1280 (if cache
1281 (let ((prev (wisi-cache-prev cache)))
1282 (if prev
1283 (goto-char (1- prev))
1284 (wisi-backward-cache)))
1285 (wisi-backward-cache))
1286 ))
1287
1288 (defun wisi-goto-containing (cache &optional error)
1289 "Move point to containing token for CACHE, return cache at that point.
1290 If ERROR, throw error when CACHE has no container; else return nil."
1291 (cond
1292 ((markerp (wisi-cache-containing cache))
1293 (goto-char (1- (wisi-cache-containing cache)))
1294 (wisi-get-cache (point)))
1295 (t
1296 (when error
1297 (error "already at outermost containing token")))
1298 ))
1299
1300 (defun wisi-goto-containing-paren (cache)
1301 "Move point to just after the open-paren containing CACHE.
1302 Return cache for paren, or nil if no containing paren."
1303 (while (and cache
1304 (not (eq (wisi-cache-class cache) 'open-paren)))
1305 (setq cache (wisi-goto-containing cache)))
1306 (when cache
1307 (forward-char 1))
1308 cache)
1309
1310 (defun wisi-goto-start (cache)
1311 "Move point to containing ancestor of CACHE that has class block-start or statement-start.
1312 Return start cache."
1313 (when
1314 ;; cache nil at bob, or on cache in partially parsed statement
1315 (while (and cache
1316 (not (memq (wisi-cache-class cache) '(block-start statement-start))))
1317 (setq cache (wisi-goto-containing cache)))
1318 )
1319 cache)
1320
1321 (defun wisi-goto-end-1 (cache)
1322 (goto-char (1- (wisi-cache-end cache))))
1323
1324 (defun wisi-goto-statement-start ()
1325 "Move point to token at start of statement point is in or after.
1326 Return start cache."
1327 (interactive)
1328 (wisi-validate-cache (point))
1329 (let ((cache (wisi-get-cache (point))))
1330 (unless cache
1331 (setq cache (wisi-backward-cache)))
1332 (wisi-goto-start cache)))
1333
1334 (defun wisi-goto-statement-end ()
1335 "Move point to token at end of statement point is in or before."
1336 (interactive)
1337 (wisi-validate-cache (point))
1338 (let ((cache (or (wisi-get-cache (point))
1339 (wisi-forward-cache))))
1340 (when (wisi-cache-end cache)
1341 ;; nil when cache is statement-end
1342 (wisi-goto-end-1 cache))
1343 ))
1344
1345 (defun wisi-next-statement-cache (cache)
1346 "Move point to CACHE-next, return cache; error if nil."
1347 (when (not (markerp (wisi-cache-next cache)))
1348 (error "no next statement cache"))
1349 (goto-char (1- (wisi-cache-next cache)))
1350 (wisi-get-cache (point)))
1351
1352 (defun wisi-prev-statement-cache (cache)
1353 "Move point to CACHE-next, return cache; error if nil."
1354 (when (not (markerp (wisi-cache-prev cache)))
1355 (error "no prev statement cache"))
1356 (goto-char (1- (wisi-cache-prev cache)))
1357 (wisi-get-cache (point)))
1358
1359 ;;;; indentation
1360
1361 (defun wisi-comment-indent ()
1362 "For `comment-indent-function'. Indent single line comment to
1363 the comment on the previous line."
1364 ;; This should only be called by comment-indent-new-line or
1365 ;; fill-comment-paragraph, so there will be a preceding comment line
1366 ;; that we can trust.
1367 (save-excursion
1368 (forward-comment -1)
1369 (if (looking-at comment-start)
1370 (current-column)
1371 (error "wisi-comment-indent called after non-comment"))))
1372
1373 (defun wisi-indent-current (offset)
1374 "Return indentation OFFSET relative to indentation of current line."
1375 (+ (current-indentation) offset)
1376 )
1377
1378 (defun wisi-indent-paren (offset)
1379 "Return indentation OFFSET relative to preceding open paren."
1380 (save-excursion
1381 (goto-char (nth 1 (syntax-ppss)))
1382 (+ (current-column) offset)))
1383
1384 (defun wisi-indent-start (offset cache)
1385 "Return indentation of OFFSET relative to containing ancestor
1386 of CACHE with class statement-start or block-start."
1387 (wisi-goto-start cache)
1388 (+ (current-indentation) offset))
1389
1390 (defun wisi-indent-statement ()
1391 "Indent region given by `wisi-goto-start' on cache at or before point, then wisi-cache-end."
1392 (wisi-validate-cache (point))
1393
1394 (save-excursion
1395 (let ((cache (or (wisi-get-cache (point))
1396 (wisi-backward-cache))))
1397 (when cache
1398 ;; can be nil if in header comment
1399 (let ((start (progn (wisi-goto-start cache) (point)))
1400 (end (progn
1401 (when (wisi-cache-end cache)
1402 ;; nil when cache is statement-end
1403 (goto-char (1- (wisi-cache-end cache))))
1404 (point))))
1405 (indent-region start end)
1406 ))
1407 )))
1408
1409 (defvar-local wisi-indent-calculate-functions nil
1410 "Functions to calculate indentation. Each called with point
1411 before a token at the beginning of a line (at current
1412 indentation); return indentation column for that token, or
1413 nil. May move point. Calling stops when first function returns
1414 non-nil.")
1415
1416 (defvar-local wisi-post-parse-fail-hook
1417 "Function to reindent portion of buffer.
1418 Called from `wisi-indent-line' when a parse succeeds after
1419 failing; assumes user was editing code that is now syntactically
1420 correct. Must leave point at indentation of current line.")
1421
1422 (defvar-local wisi-indent-failed nil
1423 "Non-nil when wisi-indent-line fails due to parse failing; cleared when indent succeeds.")
1424
1425 (defun wisi-indent-line ()
1426 "Indent current line using the wisi indentation engine."
1427 (interactive)
1428
1429 (let ((savep (point))
1430 indent)
1431 (save-excursion
1432 (back-to-indentation)
1433 (when (>= (point) savep) (setq savep nil))
1434
1435 (when (>= (point) wisi-cache-max)
1436 (wisi-validate-cache (line-end-position))) ;; include at lease the first token on this line
1437
1438 (if (> (point) wisi-cache-max)
1439 ;; parse failed
1440 (progn
1441 ;; no indent info at point. Assume user is
1442 ;; editing; indent to previous line, fix it
1443 ;; after parse succeeds
1444 (setq wisi-indent-failed t)
1445 (forward-line -1);; safe at bob
1446 (back-to-indentation)
1447 (setq indent (current-column)))
1448
1449 ;; parse succeeded
1450 (when wisi-indent-failed
1451 ;; previous parse failed
1452 (setq wisi-indent-failed nil)
1453 (run-hooks 'wisi-post-parse-fail-hook))
1454
1455 (when (> (point) wisi-cache-max)
1456 (error "wisi-post-parse-fail-hook invalidated parse."))
1457
1458 (setq indent
1459 (with-demoted-errors
1460 (or (run-hook-with-args-until-success 'wisi-indent-calculate-functions) 0))
1461 )
1462 ))
1463
1464 (if savep
1465 ;; point was inside line text; leave it there
1466 (save-excursion (indent-line-to indent))
1467 ;; point was before line text; move to start of text
1468 (indent-line-to indent))
1469 ))
1470
1471 ;;;; debug
1472 (defun wisi-parse-buffer ()
1473 (interactive)
1474 (syntax-propertize (point-max))
1475 (wisi-invalidate-cache)
1476 (wisi-validate-cache (point-max)))
1477
1478 (defun wisi-show-cache ()
1479 "Show cache at point."
1480 (interactive)
1481 (message "%s" (wisi-get-cache (point))))
1482
1483 (defun wisi-show-token ()
1484 "Move forward across one keyword, show token_id."
1485 (interactive)
1486 (let ((token (wisi-forward-token)))
1487 (message "%s" (car token))))
1488
1489 (defun wisi-show-containing-or-previous-cache ()
1490 (interactive)
1491 (let ((cache (wisi-get-cache (point))))
1492 (if cache
1493 (message "containing %s" (wisi-goto-containing cache t))
1494 (message "previous %s" (wisi-backward-cache)))
1495 ))
1496
1497 (defun wisi-show-cache-max ()
1498 (interactive)
1499 (push-mark)
1500 (goto-char wisi-cache-max))
1501
1502 ;;;;; setup
1503
1504 (defun wisi-setup (indent-calculate post-parse-fail class-list keyword-table token-table parse-table)
1505 "Set up a buffer for parsing files with wisi."
1506 (setq wisi-class-list class-list)
1507 (setq wisi-string-double-term (car (symbol-value (intern-soft "string-double" token-table))))
1508 (setq wisi-string-single-term (car (symbol-value (intern-soft "string-single" token-table))))
1509 (setq wisi-symbol-term (car (symbol-value (intern-soft "symbol" token-table))))
1510
1511 (let ((numbers (cadr (symbol-value (intern-soft "number" token-table)))))
1512 (setq wisi-number-term (car numbers))
1513 (setq wisi-number-p (cdr numbers)))
1514
1515 (setq wisi-punctuation-table (symbol-value (intern-soft "punctuation" token-table)))
1516 (setq wisi-punctuation-table-max-length 0)
1517 (let (fail)
1518 (dolist (item wisi-punctuation-table)
1519 (when item ;; default matcher can be nil
1520
1521 ;; check that all chars used in punctuation tokens have punctuation syntax
1522 (mapc (lambda (char)
1523 (when (not (= ?. (char-syntax char)))
1524 (setq fail t)
1525 (message "in %s, %c does not have punctuation syntax"
1526 (car item) char)))
1527 (cdr item))
1528
1529 (when (< wisi-punctuation-table-max-length (length (cdr item)))
1530 (setq wisi-punctuation-table-max-length (length (cdr item)))))
1531 )
1532 (when fail
1533 (error "aborting due to punctuation errors")))
1534
1535 (setq wisi-keyword-table keyword-table)
1536 (setq wisi-parse-table parse-table)
1537
1538 (setq wisi-indent-calculate-functions indent-calculate)
1539 (set (make-local-variable 'indent-line-function) 'wisi-indent-line)
1540
1541 (setq wisi-post-parse-fail-hook post-parse-fail)
1542 (setq wisi-indent-failed nil)
1543
1544 (add-hook 'before-change-functions 'wisi-before-change nil t)
1545 (add-hook 'after-change-functions 'wisi-after-change nil t)
1546
1547 (when (functionp 'jit-lock-register)
1548 (jit-lock-register 'wisi-fontify-region))
1549
1550 ;; see comments on "lexer" above re syntax-propertize
1551 (syntax-propertize (point-max))
1552
1553 (wisi-invalidate-cache)
1554 )
1555
1556 (provide 'wisi)
1557 ;;; wisi.el ends here