]> code.delx.au - gnu-emacs-elpa/blob - packages/wisi/wisi.el
e12ce8f9548fcb1f259a60289028b5a732e8446d
[gnu-emacs-elpa] / packages / wisi / wisi.el
1 ;;; wisi.el --- Utilities for implementing an indentation/navigation engine using a generalized LALR parser
2 ;;
3 ;; Copyright (C) 2012, 2013 Free Software Foundation, Inc.
4 ;;
5 ;; Author: Stephen Leake <stephen_leake@member.fsf.org>
6 ;; Version: 1.0
7 ;; url: http://stephe-leake.org/emacs/ada-mode/emacs-ada-mode.html
8 ;;
9 ;; This file is part of GNU Emacs.
10 ;;
11 ;; GNU Emacs is free software: you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation, either version 3 of the License, or
14 ;; (at your option) any later version.
15 ;;
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
20 ;;
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
23 ;;
24 ;;; History: first experimental version Oct 2012
25 ;;
26 ;;; indentation algorithm overview
27 ;;
28 ;; This design is inspired in part by experience writing a SMIE
29 ;; indentation engine for Ada, and the wisent parser.
30 ;;
31 ;; The general approach to indenting a given token is to find the
32 ;; start of the statement it is part of, or some other relevant point
33 ;; in the statement, and indent relative to that. So we need a parser
34 ;; that lets us find statement indent points from arbitrary places in
35 ;; the code.
36 ;;
37 ;; The grammar for Ada as represented by the EBNF in LRM Annex P is
38 ;; not LALR(1), so we use a generalized LALR(1) parser (see
39 ;; wisi-parse, wisi-compile).
40 ;;
41 ;; The parser actions cache indentation and other information as text
42 ;; properties of tokens in statements.
43 ;;
44 ;; An indentation engine moves text in the buffer, as does user
45 ;; editing, so we can't rely on character positions remaining
46 ;; constant. So the parser actions use markers to store
47 ;; positions. Text properties also move with the text.
48 ;;
49 ;; The stored information includes a marker at each statement indent
50 ;; point. Thus, the indentation algorithm is: find the previous token
51 ;; with cached information, and either indent from it, or fetch from
52 ;; it the marker for a previous statement indent point, and indent
53 ;; relative to that.
54 ;;
55 ;; Since we have a cache (the text properties), we need to consider
56 ;; when to invalidate it. Ideally, we invalidate only when a change to
57 ;; the buffer would change the result of a parse that crosses that
58 ;; change, or starts after that change. Changes in whitespace
59 ;; (indentation and newlines) do not affect an Ada parse. Other
60 ;; languages are sensitive to newlines (Bash for example) or
61 ;; indentation (Python). Adding comments does not change a parse,
62 ;; unless code is commented out. For now we invalidate the cache after
63 ;; the edit point if the change involves anything other than
64 ;; whitespace.
65 ;;
66 ;;; comparison to the SMIE parser
67 ;;
68 ;; The central problem to be solved in building the SMIE parser is
69 ;; grammar precedence conflicts; the general solution is refining
70 ;; keywords so that each new keyword can be assigned a unique
71 ;; precedence. This means ad hoc code must be written to determine the
72 ;; correct refinement for each language keyword from the surrounding
73 ;; tokens. In effect, for a complex language like Ada, the knowledge
74 ;; of the language grammar is mostly embedded in the refinement code;
75 ;; only a small amount is in the refined grammar. Implementing a SMIE
76 ;; parser for a new language involves the same amount of work as the
77 ;; first language.
78 ;;
79 ;; Using a generalized LALR parser avoids that particular problem;
80 ;; assuming the language is already defined by a grammar, it is only a
81 ;; matter of a format change to teach the wisi parser the
82 ;; language. The problem in a wisi indentation engine is caching the
83 ;; output of the parser in a useful way, since we can't start the
84 ;; parser from arbitrary places in the code (as we can with the SMIE
85 ;; parser). A second problem is determining when to invalidate the
86 ;; cache. But these problems are independent of the language being
87 ;; parsed, so once we have one wisi indentation engine working,
88 ;; adapting it to new languages should be quite simple.
89 ;;
90 ;; The SMIE parser does not find the start of each statement, only the
91 ;; first language keyword in each statement; additional code must be
92 ;; written to find the statement start and indent points. The wisi
93 ;; parser finds the statement start and indent points directly.
94 ;;
95 ;; In SMIE, it is best if each grammar rule is a complete statement,
96 ;; so forward-sexp will traverse the entire statement. If nested
97 ;; non-terminals are used, forward-sexp may stop inside one of the
98 ;; nested non-terminals. This problem does not occur with the wisi
99 ;; parser.
100 ;;
101 ;; A downside of the wisi parser is conflicts in the grammar; they can
102 ;; be much more difficult to resolve than in the SMIE parser. The
103 ;; generalized parser helps by handling conflicts, but it does so by
104 ;; running multiple parsers in parallel, persuing each choice in the
105 ;; conflict. If the conflict is due to a genuine ambiguity, both paths
106 ;; will succeed, which causes the parse to fail, since it is not clear
107 ;; which set of text properties to store. Even if one branch
108 ;; ultimately fails, running parallel parsers over large sections of
109 ;; code is slow. Finally, this approach can lead to exponential growth
110 ;; in the number of parsers. So grammar conflicts must still be
111 ;; analyzed and minimized.
112 ;;
113 ;; In addition, the complete grammar must be specified; in smie, it is
114 ;; often possible to specify a subset of the grammar.
115 ;;
116 ;;;; grammar compiler and parser
117 ;;
118 ;; Since we are using a generalized LALR(1) parser, we cannot use any
119 ;; of the wisent grammar functions. We use the OpenToken Ada package
120 ;; to compile BNF to Elisp source (similar to
121 ;; semantic-grammar-create-package), and wisi-compile-grammar to
122 ;; compile that to the parser table.
123 ;;
124 ;; Semantic provides a complex lexer, more complicated than we need
125 ;; for indentation. So we use the elisp lexer, which consists of
126 ;; `forward-comment', `skip-syntax-forward', and `scan-sexp'. We wrap
127 ;; that in functions that return tokens in the form wisi-parse
128 ;; expects.
129 ;;
130 ;;;; code style
131 ;;
132 ;; 'wisi' was originally short for "wisent indentation engine", but
133 ;; now is just a name.
134 ;;
135 ;; not using lexical-binding because we support Emacs 23
136 ;;
137 ;;;;;
138
139 (require 'wisi-parse)
140 (eval-when-compile (require 'cl-macs))
141
142 ;;;; lexer
143
144 (defvar-local wisi-class-list nil)
145 (defvar-local wisi-keyword-table nil)
146 (defvar-local wisi-punctuation-table nil)
147 (defvar-local wisi-punctuation-table-max-length 0)
148 (defvar-local wisi-string-double-term nil) ;; string delimited by double quotes
149 (defvar-local wisi-string-quote-escape-doubled nil)
150 (defvar-local wisi-string-single-term nil) ;; string delimited by single quotes
151 (defvar-local wisi-symbol-term nil)
152
153 (defun wisi-forward-token (&optional text-only)
154 "Move point forward across one token, skipping leading whitespace and comments.
155 Return the corresponding token, in a format determined by TEXT-ONLY:
156 TEXT-ONLY t: text
157 TEXT-ONLY nil: (token text start . end)
158 where:
159 `token' is a token symbol (not string) from `wisi-punctuation-table',
160 `wisi-keyword-table', `wisi-string-double-term', `wisi-string-double-term' or `wisi-symbol-term'.
161
162 `text' is the token text from the buffer
163
164 `start, end' are the character positions in the buffer of the start
165 and end of the token text.
166
167 If at end of buffer, returns `wisent-eoi-term'."
168 (forward-comment (point-max))
169 ;; skips leading whitespace, comment, trailing whitespace.
170
171 (let ((start (point))
172 ;; (info "(elisp)Syntax Table Internals" "*info elisp syntax*")
173 (syntax (syntax-class (syntax-after (point))))
174 token-id token-text)
175 (cond
176 ((eobp)
177 (setq token-text "")
178 (setq token-id wisent-eoi-term))
179
180 ((eq syntax 1)
181 ;; punctuation. Find the longest matching string in wisi-punctuation-table
182 (forward-char 1)
183 (let ((next-point (point))
184 temp-text temp-id done)
185 (while (not done)
186 (setq temp-text (buffer-substring-no-properties start (point)))
187 (setq temp-id (car (rassoc temp-text wisi-punctuation-table)))
188 (when temp-id
189 (setq token-text temp-text
190 token-id temp-id
191 next-point (point)))
192 (if (or
193 (eobp)
194 (= (- (point) start) wisi-punctuation-table-max-length))
195 (setq done t)
196 (forward-char 1))
197 )
198 (goto-char next-point)))
199
200 ((memq syntax '(4 5)) ;; open, close parenthesis
201 (forward-char 1)
202 (setq token-text (buffer-substring-no-properties start (point)))
203 (setq token-id (symbol-value (intern-soft token-text wisi-keyword-table))))
204
205 ((eq syntax 7)
206 ;; string quote, either single or double. we assume point is before the start quote, not the end quote
207 (let ((delim (char-after (point)))
208 (forward-sexp-function nil))
209 (forward-sexp)
210 ;; point is now after the end quote; check for a doubled quote
211 (while (and wisi-string-quote-escape-doubled
212 (eq (char-after (point)) delim))
213 (forward-sexp))
214 (setq token-text (buffer-substring-no-properties start (point)))
215 (setq token-id (if (= delim ?\") wisi-string-double-term wisi-string-single-term))))
216
217 (t ;; assuming word syntax
218 (skip-syntax-forward "w_'")
219 (setq token-text (buffer-substring-no-properties start (point)))
220 (setq token-id
221 (or (symbol-value (intern-soft (downcase token-text) wisi-keyword-table))
222 wisi-symbol-term)))
223 );; cond
224
225 (unless token-id
226 (error (wisi-error-msg "unrecognized token '%s'" (buffer-substring-no-properties start (point)))))
227
228 (if text-only
229 token-text
230 (cons token-id (cons token-text (cons start (point)))))
231 ))
232
233 (defun wisi-backward-token ()
234 "Move point backward across one token, skipping whitespace and comments.
235 Return (nil text start . end) - same structure as
236 wisi-forward-token, but does not look up symbol."
237 (forward-comment (- (point)))
238 ;; skips leading whitespace, comment, trailing whitespace.
239
240 ;; (info "(elisp)Syntax Table Internals" "*info elisp syntax*")
241 (let ((end (point))
242 (syntax (syntax-class (syntax-after (1- (point))))))
243 (cond
244 ((bobp) nil)
245
246 ((memq syntax '(4 5)) ;; open, close parenthesis
247 (backward-char 1))
248
249 ((eq syntax 7)
250 ;; a string quote. we assume we are after the end quote, not the start quote
251 (let ((forward-sexp-function nil))
252 (forward-sexp -1)))
253
254 (t
255 (if (zerop (skip-syntax-backward "."))
256 (skip-syntax-backward "w_'")))
257 )
258 (cons nil (cons (buffer-substring-no-properties (point) end) (cons (point) end)))
259 ))
260
261 ;;;; token info cache
262 ;;
263 ;; the cache stores the results of parsing as text properties on
264 ;; keywords, for use by the indention and motion engines.
265
266 (cl-defstruct
267 (wisi-cache
268 (:constructor wisi-cache-create)
269 (:copier nil))
270 nonterm;; nonterminal from parse (set by wisi-statement-action)
271
272 token
273 ;; terminal symbol from wisi-keyword-table or
274 ;; wisi-punctuation-table, or lower-level nonterminal from parse
275 ;; (set by wisi-statement-action)
276
277 last ;; pos of last char in token, relative to first (0 indexed)
278
279 class
280 ;; arbitrary lisp symbol, used for indentation and navigation.
281 ;; some classes are defined by wisi:
282 ;;
283 ;; 'block-middle - a block keyword (ie: if then else end), not at the start of a statement
284 ;;
285 ;; 'block-start - a block keyword at the start of a statement
286 ;;
287 ;; 'statement-start - the start of a statement
288 ;;
289 ;; 'open-paren
290 ;;
291 ;; others are language-specific
292
293 containing
294 ;; Marker at the containing keyword for this token.
295 ;; A containing keyword is an indent point; the start of a
296 ;; statement, or 'begin', 'then' or 'else' for a block of
297 ;; statements, etc.
298 ;; nil only for first token in buffer
299
300 prev ;; marker at previous motion token in statement; nil if none
301 next ;; marker at next motion token in statement; nil if none
302 end ;; marker at token at end of current statement
303 )
304
305 (defvar-local wisi-cache-max 0
306 "Maximimum position in buffer where wisi token cache is valid.")
307
308 (defvar-local wisi-parse-table nil)
309
310 (defvar-local wisi-parse-failed nil
311 "Non-nil when a recent parse has failed - cleared when parse succeeds.")
312
313 (defvar-local wisi-parse-try nil
314 "Non-nil when parse is needed - cleared when parse succeeds.")
315
316 (defvar-local wisi-change-need-invalidate nil)
317 (defvar-local wisi-change-jit-lock-mode nil)
318
319 (defun wisi-invalidate-cache()
320 "Invalidate the wisi token cache for the current buffer.
321 Also invalidate the Emacs syntax cache."
322 (interactive)
323 (setq wisi-cache-max 0)
324 (setq wisi-parse-try t)
325 (syntax-ppss-flush-cache (point-min))
326 (with-silent-modifications
327 (remove-text-properties (point-min) (point-max) '(wisi-cache))))
328
329 (defun wisi-before-change (begin end)
330 "For `before-change-functions'."
331 ;; begin . end is range of text being deleted
332
333 ;; If jit-lock-after-change is before wisi-after-change in
334 ;; after-change-functions, it might use any invalid caches in the
335 ;; inserted text.
336 ;;
337 ;; So we check for that here, and ensure it is after
338 ;; wisi-after-change, which deletes the invalid caches
339 (when (boundp 'jit-lock-mode)
340 (when (memq 'wisi-after-change (memq 'jit-lock-after-change after-change-functions))
341 (setq after-change-functions (delete 'wisi-after-change after-change-functions))
342 (add-hook 'after-change-functions 'wisi-after-change nil t)
343 (setq wisi-change-jit-lock-mode (1+ wisi-change-jit-lock-mode)))
344 )
345
346 (save-excursion
347 ;; don't invalidate parse for whitespace, string, or comment changes
348 (let (;; (info "(elisp)Parser State")
349 (state (syntax-ppss begin)))
350 ;; syntax-ppss has moved point to "begin".
351 (cond
352 ((or
353 (nth 3 state); in string
354 (nth 4 state)); in comment
355 ;; FIXME: check that entire range is in comment or string
356 (setq wisi-change-need-invalidate nil))
357
358 ((progn
359 (skip-syntax-forward " " end);; does not skip newline
360 (eq (point) end))
361 (setq wisi-change-need-invalidate nil))
362
363 (t (setq wisi-change-need-invalidate t))
364 ))))
365
366 (defun wisi-after-change (begin end length)
367 "For `after-change-functions'."
368 ;; begin . end is range of text being inserted (may be empty)
369 ;; (syntax-ppss-flush-cache begin) is in before-change-functions
370
371 (syntax-ppss-flush-cache begin) ;; IMPROVEME: could check for whitespace
372
373 (cond
374 (wisi-parse-failed
375 ;; The parse was failing, probably due to bad syntax; this change
376 ;; may have fixed it, so try reparse.
377 (setq wisi-parse-try t)
378
379 ;; remove 'wisi-cache on inserted text, which could have caches
380 ;; from before the failed parse, and are in any case invalid.
381 (with-silent-modifications
382 (remove-text-properties begin end '(wisi-cache)))
383 )
384
385 ((>= wisi-cache-max begin)
386 ;; The parse had succeeded paste the start of the inserted
387 ;; text.
388 (save-excursion
389 (let ((need-invalidate t)
390 ;; (info "(elisp)Parser State")
391 (state (syntax-ppss begin)))
392 ;; syntax-ppss has moved point to "begin".
393 (cond
394 (wisi-change-need-invalidate
395 ;; wisi-before change determined the removed text alters the
396 ;; parse
397 nil)
398
399 ((or
400 (nth 3 state); in string
401 (nth 4 state)); in comment
402 ;; FIXME: insert newline in comment to create non-comment!?
403 ;; or paste a chunk of code
404 ;; => check that all of change region is comment or string
405 (setq need-invalidate nil))
406
407 ((progn
408 (skip-syntax-forward " " end);; does not skip newlines
409 (eq (point) end))
410 (setq need-invalidate nil))
411
412 (t nil)
413 )
414
415 (if need-invalidate
416 ;; The inserted or deleted text could alter the parse
417 (wisi-invalidate-cache)
418
419 ;; else move cache-max by the net change length. We don't
420 ;; need to delete 'wisi-cache in the inserted text, because
421 ;; if there were any it would not pass the above.
422 (setq wisi-cache-max
423 (+ wisi-cache-max (- end begin length))))
424 )
425 ))
426
427 (t
428 ;; parse never attempted, or only done to before BEGIN. Just
429 ;; remove 'wisi-cache
430 (with-silent-modifications
431 (remove-text-properties begin end '(wisi-cache)))
432 )
433 ))
434
435 (defun wisi-get-cache (pos)
436 "Return `wisi-cache' struct from the `wisi-cache' text property at POS.
437 If accessing cache at a marker for a token as set by `wisi-cache-tokens', POS must be (1- mark)."
438 (get-text-property pos 'wisi-cache))
439
440 (defvar wisi-debug 0
441 "wisi debug mode:
442 0 : normal - ignore parse errors, for indenting new code
443 1 : report parse errors (for running tests)
444 2 : show parse states, position point at parse errors, debug-on-error works in parser
445 3 : also show top 10 items of parser stack.")
446
447 (defvar-local wisi-parse-error-msg nil)
448
449 (defun wisi-goto-error ()
450 "Move point to position in last error message (if any)."
451 (when (string-match ":\\([0-9]+\\):\\([0-9]+\\):" wisi-parse-error-msg)
452 (let ((line (string-to-number (match-string 1 wisi-parse-error-msg)))
453 (col (string-to-number (match-string 2 wisi-parse-error-msg))))
454 (goto-char (point-min))
455 (forward-line (1- line))
456 (forward-char col))))
457
458 (defun wisi-show-parse-error ()
459 "Show last wisi-parse error."
460 (interactive)
461 (if wisi-parse-failed
462 (progn
463 (message wisi-parse-error-msg)
464 (wisi-goto-error))
465 (message "parse succeeded")))
466
467 (defun wisi-validate-cache (pos)
468 "Ensure cached data is valid at least up to POS in current buffer."
469 (when (and wisi-parse-try
470 (< wisi-cache-max pos))
471 (when (> wisi-debug 0)
472 (message "wisi: parsing ..."))
473
474 (setq wisi-parse-try nil)
475 (setq wisi-parse-error-msg nil)
476 (save-excursion
477 (goto-char wisi-cache-max)
478 (if (> wisi-debug 1)
479 ;; let debugger stop in wisi-parse
480 (progn
481 (wisi-parse wisi-parse-table 'wisi-forward-token)
482 (setq wisi-cache-max (point))
483 (setq wisi-parse-failed nil))
484 ;; else capture errors from bad syntax, so higher level functions can try to continue
485 (condition-case err
486 (progn
487 (wisi-parse wisi-parse-table 'wisi-forward-token)
488 (setq wisi-cache-max (point))
489 (setq wisi-parse-failed nil))
490 (wisi-parse-error
491 (setq wisi-parse-failed t)
492 (setq wisi-parse-error-msg (cdr err)))
493 )))
494 (if wisi-parse-error-msg
495 ;; error
496 (when (> wisi-debug 0)
497 (message "wisi: parsing ... error")
498 (wisi-goto-error)
499 (error wisi-parse-error-msg))
500 ;; no msg; success
501 (when (> wisi-debug 0)
502 (message "wisi: parsing ... done")))
503 ))
504
505 (defun wisi-get-containing-cache (cache)
506 "Return cache from (wisi-cache-containing CACHE)."
507 (let ((containing (wisi-cache-containing cache)))
508 (and containing
509 (wisi-get-cache (1- containing)))))
510
511 (defun wisi-cache-text (cache)
512 "Return property-less buffer substring designated by cache.
513 Point must be at cache."
514 (buffer-substring-no-properties (point) (+ (point) (wisi-cache-last cache))))
515
516 ;;;; parse actions
517
518 (defun wisi-set-end (tokens end-mark)
519 "Set END-MARK on all unset caches in TOKENS."
520 (let ((tokens-t tokens))
521 (while tokens-t
522 (let* ((token (pop tokens-t))
523 (region (cddr token))
524 cache)
525 (when region
526 (goto-char (car region))
527 (setq cache (wisi-get-cache (car region)))
528 (while (and cache
529 (< (point) (cdr region)))
530 (if (not (wisi-cache-end cache))
531 (setf (wisi-cache-end cache) end-mark)
532 (goto-char (wisi-cache-end cache))
533 )
534 (setq cache (wisi-forward-cache))
535 ))
536 ))
537 ))
538
539 (defvar wisi-tokens nil);; keep byte-compiler happy; `wisi-tokens' is bound in action created by wisi-semantic-action
540 (defun wisi-statement-action (&rest pairs)
541 "Cache information in text properties of tokens.
542 Intended as a grammar non-terminal action.
543
544 PAIRS is of the form [TOKEN-NUMBER CLASS] ... where TOKEN-NUMBER
545 is the (1 indexed) token number in the production, CLASS is the wisi class of
546 that token. Use in a grammar action as:
547 (wisi-statement-action 1 'statement-start 7 'statement-end)"
548 (save-excursion
549 (let ((first-item t)
550 first-keyword-mark
551 (override-start nil))
552 (while pairs
553 (let* ((number (1- (pop pairs)))
554 (region (cddr (nth number wisi-tokens)));; wisi-tokens is let-bound in wisi-parse-reduce
555 (token (car (nth number wisi-tokens)))
556 (class (pop pairs))
557 (mark
558 ;; Marker one char into token, so indent-line-to
559 ;; inserts space before the mark, not after
560 (when region (copy-marker (1+ (car region)))))
561 cache)
562
563 (unless (memq class wisi-class-list)
564 (error "%s not in wisi-class-list" class))
565
566 (if region
567 (progn
568 (if (setq cache (wisi-get-cache (car region)))
569 ;; We are processing a previously set non-terminal; ie generic_formal_part in
570 ;;
571 ;; generic_package_declaration : generic_formal_part package_specification SEMICOLON
572 ;; (wisi-statement-action 1 'block-start 2 'block-middle 3 'statement-end)
573 ;;
574 ;; or simple_statement in
575 ;;
576 ;; statement : label_opt simple_statement
577 ;;
578 ;; override nonterm, class and containing
579 (progn
580 (cl-case (wisi-cache-class cache)
581 (block-start
582 (setf (wisi-cache-class cache)
583 (cond
584 ((eq override-start nil)
585 (cond
586 ((memq class '(block-start statement-start)) 'block-start)
587 (t 'block-middle)))
588
589 ((memq override-start '(block-start statement-start)) 'block-start)
590
591 (t (error "unexpected override-start"))
592 )))
593 (t
594 (setf (wisi-cache-class cache) (or override-start class)))
595 )
596 (setf (wisi-cache-nonterm cache) $nterm)
597 (setf (wisi-cache-containing cache) first-keyword-mark))
598
599 ;; else create new cache
600 (with-silent-modifications
601 (put-text-property
602 (car region)
603 (1+ (car region))
604 'wisi-cache
605 (wisi-cache-create
606 :nonterm $nterm;; $nterm defined in wisi-semantic-action
607 :token token
608 :last (- (cdr region) (car region))
609 :class (or override-start class)
610 :containing first-keyword-mark)
611 )))
612
613 (when first-item
614 (setq first-item nil)
615 (when (or override-start
616 (memq class '(block-middle block-start statement-start)))
617 (setq override-start nil)
618 (setq first-keyword-mark mark)))
619
620 (when (eq class 'statement-end)
621 (wisi-set-end wisi-tokens (copy-marker (1+ (car region)))))
622 )
623
624 ;; region is nil when a production is empty; if the first
625 ;; token is a start, override the class on the next token.
626 (when (and first-item
627 (memq class '(block-middle block-start statement-start)))
628 (setq override-start class)))
629 ))
630 )))
631
632 (defun wisi-containing-action (containing-token contained-token)
633 "Set containing marks in all tokens in CONTAINED-TOKEN with null containing mark to marker pointing to CONTAINING-TOKEN.
634 If CONTAINING-TOKEN is empty, the next token number is used."
635 ;; wisi-tokens is is bound in action created by wisi-semantic-action
636 (let* ((containing-region (cddr (nth (1- containing-token) wisi-tokens)))
637 (contained-region (cddr (nth (1- contained-token) wisi-tokens))))
638 (while (not containing-region)
639 ;; containing-token is empty; use next
640 (setq containing-region (cddr (nth containing-token wisi-tokens))))
641
642 (when contained-region
643 ;; nil when empty production, may not contain any caches
644 (save-excursion
645 (goto-char (cdr contained-region))
646 (let ((cache (wisi-backward-cache))
647 (mark (copy-marker (1+ (car containing-region)))))
648 (while cache
649
650 ;; skip blocks that are already marked
651 (while (and (>= (point) (car contained-region))
652 (markerp (wisi-cache-containing cache)))
653 (goto-char (1- (wisi-cache-containing cache)))
654 (setq cache (wisi-get-cache (point))))
655
656 (if (or (and (= (car containing-region) (car contained-region))
657 (<= (point) (car contained-region)))
658 (< (point) (car contained-region)))
659 ;; done
660 (setq cache nil)
661
662 ;; else set mark, loop
663 (setf (wisi-cache-containing cache) mark)
664 (setq cache (wisi-backward-cache)))
665 ))))))
666
667 (defun wisi-motion-action (&rest token-numbers)
668 "Set prev/next marks in all tokens given by TOKEN-NUMBERS.
669 Each TOKEN-NUMBERS is one of:
670
671 number: the token number; mark that token
672
673 list (number token_id):
674 list (number (token_id token_id)):
675 mark all tokens with token_id in the nonterminal given by the number."
676 (save-excursion
677 (let (next-keyword-mark
678 prev-keyword-mark
679 prev-cache
680 cache
681 mark)
682 (while token-numbers
683 (let ((token-number (pop token-numbers))
684 target-token
685 region)
686 (cond
687 ((numberp token-number)
688 (setq target-token nil)
689 (setq region (cddr (nth (1- token-number) wisi-tokens)))
690 (when region
691 (setq cache (wisi-get-cache (car region)))
692 (setq mark (copy-marker (1+ (car region))))
693
694 (when (and prev-keyword-mark
695 cache
696 (null (wisi-cache-prev cache)))
697 (setf (wisi-cache-prev cache) prev-keyword-mark)
698 (setf (wisi-cache-next prev-cache) mark))
699
700 (setq prev-keyword-mark mark)
701 (setq prev-cache cache)
702 ))
703
704 ((listp token-number)
705 ;; token-number may contain 0, 1, or more token_id; token_id may be a list
706 ;; the corresponding region may be empty
707 ;; there must have been a prev keyword
708 (setq target-token (cadr token-number))
709 (when (not (listp target-token))
710 (setq target-token (list target-token)))
711 (setq token-number (car token-number))
712 (setq region (cddr (nth (1- token-number) wisi-tokens)))
713 (when region ;; not an empty token
714 (goto-char (car region))
715 (while (wisi-forward-find-token target-token (cdr region) t)
716 (setq cache (wisi-get-cache (point)))
717 (setq mark (copy-marker (1+ (point))))
718
719 (when (null (wisi-cache-prev cache))
720 (setf (wisi-cache-prev cache) prev-keyword-mark)
721 (setf (wisi-cache-next prev-cache) mark)
722 (setq prev-keyword-mark mark)
723 (setq prev-cache cache))
724
725 (wisi-forward-token);; don't find same token again
726 ))
727 )
728
729 (t
730 (error "unexpected token-number %s" token-number))
731 )
732
733 ))
734 )))
735
736 ;;;; motion
737 (defun wisi-backward-cache ()
738 "Move point backward to the beginning of the first token preceding point that has a cache.
739 Returns cache, or nil if at beginning of buffer."
740 (let (cache pos)
741 (setq pos (previous-single-property-change (point) 'wisi-cache))
742 ;; There are three cases:
743 ;;
744 ;; 1) caches separated by non-cache chars: 'if ... then'
745 ;; pos is before 'f', cache is on 'i'
746 ;;
747 ;; 2) caches not separated: ');'
748 ;; pos is before ';', cache is on ';'
749 ;;
750 ;; 3) at bob; pos is nil
751 ;;
752 (if pos
753 (progn
754 (setq cache (get-text-property pos 'wisi-cache))
755 (if cache
756 ;; case 2
757 (goto-char pos)
758 ;; case 1
759 (setq cache (get-text-property (1- pos) 'wisi-cache))
760 (goto-char (1- pos))))
761 ;; at bob
762 (goto-char (point-min))
763 (setq cache nil))
764 cache
765 ))
766
767 (defun wisi-forward-cache ()
768 "Move point forward to the beginning of the first token after point that has a cache.
769 Returns cache, or nil if at end of buffer."
770 (let (cache pos)
771 (when (get-text-property (point) 'wisi-cache)
772 ;; on a cache; get past it
773 (goto-char (1+ (point))))
774
775 (setq cache (get-text-property (point) 'wisi-cache))
776 (if cache
777 nil
778
779 (setq pos (next-single-property-change (point) 'wisi-cache))
780 (if pos
781 (progn
782 (goto-char pos)
783 (setq cache (get-text-property pos 'wisi-cache)))
784 ;; at eob
785 (goto-char (point-max))
786 (setq cache nil))
787 )
788 cache
789 ))
790
791 (defun wisi-forward-find-class (class limit)
792 "Search forward for a token that has a cache with CLASS.
793 Return cache, or nil if at end of buffer.
794 If LIMIT (a buffer position) is reached, throw an error."
795 (let ((cache (wisi-forward-cache)))
796 (while (not (eq class (wisi-cache-class cache)))
797 (setq cache (wisi-forward-cache))
798 (when (>= (point) limit)
799 (error "cache with class %s not found" class)))
800 cache))
801
802 (defun wisi-forward-find-token (token limit &optional noerror)
803 "Search forward for a token that has a cache with TOKEN.
804 If point is at a matching token, return that token.
805 TOKEN may be a list; stop on any cache that has a member of the list.
806 Return cache, or nil if at end of buffer.
807 If LIMIT (a buffer position) is reached, then if NOERROR is nil, throw an
808 error, if non-nil, return nil."
809 (let ((token-list (cond
810 ((listp token) token)
811 (t (list token))))
812 (cache (wisi-get-cache (point)))
813 (done nil))
814 (while (not (or done
815 (and cache
816 (memq (wisi-cache-token cache) token-list))))
817 (setq cache (wisi-forward-cache))
818 (when (>= (point) limit)
819 (if noerror
820 (progn
821 (setq done t)
822 (setq cache nil))
823 (error "cache with token %s not found" token))))
824 cache))
825
826 (defun wisi-forward-find-nonterm (nonterm limit)
827 "Search forward for a token that has a cache with NONTERM.
828 NONTERM may be a list; stop on any cache that has a member of the list.
829 Return cache, or nil if at end of buffer.
830 If LIMIT (a buffer position) is reached, throw an error."
831 (let ((nonterm-list (cond
832 ((listp nonterm) nonterm)
833 (t (list nonterm))))
834 (cache (wisi-forward-cache)))
835 (while (not (memq (wisi-cache-nonterm cache) nonterm-list))
836 (setq cache (wisi-forward-cache))
837 (when (>= (point) limit)
838 (error "cache with nonterm %s not found" nonterm)))
839 cache))
840
841 (defun wisi-goto-cache-next (cache)
842 (goto-char (1- (wisi-cache-next cache)))
843 (wisi-get-cache (point))
844 )
845
846 (defun wisi-forward-statement-keyword ()
847 "If not at a cached token, move forward to next
848 cache. Otherwise move to cache-next, or next cache if nil.
849 Return cache found."
850 (wisi-validate-cache (point-max))
851 (let ((cache (wisi-get-cache (point))))
852 (if cache
853 (let ((next (wisi-cache-next cache)))
854 (if next
855 (goto-char (1- next))
856 (wisi-forward-token)
857 (wisi-forward-cache)))
858 (wisi-forward-cache))
859 )
860 (wisi-get-cache (point))
861 )
862
863 (defun wisi-backward-statement-keyword ()
864 "If not at a cached token, move backward to prev
865 cache. Otherwise move to cache-prev, or prev cache if nil."
866 (wisi-validate-cache (point-max))
867 (let ((cache (wisi-get-cache (point))))
868 (if cache
869 (let ((prev (wisi-cache-prev cache)))
870 (if prev
871 (goto-char (1- prev))
872 (wisi-backward-cache)))
873 (wisi-backward-cache))
874 ))
875
876 (defun wisi-goto-containing (cache &optional error)
877 "Move point to containing token for CACHE, return cache at that point."
878 (cond
879 ((markerp (wisi-cache-containing cache))
880 (goto-char (1- (wisi-cache-containing cache)))
881 (wisi-get-cache (point)))
882 (t
883 (when error
884 (error "already at outermost containing token")))
885 ))
886
887 (defun wisi-goto-containing-paren (cache)
888 "Move point to just after the open-paren containing CACHE.
889 Return cache for paren, or nil if no containing paren."
890 (while (and cache
891 (not (eq (wisi-cache-class cache) 'open-paren)))
892 (setq cache (wisi-goto-containing cache)))
893 (when cache
894 (forward-char 1))
895 cache)
896
897 (defun wisi-goto-start (cache)
898 "Move point to containing ancestor of CACHE that has class block-start or statement-start.
899 Return start cache."
900 (when
901 ;; cache nil at bob
902 (while (and cache
903 (not (memq (wisi-cache-class cache) '(block-start statement-start))))
904 (setq cache (wisi-goto-containing cache)))
905 )
906 cache)
907
908 (defun wisi-goto-end ()
909 "Move point to token at end of statement point is in or before."
910 (interactive)
911 (wisi-validate-cache (point-max))
912 (let ((cache (or (wisi-get-cache (point))
913 (wisi-forward-cache))))
914 (when (wisi-cache-end cache)
915 ;; nil when cache is statement-end
916 (goto-char (1- (wisi-cache-end cache))))
917 ))
918
919 (defun wisi-next-statement-cache (cache)
920 "Move point to CACHE-next, return cache; error if nil."
921 (when (not (markerp (wisi-cache-next cache)))
922 (error "no next statement cache"))
923 (goto-char (1- (wisi-cache-next cache)))
924 (wisi-get-cache (point)))
925
926 (defun wisi-prev-statement-cache (cache)
927 "Move point to CACHE-next, return cache; error if nil."
928 (when (not (markerp (wisi-cache-prev cache)))
929 (error "no prev statement cache"))
930 (goto-char (1- (wisi-cache-prev cache)))
931 (wisi-get-cache (point)))
932
933 ;;;; indentation
934
935 (defun wisi-comment-indent ()
936 "For `comment-indent-function'. Indent single line comment to
937 the comment on the previous line."
938 ;; This should only be called by comment-indent-new-line or
939 ;; fill-comment-paragraph, so there will be a preceding comment line
940 ;; that we can trust.
941 (save-excursion
942 (forward-comment -1)
943 (if (looking-at comment-start)
944 (current-column)
945 (error "wisi-comment-indent called after non-comment"))))
946
947 (defun wisi-indent-current (offset)
948 "Return indentation OFFSET relative to indentation of current line."
949 (+ (current-indentation) offset)
950 )
951
952 (defun wisi-indent-paren (offset)
953 "Return indentation OFFSET relative to preceding open paren."
954 (save-excursion
955 (ada-goto-open-paren 0)
956 (+ (current-column) offset)))
957
958 (defun wisi-indent-start (offset cache)
959 "Return indentation of OFFSET relative to containing ancestor
960 of CACHE with class statement-start or block-start."
961 (wisi-goto-start cache)
962 (+ (current-indentation) offset))
963
964 (defun wisi-indent-statement ()
965 "Indent region given by `wisi-goto-start' on cache at or before point, then wisi-cache-end."
966 ;; force reparse, in case parser got confused
967 (let ((wisi-parse-try t))
968 (wisi-validate-cache (point)))
969
970 (save-excursion
971 (let ((cache (or (wisi-get-cache (point))
972 (wisi-backward-cache))))
973 (when cache
974 ;; can be nil if in header comment
975 (let ((start (progn (wisi-goto-start cache) (point)))
976 (end (progn
977 (when (wisi-cache-end cache)
978 ;; nil when cache is statement-end
979 (goto-char (1- (wisi-cache-end cache))))
980 (point))))
981 (indent-region start end)
982 ))
983 )))
984
985 (defvar-local wisi-indent-calculate-functions nil
986 "Functions to calculate indentation. Each called with point
987 before a token at the beginning of a line (at current
988 indentation); return indentation column for that token, or
989 nil. May move point. Calling stops when first function returns
990 non-nil.")
991
992 (defvar-local wisi-post-parse-fail-hook
993 "Function to reindent portion of buffer.
994 Called from `wisi-indent-line' when a parse succeeds after
995 failing; assumes user was editing code that is now syntactically
996 correct. Must leave point at indentation of current line.")
997
998 (defvar-local wisi-indent-failed nil
999 "Non-nil when wisi-indent-line fails due to parse failing; cleared when indent succeeds.")
1000
1001 (defun wisi-indent-line ()
1002 "Indent current line using the wisi indentation engine."
1003 (interactive)
1004
1005 (let* ((savep (point))
1006 (indent
1007 (or (save-excursion
1008 (wisi-validate-cache (point))
1009 (back-to-indentation)
1010 (when (>= (point) savep) (setq savep nil))
1011 (if wisi-parse-failed
1012 (progn
1013 ;; parse failed. Assume user is editing; indent to previous line, fix it after parse succeeds
1014 (setq wisi-indent-failed t)
1015 (forward-line -1);; safe at bob
1016 (back-to-indentation)
1017 (current-column))
1018
1019 ;; else parse succeeded
1020 (when wisi-indent-failed
1021 (setq wisi-indent-failed nil)
1022 (run-hooks 'wisi-post-parse-fail-hook))
1023 (with-demoted-errors
1024 (or (run-hook-with-args-until-success 'wisi-indent-calculate-functions) 0))
1025 )))))
1026 (if savep
1027 ;; point was inside line text; leave it there
1028 (save-excursion (indent-line-to indent))
1029 ;; point was before line text; move to start of text
1030 (indent-line-to indent))
1031 ))
1032
1033 ;;;; debug
1034 (defun wisi-parse-buffer ()
1035 (interactive)
1036 (syntax-propertize (point-max))
1037 (wisi-invalidate-cache)
1038 (wisi-validate-cache (point-max)))
1039
1040 (defun wisi-show-cache ()
1041 "Show cache at point."
1042 (interactive)
1043 (message "%s" (wisi-get-cache (point))))
1044
1045 (defun wisi-show-token ()
1046 "Move forward across one keyword, show token_id."
1047 (interactive)
1048 (let ((token (wisi-forward-token)))
1049 (message "%s" (car token))))
1050
1051 (defun wisi-show-containing-or-previous-cache ()
1052 (interactive)
1053 (let ((cache (wisi-get-cache (point))))
1054 (if cache
1055 (message "containing %s" (wisi-goto-containing cache t))
1056 (message "previous %s" (wisi-backward-cache)))
1057 ))
1058
1059 ;;;;; setup
1060
1061 (defun wisi-setup (indent-calculate post-parse-fail class-list keyword-table token-table parse-table)
1062 "Set up a buffer for parsing files with wisi."
1063 (setq wisi-class-list class-list)
1064 (setq wisi-string-double-term (car (symbol-value (intern-soft "string-double" token-table))))
1065 (setq wisi-string-single-term (car (symbol-value (intern-soft "string-single" token-table))))
1066 (setq wisi-symbol-term (car (symbol-value (intern-soft "symbol" token-table))))
1067
1068 (setq wisi-punctuation-table (symbol-value (intern-soft "punctuation" token-table)))
1069 (setq wisi-punctuation-table-max-length 0)
1070 (let (fail)
1071 (dolist (item wisi-punctuation-table)
1072 (when item ;; default matcher can be nil
1073
1074 ;; check that all chars used in punctuation tokens have punctuation syntax
1075 (mapc (lambda (char)
1076 (when (not (= ?. (char-syntax char)))
1077 (setq fail t)
1078 (message "in %s, %c does not have punctuation syntax"
1079 (car item) char)))
1080 (cdr item))
1081
1082 (when (< wisi-punctuation-table-max-length (length (cdr item)))
1083 (setq wisi-punctuation-table-max-length (length (cdr item)))))
1084 )
1085 (when fail
1086 (error "aborting due to punctuation errors")))
1087
1088 (setq wisi-keyword-table keyword-table)
1089 (setq wisi-parse-table parse-table)
1090
1091 (setq wisi-indent-calculate-functions indent-calculate)
1092 (set (make-local-variable 'indent-line-function) 'wisi-indent-line)
1093
1094 (setq wisi-post-parse-fail-hook post-parse-fail)
1095 (setq wisi-indent-failed nil)
1096
1097 (add-hook 'before-change-functions 'wisi-before-change nil t)
1098 (add-hook 'after-change-functions 'wisi-after-change nil t)
1099
1100 ;; WORKAROUND: sometimes the first time font-lock is run,
1101 ;; syntax-propertize is not run properly, so we run it here
1102 (syntax-propertize (point-max))
1103
1104 (wisi-invalidate-cache)
1105
1106 ;; FIXME: debug counter
1107 (setq wisi-change-jit-lock-mode 0)
1108 )
1109
1110 (provide 'wisi)
1111
1112 ;;; end of file