]> code.delx.au - gnu-emacs-elpa/blob - packages/wisi/wisi.el
update to Ada mode version 5.0.1
[gnu-emacs-elpa] / packages / wisi / wisi.el
1 ;;; wisi.el --- Utilities for implementing an indentation/navigation engine using a generalized LALR parser
2 ;;
3 ;; Copyright (C) 2012, 2013 Free Software Foundation, Inc.
4 ;;
5 ;; Author: Stephen Leake <stephen_leake@member.fsf.org>
6 ;; Version: 1.0
7 ;; URL: http://stephe-leake.org/emacs/ada-mode/emacs-ada-mode.html
8 ;; Package-Requires: ((cl-lib "0"))
9 ;;
10 ;; This file is part of GNU Emacs.
11 ;;
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
16 ;;
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
21 ;;
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
24 ;;
25
26 ;;; Commentary:
27
28 ;;;; History: first experimental version Oct 2012
29 ;;
30 ;;;; indentation algorithm overview
31 ;;
32 ;; This design is inspired in part by experience writing a SMIE
33 ;; indentation engine for Ada, and the wisent parser.
34 ;;
35 ;; The general approach to indenting a given token is to find the
36 ;; start of the statement it is part of, or some other relevant point
37 ;; in the statement, and indent relative to that. So we need a parser
38 ;; that lets us find statement indent points from arbitrary places in
39 ;; the code.
40 ;;
41 ;; For example, the grammar for Ada as represented by the EBNF in LRM
42 ;; Annex P is not LALR(1), so we use a generalized LALR(1) parser (see
43 ;; wisi-parse, wisi-compile).
44 ;;
45 ;; The parser actions cache indentation and other information as text
46 ;; properties of tokens in statements.
47 ;;
48 ;; An indentation engine moves text in the buffer, as does user
49 ;; editing, so we can't rely on character positions remaining
50 ;; constant. So the parser actions use markers to store
51 ;; positions. Text properties also move with the text.
52 ;;
53 ;; The stored information includes a marker at each statement indent
54 ;; point. Thus, the indentation algorithm is: find the previous token
55 ;; with cached information, and either indent from it, or fetch from
56 ;; it the marker for a previous statement indent point, and indent
57 ;; relative to that.
58 ;;
59 ;; Since we have a cache (the text properties), we need to consider
60 ;; when to invalidate it. Ideally, we invalidate only when a change to
61 ;; the buffer would change the result of a parse that crosses that
62 ;; change, or starts after that change. Changes in whitespace
63 ;; (indentation and newlines) do not affect an Ada parse. Other
64 ;; languages are sensitive to newlines (Bash for example) or
65 ;; indentation (Python). Adding comments does not change a parse,
66 ;; unless code is commented out. For now we invalidate the cache after
67 ;; the edit point if the change involves anything other than
68 ;; whitespace.
69 ;;
70 ;;;; comparison to the SMIE parser
71 ;;
72 ;; The central problem to be solved in building the SMIE parser is
73 ;; grammar precedence conflicts; the general solution is refining
74 ;; keywords so that each new keyword can be assigned a unique
75 ;; precedence. This means ad hoc code must be written to determine the
76 ;; correct refinement for each language keyword from the surrounding
77 ;; tokens. In effect, for a complex language like Ada, the knowledge
78 ;; of the language grammar is mostly embedded in the refinement code;
79 ;; only a small amount is in the refined grammar. Implementing a SMIE
80 ;; parser for a new language involves the same amount of work as the
81 ;; first language.
82 ;;
83 ;; Using a generalized LALR parser avoids that particular problem;
84 ;; assuming the language is already defined by a grammar, it is only a
85 ;; matter of a format change to teach the wisi parser the
86 ;; language. The problem in a wisi indentation engine is caching the
87 ;; output of the parser in a useful way, since we can't start the
88 ;; parser from arbitrary places in the code (as we can with the SMIE
89 ;; parser). A second problem is determining when to invalidate the
90 ;; cache. But these problems are independent of the language being
91 ;; parsed, so once we have one wisi indentation engine working,
92 ;; adapting it to new languages should be quite simple.
93 ;;
94 ;; The SMIE parser does not find the start of each statement, only the
95 ;; first language keyword in each statement; additional code must be
96 ;; written to find the statement start and indent points. The wisi
97 ;; parser finds the statement start and indent points directly.
98 ;;
99 ;; In SMIE, it is best if each grammar rule is a complete statement,
100 ;; so forward-sexp will traverse the entire statement. If nested
101 ;; non-terminals are used, forward-sexp may stop inside one of the
102 ;; nested non-terminals. This problem does not occur with the wisi
103 ;; parser.
104 ;;
105 ;; A downside of the wisi parser is conflicts in the grammar; they can
106 ;; be much more difficult to resolve than in the SMIE parser. The
107 ;; generalized parser helps by handling conflicts, but it does so by
108 ;; running multiple parsers in parallel, persuing each choice in the
109 ;; conflict. If the conflict is due to a genuine ambiguity, both paths
110 ;; will succeed, which causes the parse to fail, since it is not clear
111 ;; which set of text properties to store. Even if one branch
112 ;; ultimately fails, running parallel parsers over large sections of
113 ;; code is slow. Finally, this approach can lead to exponential growth
114 ;; in the number of parsers. So grammar conflicts must still be
115 ;; analyzed and minimized.
116 ;;
117 ;; In addition, the complete grammar must be specified; in smie, it is
118 ;; often possible to specify a subset of the grammar.
119 ;;
120 ;;;; grammar compiler and parser
121 ;;
122 ;; Since we are using a generalized LALR(1) parser, we cannot use any
123 ;; of the wisent grammar functions. We use OpenToken wisi-generate
124 ;; to compile BNF to Elisp source (similar to
125 ;; semantic-grammar-create-package), and wisi-compile-grammar to
126 ;; compile that to the parser table.
127 ;;
128 ;; Semantic provides a complex lexer, more complicated than we need
129 ;; for indentation. So we use the elisp lexer, which consists of
130 ;; `forward-comment', `skip-syntax-forward', and `scan-sexp'. We wrap
131 ;; that in functions that return tokens in the form wisi-parse
132 ;; expects.
133 ;;
134 ;;;; code style
135 ;;
136 ;; 'wisi' was originally short for "wisent indentation engine", but
137 ;; now is just a name.
138 ;;
139 ;; not using lexical-binding because we support Emacs 23
140 ;;
141 ;;;;;
142
143 ;;; Code:
144
145 (require 'wisi-parse)
146 (eval-when-compile (require 'cl-lib))
147
148 ;;;; lexer
149
150 (defvar-local wisi-class-list nil)
151 (defvar-local wisi-keyword-table nil)
152 (defvar-local wisi-punctuation-table nil)
153 (defvar-local wisi-punctuation-table-max-length 0)
154 (defvar-local wisi-string-double-term nil) ;; string delimited by double quotes
155 (defvar-local wisi-string-quote-escape-doubled nil)
156 (defvar-local wisi-string-single-term nil) ;; string delimited by single quotes
157 (defvar-local wisi-symbol-term nil)
158
159 (defun wisi-forward-token (&optional text-only)
160 "Move point forward across one token, skipping leading whitespace and comments.
161 Return the corresponding token, in a format determined by TEXT-ONLY:
162 TEXT-ONLY t: text
163 TEXT-ONLY nil: (token text start . end)
164 where:
165 `token' is a token symbol (not string) from `wisi-punctuation-table',
166 `wisi-keyword-table', `wisi-string-double-term', `wisi-string-double-term' or `wisi-symbol-term'.
167
168 `text' is the token text from the buffer
169
170 `start, end' are the character positions in the buffer of the start
171 and end of the token text.
172
173 If at end of buffer, returns `wisent-eoi-term'."
174 (forward-comment (point-max))
175 ;; skips leading whitespace, comment, trailing whitespace.
176
177 (let ((start (point))
178 ;; (info "(elisp)Syntax Table Internals" "*info elisp syntax*")
179 (syntax (syntax-class (syntax-after (point))))
180 token-id token-text)
181 (cond
182 ((eobp)
183 (setq token-text "")
184 (setq token-id wisent-eoi-term))
185
186 ((eq syntax 1)
187 ;; punctuation. Find the longest matching string in wisi-punctuation-table
188 (forward-char 1)
189 (let ((next-point (point))
190 temp-text temp-id done)
191 (while (not done)
192 (setq temp-text (buffer-substring-no-properties start (point)))
193 (setq temp-id (car (rassoc temp-text wisi-punctuation-table)))
194 (when temp-id
195 (setq token-text temp-text
196 token-id temp-id
197 next-point (point)))
198 (if (or
199 (eobp)
200 (= (- (point) start) wisi-punctuation-table-max-length))
201 (setq done t)
202 (forward-char 1))
203 )
204 (goto-char next-point)))
205
206 ((memq syntax '(4 5)) ;; open, close parenthesis
207 (forward-char 1)
208 (setq token-text (buffer-substring-no-properties start (point)))
209 (setq token-id (symbol-value (intern-soft token-text wisi-keyword-table))))
210
211 ((eq syntax 7)
212 ;; string quote, either single or double. we assume point is before the start quote, not the end quote
213 (let ((delim (char-after (point)))
214 (forward-sexp-function nil))
215 (forward-sexp)
216 ;; point is now after the end quote; check for a doubled quote
217 (while (and wisi-string-quote-escape-doubled
218 (eq (char-after (point)) delim))
219 (forward-sexp))
220 (setq token-text (buffer-substring-no-properties start (point)))
221 (setq token-id (if (= delim ?\") wisi-string-double-term wisi-string-single-term))))
222
223 (t ;; assuming word syntax
224 (skip-syntax-forward "w_'")
225 (setq token-text (buffer-substring-no-properties start (point)))
226 (setq token-id
227 (or (symbol-value (intern-soft (downcase token-text) wisi-keyword-table))
228 wisi-symbol-term)))
229 );; cond
230
231 (unless token-id
232 (error (wisi-error-msg "unrecognized token '%s'" (buffer-substring-no-properties start (point)))))
233
234 (if text-only
235 token-text
236 (cons token-id (cons token-text (cons start (point)))))
237 ))
238
239 (defun wisi-backward-token ()
240 "Move point backward across one token, skipping whitespace and comments.
241 Return (nil text start . end) - same structure as
242 wisi-forward-token, but does not look up symbol."
243 (forward-comment (- (point)))
244 ;; skips leading whitespace, comment, trailing whitespace.
245
246 ;; (info "(elisp)Syntax Table Internals" "*info elisp syntax*")
247 (let ((end (point))
248 (syntax (syntax-class (syntax-after (1- (point))))))
249 (cond
250 ((bobp) nil)
251
252 ((memq syntax '(4 5)) ;; open, close parenthesis
253 (backward-char 1))
254
255 ((eq syntax 7)
256 ;; a string quote. we assume we are after the end quote, not the start quote
257 (let ((forward-sexp-function nil))
258 (forward-sexp -1)))
259
260 (t
261 (if (zerop (skip-syntax-backward "."))
262 (skip-syntax-backward "w_'")))
263 )
264 (cons nil (cons (buffer-substring-no-properties (point) end) (cons (point) end)))
265 ))
266
267 ;;;; token info cache
268 ;;
269 ;; the cache stores the results of parsing as text properties on
270 ;; keywords, for use by the indention and motion engines.
271
272 (cl-defstruct
273 (wisi-cache
274 (:constructor wisi-cache-create)
275 (:copier nil))
276 nonterm;; nonterminal from parse (set by wisi-statement-action)
277
278 token
279 ;; terminal symbol from wisi-keyword-table or
280 ;; wisi-punctuation-table, or lower-level nonterminal from parse
281 ;; (set by wisi-statement-action)
282
283 last ;; pos of last char in token, relative to first (0 indexed)
284
285 class
286 ;; arbitrary lisp symbol, used for indentation and navigation.
287 ;; some classes are defined by wisi:
288 ;;
289 ;; 'block-middle - a block keyword (ie: if then else end), not at the start of a statement
290 ;;
291 ;; 'block-start - a block keyword at the start of a statement
292 ;;
293 ;; 'statement-start - the start of a statement
294 ;;
295 ;; 'open-paren
296 ;;
297 ;; others are language-specific
298
299 containing
300 ;; Marker at the containing keyword for this token.
301 ;; A containing keyword is an indent point; the start of a
302 ;; statement, or 'begin', 'then' or 'else' for a block of
303 ;; statements, etc.
304 ;; nil only for first token in buffer
305
306 prev ;; marker at previous motion token in statement; nil if none
307 next ;; marker at next motion token in statement; nil if none
308 end ;; marker at token at end of current statement
309 )
310
311 (defvar-local wisi-cache-max 0
312 "Maximimum position in buffer where wisi token cache is valid.")
313
314 (defvar-local wisi-parse-table nil)
315
316 (defvar-local wisi-parse-failed nil
317 "Non-nil when a recent parse has failed - cleared when parse succeeds.")
318
319 (defvar-local wisi-parse-try nil
320 "Non-nil when parse is needed - cleared when parse succeeds.")
321
322 (defvar-local wisi-change-need-invalidate nil)
323 (defvar-local wisi-change-jit-lock-mode nil)
324
325 (defun wisi-invalidate-cache()
326 "Invalidate the wisi token cache for the current buffer.
327 Also invalidate the Emacs syntax cache."
328 (interactive)
329 (setq wisi-cache-max 0)
330 (setq wisi-parse-try t)
331 (syntax-ppss-flush-cache (point-min))
332 (with-silent-modifications
333 (remove-text-properties (point-min) (point-max) '(wisi-cache))))
334
335 (defun wisi-before-change (begin end)
336 "For `before-change-functions'."
337 ;; begin . end is range of text being deleted
338
339 ;; If jit-lock-after-change is before wisi-after-change in
340 ;; after-change-functions, it might use any invalid caches in the
341 ;; inserted text.
342 ;;
343 ;; So we check for that here, and ensure it is after
344 ;; wisi-after-change, which deletes the invalid caches
345 (when (boundp 'jit-lock-mode)
346 (when (memq 'wisi-after-change (memq 'jit-lock-after-change after-change-functions))
347 (setq after-change-functions (delete 'wisi-after-change after-change-functions))
348 (add-hook 'after-change-functions 'wisi-after-change nil t)
349 (setq wisi-change-jit-lock-mode (1+ wisi-change-jit-lock-mode)))
350 )
351
352 (save-excursion
353 ;; don't invalidate parse for whitespace, string, or comment changes
354 (let (;; (info "(elisp)Parser State")
355 (state (syntax-ppss begin)))
356 ;; syntax-ppss has moved point to "begin".
357 (cond
358 ((or
359 (nth 3 state); in string
360 (nth 4 state)); in comment
361 ;; FIXME: check that entire range is in comment or string
362 (setq wisi-change-need-invalidate nil))
363
364 ((progn
365 (skip-syntax-forward " " end);; does not skip newline
366 (eq (point) end))
367 (setq wisi-change-need-invalidate nil))
368
369 (t (setq wisi-change-need-invalidate t))
370 ))))
371
372 (defun wisi-after-change (begin end length)
373 "For `after-change-functions'."
374 ;; begin . end is range of text being inserted (may be empty)
375 ;; (syntax-ppss-flush-cache begin) is in before-change-functions
376
377 (syntax-ppss-flush-cache begin) ;; IMPROVEME: could check for whitespace
378
379 (cond
380 (wisi-parse-failed
381 ;; The parse was failing, probably due to bad syntax; this change
382 ;; may have fixed it, so try reparse.
383 (setq wisi-parse-try t)
384
385 ;; remove 'wisi-cache on inserted text, which could have caches
386 ;; from before the failed parse, and are in any case invalid.
387 (with-silent-modifications
388 (remove-text-properties begin end '(wisi-cache)))
389 )
390
391 ((>= wisi-cache-max begin)
392 ;; The parse had succeeded paste the start of the inserted
393 ;; text.
394 (save-excursion
395 (let ((need-invalidate t)
396 ;; (info "(elisp)Parser State")
397 (state (syntax-ppss begin)))
398 ;; syntax-ppss has moved point to "begin".
399 (cond
400 (wisi-change-need-invalidate
401 ;; wisi-before change determined the removed text alters the
402 ;; parse
403 nil)
404
405 ((or
406 (nth 3 state); in string
407 (nth 4 state)); in comment
408 ;; FIXME: insert newline in comment to create non-comment!?
409 ;; or paste a chunk of code
410 ;; => check that all of change region is comment or string
411 (setq need-invalidate nil))
412
413 ((progn
414 (skip-syntax-forward " " end);; does not skip newlines
415 (eq (point) end))
416 (setq need-invalidate nil))
417
418 (t nil)
419 )
420
421 (if need-invalidate
422 ;; The inserted or deleted text could alter the parse
423 (wisi-invalidate-cache)
424
425 ;; else move cache-max by the net change length. We don't
426 ;; need to delete 'wisi-cache in the inserted text, because
427 ;; if there were any it would not pass the above.
428 (setq wisi-cache-max
429 (+ wisi-cache-max (- end begin length))))
430 )
431 ))
432
433 (t
434 ;; parse never attempted, or only done to before BEGIN. Just
435 ;; remove 'wisi-cache
436 (with-silent-modifications
437 (remove-text-properties begin end '(wisi-cache)))
438 )
439 ))
440
441 (defun wisi-get-cache (pos)
442 "Return `wisi-cache' struct from the `wisi-cache' text property at POS.
443 If accessing cache at a marker for a token as set by `wisi-cache-tokens', POS must be (1- mark)."
444 (get-text-property pos 'wisi-cache))
445
446 (defvar-local wisi-parse-error-msg nil)
447
448 (defun wisi-goto-error ()
449 "Move point to position in last error message (if any)."
450 (when (string-match ":\\([0-9]+\\):\\([0-9]+\\):" wisi-parse-error-msg)
451 (let ((line (string-to-number (match-string 1 wisi-parse-error-msg)))
452 (col (string-to-number (match-string 2 wisi-parse-error-msg))))
453 (goto-char (point-min))
454 (forward-line (1- line))
455 (forward-char col))))
456
457 (defun wisi-show-parse-error ()
458 "Show last wisi-parse error."
459 (interactive)
460 (if wisi-parse-failed
461 (progn
462 (message wisi-parse-error-msg)
463 (wisi-goto-error))
464 (message "parse succeeded")))
465
466 (defun wisi-validate-cache (pos)
467 "Ensure cached data is valid at least up to POS in current buffer."
468 (when (and wisi-parse-try
469 (< wisi-cache-max pos))
470 (when (> wisi-debug 0)
471 (message "wisi: parsing ..."))
472
473 (setq wisi-parse-try nil)
474 (setq wisi-parse-error-msg nil)
475 (save-excursion
476 (goto-char wisi-cache-max)
477 (if (> wisi-debug 1)
478 ;; let debugger stop in wisi-parse
479 (progn
480 (wisi-parse wisi-parse-table 'wisi-forward-token)
481 (setq wisi-cache-max (point))
482 (setq wisi-parse-failed nil))
483 ;; else capture errors from bad syntax, so higher level functions can try to continue
484 (condition-case err
485 (progn
486 (wisi-parse wisi-parse-table 'wisi-forward-token)
487 (setq wisi-cache-max (point))
488 (setq wisi-parse-failed nil))
489 (wisi-parse-error
490 (setq wisi-parse-failed t)
491 (setq wisi-parse-error-msg (cdr err)))
492 )))
493 (if wisi-parse-error-msg
494 ;; error
495 (when (> wisi-debug 0)
496 (message "wisi: parsing ... error")
497 (wisi-goto-error)
498 (error wisi-parse-error-msg))
499 ;; no msg; success
500 (when (> wisi-debug 0)
501 (message "wisi: parsing ... done")))
502 ))
503
504 (defun wisi-get-containing-cache (cache)
505 "Return cache from (wisi-cache-containing CACHE)."
506 (let ((containing (wisi-cache-containing cache)))
507 (and containing
508 (wisi-get-cache (1- containing)))))
509
510 (defun wisi-cache-text (cache)
511 "Return property-less buffer substring designated by cache.
512 Point must be at cache."
513 (buffer-substring-no-properties (point) (+ (point) (wisi-cache-last cache))))
514
515 ;;;; parse actions
516
517 (defun wisi-set-end (tokens end-mark)
518 "Set END-MARK on all unset caches in TOKENS."
519 (let ((tokens-t tokens))
520 (while tokens-t
521 (let* ((token (pop tokens-t))
522 (region (cddr token))
523 cache)
524 (when region
525 (goto-char (car region))
526 (setq cache (wisi-get-cache (car region)))
527 (while (and cache
528 (< (point) (cdr region)))
529 (if (not (wisi-cache-end cache))
530 (setf (wisi-cache-end cache) end-mark)
531 (goto-char (wisi-cache-end cache))
532 )
533 (setq cache (wisi-forward-cache))
534 ))
535 ))
536 ))
537
538 (defvar wisi-tokens nil);; keep byte-compiler happy; `wisi-tokens' is bound in action created by wisi-semantic-action
539 (defun wisi-statement-action (&rest pairs)
540 "Cache information in text properties of tokens.
541 Intended as a grammar non-terminal action.
542
543 PAIRS is of the form [TOKEN-NUMBER CLASS] ... where TOKEN-NUMBER
544 is the (1 indexed) token number in the production, CLASS is the wisi class of
545 that token. Use in a grammar action as:
546 (wisi-statement-action 1 'statement-start 7 'statement-end)"
547 (save-excursion
548 (let ((first-item t)
549 first-keyword-mark
550 (override-start nil))
551 (while pairs
552 (let* ((number (1- (pop pairs)))
553 (region (cddr (nth number wisi-tokens)));; wisi-tokens is let-bound in wisi-parse-reduce
554 (token (car (nth number wisi-tokens)))
555 (class (pop pairs))
556 (mark
557 ;; Marker one char into token, so indent-line-to
558 ;; inserts space before the mark, not after
559 (when region (copy-marker (1+ (car region)))))
560 cache)
561
562 (unless (memq class wisi-class-list)
563 (error "%s not in wisi-class-list" class))
564
565 (if region
566 (progn
567 (if (setq cache (wisi-get-cache (car region)))
568 ;; We are processing a previously set non-terminal; ie generic_formal_part in
569 ;;
570 ;; generic_package_declaration : generic_formal_part package_specification SEMICOLON
571 ;; (wisi-statement-action 1 'block-start 2 'block-middle 3 'statement-end)
572 ;;
573 ;; or simple_statement in
574 ;;
575 ;; statement : label_opt simple_statement
576 ;;
577 ;; override nonterm, class and containing
578 (progn
579 (cl-case (wisi-cache-class cache)
580 (block-start
581 (setf (wisi-cache-class cache)
582 (cond
583 ((eq override-start nil)
584 (cond
585 ((memq class '(block-start statement-start)) 'block-start)
586 (t 'block-middle)))
587
588 ((memq override-start '(block-start statement-start)) 'block-start)
589
590 (t (error "unexpected override-start"))
591 )))
592 (t
593 (setf (wisi-cache-class cache) (or override-start class)))
594 )
595 (setf (wisi-cache-nonterm cache) $nterm)
596 (setf (wisi-cache-containing cache) first-keyword-mark))
597
598 ;; else create new cache
599 (with-silent-modifications
600 (put-text-property
601 (car region)
602 (1+ (car region))
603 'wisi-cache
604 (wisi-cache-create
605 :nonterm $nterm;; $nterm defined in wisi-semantic-action
606 :token token
607 :last (- (cdr region) (car region))
608 :class (or override-start class)
609 :containing first-keyword-mark)
610 )))
611
612 (when first-item
613 (setq first-item nil)
614 (when (or override-start
615 (memq class '(block-middle block-start statement-start)))
616 (setq override-start nil)
617 (setq first-keyword-mark mark)))
618
619 (when (eq class 'statement-end)
620 (wisi-set-end wisi-tokens (copy-marker (1+ (car region)))))
621 )
622
623 ;; region is nil when a production is empty; if the first
624 ;; token is a start, override the class on the next token.
625 (when (and first-item
626 (memq class '(block-middle block-start statement-start)))
627 (setq override-start class)))
628 ))
629 )))
630
631 (defun wisi-containing-action (containing-token contained-token)
632 "Set containing marks in all tokens in CONTAINED-TOKEN with null containing mark to marker pointing to CONTAINING-TOKEN.
633 If CONTAINING-TOKEN is empty, the next token number is used."
634 ;; wisi-tokens is is bound in action created by wisi-semantic-action
635 (let* ((containing-region (cddr (nth (1- containing-token) wisi-tokens)))
636 (contained-region (cddr (nth (1- contained-token) wisi-tokens))))
637 (while (not containing-region)
638 ;; containing-token is empty; use next
639 (setq containing-region (cddr (nth containing-token wisi-tokens))))
640
641 (when contained-region
642 ;; nil when empty production, may not contain any caches
643 (save-excursion
644 (goto-char (cdr contained-region))
645 (let ((cache (wisi-backward-cache))
646 (mark (copy-marker (1+ (car containing-region)))))
647 (while cache
648
649 ;; skip blocks that are already marked
650 (while (and (>= (point) (car contained-region))
651 (markerp (wisi-cache-containing cache)))
652 (goto-char (1- (wisi-cache-containing cache)))
653 (setq cache (wisi-get-cache (point))))
654
655 (if (or (and (= (car containing-region) (car contained-region))
656 (<= (point) (car contained-region)))
657 (< (point) (car contained-region)))
658 ;; done
659 (setq cache nil)
660
661 ;; else set mark, loop
662 (setf (wisi-cache-containing cache) mark)
663 (setq cache (wisi-backward-cache)))
664 ))))))
665
666 (defun wisi-motion-action (&rest token-numbers)
667 "Set prev/next marks in all tokens given by TOKEN-NUMBERS.
668 Each TOKEN-NUMBERS is one of:
669
670 number: the token number; mark that token
671
672 list (number token_id):
673 list (number (token_id token_id)):
674 mark all tokens with token_id in the nonterminal given by the number."
675 (save-excursion
676 (let (prev-keyword-mark
677 prev-cache
678 cache
679 mark)
680 (while token-numbers
681 (let ((token-number (pop token-numbers))
682 target-token
683 region)
684 (cond
685 ((numberp token-number)
686 (setq target-token nil)
687 (setq region (cddr (nth (1- token-number) wisi-tokens)))
688 (when region
689 (setq cache (wisi-get-cache (car region)))
690 (setq mark (copy-marker (1+ (car region))))
691
692 (when (and prev-keyword-mark
693 cache
694 (null (wisi-cache-prev cache)))
695 (setf (wisi-cache-prev cache) prev-keyword-mark)
696 (setf (wisi-cache-next prev-cache) mark))
697
698 (setq prev-keyword-mark mark)
699 (setq prev-cache cache)
700 ))
701
702 ((listp token-number)
703 ;; token-number may contain 0, 1, or more token_id; token_id may be a list
704 ;; the corresponding region may be empty
705 ;; there must have been a prev keyword
706 (setq target-token (cadr token-number))
707 (when (not (listp target-token))
708 (setq target-token (list target-token)))
709 (setq token-number (car token-number))
710 (setq region (cddr (nth (1- token-number) wisi-tokens)))
711 (when region ;; not an empty token
712 (goto-char (car region))
713 (while (wisi-forward-find-token target-token (cdr region) t)
714 (setq cache (wisi-get-cache (point)))
715 (setq mark (copy-marker (1+ (point))))
716
717 (when (null (wisi-cache-prev cache))
718 (setf (wisi-cache-prev cache) prev-keyword-mark)
719 (setf (wisi-cache-next prev-cache) mark)
720 (setq prev-keyword-mark mark)
721 (setq prev-cache cache))
722
723 (wisi-forward-token);; don't find same token again
724 ))
725 )
726
727 (t
728 (error "unexpected token-number %s" token-number))
729 )
730
731 ))
732 )))
733
734 ;;;; motion
735 (defun wisi-backward-cache ()
736 "Move point backward to the beginning of the first token preceding point that has a cache.
737 Returns cache, or nil if at beginning of buffer."
738 (let (cache pos)
739 (setq pos (previous-single-property-change (point) 'wisi-cache))
740 ;; There are three cases:
741 ;;
742 ;; 1) caches separated by non-cache chars: 'if ... then'
743 ;; pos is before 'f', cache is on 'i'
744 ;;
745 ;; 2) caches not separated: ');'
746 ;; pos is before ';', cache is on ';'
747 ;;
748 ;; 3) at bob; pos is nil
749 ;;
750 (if pos
751 (progn
752 (setq cache (get-text-property pos 'wisi-cache))
753 (if cache
754 ;; case 2
755 (goto-char pos)
756 ;; case 1
757 (setq cache (get-text-property (1- pos) 'wisi-cache))
758 (goto-char (1- pos))))
759 ;; at bob
760 (goto-char (point-min))
761 (setq cache nil))
762 cache
763 ))
764
765 (defun wisi-forward-cache ()
766 "Move point forward to the beginning of the first token after point that has a cache.
767 Returns cache, or nil if at end of buffer."
768 (let (cache pos)
769 (when (get-text-property (point) 'wisi-cache)
770 ;; on a cache; get past it
771 (goto-char (1+ (point))))
772
773 (setq cache (get-text-property (point) 'wisi-cache))
774 (if cache
775 nil
776
777 (setq pos (next-single-property-change (point) 'wisi-cache))
778 (if pos
779 (progn
780 (goto-char pos)
781 (setq cache (get-text-property pos 'wisi-cache)))
782 ;; at eob
783 (goto-char (point-max))
784 (setq cache nil))
785 )
786 cache
787 ))
788
789 (defun wisi-forward-find-class (class limit)
790 "Search forward for a token that has a cache with CLASS.
791 Return cache, or nil if at end of buffer.
792 If LIMIT (a buffer position) is reached, throw an error."
793 (let ((cache (wisi-forward-cache)))
794 (while (not (eq class (wisi-cache-class cache)))
795 (setq cache (wisi-forward-cache))
796 (when (>= (point) limit)
797 (error "cache with class %s not found" class)))
798 cache))
799
800 (defun wisi-forward-find-token (token limit &optional noerror)
801 "Search forward for a token that has a cache with TOKEN.
802 If point is at a matching token, return that token.
803 TOKEN may be a list; stop on any cache that has a member of the list.
804 Return cache, or nil if at end of buffer.
805 If LIMIT (a buffer position) is reached, then if NOERROR is nil, throw an
806 error, if non-nil, return nil."
807 (let ((token-list (cond
808 ((listp token) token)
809 (t (list token))))
810 (cache (wisi-get-cache (point)))
811 (done nil))
812 (while (not (or done
813 (and cache
814 (memq (wisi-cache-token cache) token-list))))
815 (setq cache (wisi-forward-cache))
816 (when (>= (point) limit)
817 (if noerror
818 (progn
819 (setq done t)
820 (setq cache nil))
821 (error "cache with token %s not found" token))))
822 cache))
823
824 (defun wisi-forward-find-nonterm (nonterm limit)
825 "Search forward for a token that has a cache with NONTERM.
826 NONTERM may be a list; stop on any cache that has a member of the list.
827 Return cache, or nil if at end of buffer.
828 If LIMIT (a buffer position) is reached, throw an error."
829 (let ((nonterm-list (cond
830 ((listp nonterm) nonterm)
831 (t (list nonterm))))
832 (cache (wisi-forward-cache)))
833 (while (not (memq (wisi-cache-nonterm cache) nonterm-list))
834 (setq cache (wisi-forward-cache))
835 (when (>= (point) limit)
836 (error "cache with nonterm %s not found" nonterm)))
837 cache))
838
839 (defun wisi-goto-cache-next (cache)
840 (goto-char (1- (wisi-cache-next cache)))
841 (wisi-get-cache (point))
842 )
843
844 (defun wisi-forward-statement-keyword ()
845 "If not at a cached token, move forward to next
846 cache. Otherwise move to cache-next, or next cache if nil.
847 Return cache found."
848 (wisi-validate-cache (point-max))
849 (let ((cache (wisi-get-cache (point))))
850 (if cache
851 (let ((next (wisi-cache-next cache)))
852 (if next
853 (goto-char (1- next))
854 (wisi-forward-token)
855 (wisi-forward-cache)))
856 (wisi-forward-cache))
857 )
858 (wisi-get-cache (point))
859 )
860
861 (defun wisi-backward-statement-keyword ()
862 "If not at a cached token, move backward to prev
863 cache. Otherwise move to cache-prev, or prev cache if nil."
864 (wisi-validate-cache (point-max))
865 (let ((cache (wisi-get-cache (point))))
866 (if cache
867 (let ((prev (wisi-cache-prev cache)))
868 (if prev
869 (goto-char (1- prev))
870 (wisi-backward-cache)))
871 (wisi-backward-cache))
872 ))
873
874 (defun wisi-goto-containing (cache &optional error)
875 "Move point to containing token for CACHE, return cache at that point."
876 (cond
877 ((markerp (wisi-cache-containing cache))
878 (goto-char (1- (wisi-cache-containing cache)))
879 (wisi-get-cache (point)))
880 (t
881 (when error
882 (error "already at outermost containing token")))
883 ))
884
885 (defun wisi-goto-containing-paren (cache)
886 "Move point to just after the open-paren containing CACHE.
887 Return cache for paren, or nil if no containing paren."
888 (while (and cache
889 (not (eq (wisi-cache-class cache) 'open-paren)))
890 (setq cache (wisi-goto-containing cache)))
891 (when cache
892 (forward-char 1))
893 cache)
894
895 (defun wisi-goto-start (cache)
896 "Move point to containing ancestor of CACHE that has class block-start or statement-start.
897 Return start cache."
898 (when
899 ;; cache nil at bob
900 (while (and cache
901 (not (memq (wisi-cache-class cache) '(block-start statement-start))))
902 (setq cache (wisi-goto-containing cache)))
903 )
904 cache)
905
906 (defun wisi-goto-end ()
907 "Move point to token at end of statement point is in or before."
908 (interactive)
909 (wisi-validate-cache (point-max))
910 (let ((cache (or (wisi-get-cache (point))
911 (wisi-forward-cache))))
912 (when (wisi-cache-end cache)
913 ;; nil when cache is statement-end
914 (goto-char (1- (wisi-cache-end cache))))
915 ))
916
917 (defun wisi-next-statement-cache (cache)
918 "Move point to CACHE-next, return cache; error if nil."
919 (when (not (markerp (wisi-cache-next cache)))
920 (error "no next statement cache"))
921 (goto-char (1- (wisi-cache-next cache)))
922 (wisi-get-cache (point)))
923
924 (defun wisi-prev-statement-cache (cache)
925 "Move point to CACHE-next, return cache; error if nil."
926 (when (not (markerp (wisi-cache-prev cache)))
927 (error "no prev statement cache"))
928 (goto-char (1- (wisi-cache-prev cache)))
929 (wisi-get-cache (point)))
930
931 ;;;; indentation
932
933 (defun wisi-comment-indent ()
934 "For `comment-indent-function'. Indent single line comment to
935 the comment on the previous line."
936 ;; This should only be called by comment-indent-new-line or
937 ;; fill-comment-paragraph, so there will be a preceding comment line
938 ;; that we can trust.
939 (save-excursion
940 (forward-comment -1)
941 (if (looking-at comment-start)
942 (current-column)
943 (error "wisi-comment-indent called after non-comment"))))
944
945 (defun wisi-indent-current (offset)
946 "Return indentation OFFSET relative to indentation of current line."
947 (+ (current-indentation) offset)
948 )
949
950 (defun wisi-indent-paren (offset)
951 "Return indentation OFFSET relative to preceding open paren."
952 (save-excursion
953 (goto-char (nth 1 (syntax-ppss)))
954 (+ (current-column) offset)))
955
956 (defun wisi-indent-start (offset cache)
957 "Return indentation of OFFSET relative to containing ancestor
958 of CACHE with class statement-start or block-start."
959 (wisi-goto-start cache)
960 (+ (current-indentation) offset))
961
962 (defun wisi-indent-statement ()
963 "Indent region given by `wisi-goto-start' on cache at or before point, then wisi-cache-end."
964 ;; force reparse, in case parser got confused
965 (let ((wisi-parse-try t))
966 (wisi-validate-cache (point)))
967
968 (save-excursion
969 (let ((cache (or (wisi-get-cache (point))
970 (wisi-backward-cache))))
971 (when cache
972 ;; can be nil if in header comment
973 (let ((start (progn (wisi-goto-start cache) (point)))
974 (end (progn
975 (when (wisi-cache-end cache)
976 ;; nil when cache is statement-end
977 (goto-char (1- (wisi-cache-end cache))))
978 (point))))
979 (indent-region start end)
980 ))
981 )))
982
983 (defvar-local wisi-indent-calculate-functions nil
984 "Functions to calculate indentation. Each called with point
985 before a token at the beginning of a line (at current
986 indentation); return indentation column for that token, or
987 nil. May move point. Calling stops when first function returns
988 non-nil.")
989
990 (defvar-local wisi-post-parse-fail-hook
991 "Function to reindent portion of buffer.
992 Called from `wisi-indent-line' when a parse succeeds after
993 failing; assumes user was editing code that is now syntactically
994 correct. Must leave point at indentation of current line.")
995
996 (defvar-local wisi-indent-failed nil
997 "Non-nil when wisi-indent-line fails due to parse failing; cleared when indent succeeds.")
998
999 (defun wisi-indent-line ()
1000 "Indent current line using the wisi indentation engine."
1001 (interactive)
1002
1003 (let* ((savep (point))
1004 (indent
1005 (or (save-excursion
1006 (wisi-validate-cache (point))
1007 (back-to-indentation)
1008 (when (>= (point) savep) (setq savep nil))
1009 (if wisi-parse-failed
1010 (progn
1011 ;; parse failed. Assume user is editing; indent to previous line, fix it after parse succeeds
1012 (setq wisi-indent-failed t)
1013 (forward-line -1);; safe at bob
1014 (back-to-indentation)
1015 (current-column))
1016
1017 ;; else parse succeeded
1018 (when wisi-indent-failed
1019 (setq wisi-indent-failed nil)
1020 (run-hooks 'wisi-post-parse-fail-hook))
1021 (with-demoted-errors
1022 (or (run-hook-with-args-until-success 'wisi-indent-calculate-functions) 0))
1023 )))))
1024 (if savep
1025 ;; point was inside line text; leave it there
1026 (save-excursion (indent-line-to indent))
1027 ;; point was before line text; move to start of text
1028 (indent-line-to indent))
1029 ))
1030
1031 ;;;; debug
1032 (defun wisi-parse-buffer ()
1033 (interactive)
1034 (syntax-propertize (point-max))
1035 (wisi-invalidate-cache)
1036 (wisi-validate-cache (point-max)))
1037
1038 (defun wisi-show-cache ()
1039 "Show cache at point."
1040 (interactive)
1041 (message "%s" (wisi-get-cache (point))))
1042
1043 (defun wisi-show-token ()
1044 "Move forward across one keyword, show token_id."
1045 (interactive)
1046 (let ((token (wisi-forward-token)))
1047 (message "%s" (car token))))
1048
1049 (defun wisi-show-containing-or-previous-cache ()
1050 (interactive)
1051 (let ((cache (wisi-get-cache (point))))
1052 (if cache
1053 (message "containing %s" (wisi-goto-containing cache t))
1054 (message "previous %s" (wisi-backward-cache)))
1055 ))
1056
1057 ;;;;; setup
1058
1059 (defun wisi-setup (indent-calculate post-parse-fail class-list keyword-table token-table parse-table)
1060 "Set up a buffer for parsing files with wisi."
1061 (setq wisi-class-list class-list)
1062 (setq wisi-string-double-term (car (symbol-value (intern-soft "string-double" token-table))))
1063 (setq wisi-string-single-term (car (symbol-value (intern-soft "string-single" token-table))))
1064 (setq wisi-symbol-term (car (symbol-value (intern-soft "symbol" token-table))))
1065
1066 (setq wisi-punctuation-table (symbol-value (intern-soft "punctuation" token-table)))
1067 (setq wisi-punctuation-table-max-length 0)
1068 (let (fail)
1069 (dolist (item wisi-punctuation-table)
1070 (when item ;; default matcher can be nil
1071
1072 ;; check that all chars used in punctuation tokens have punctuation syntax
1073 (mapc (lambda (char)
1074 (when (not (= ?. (char-syntax char)))
1075 (setq fail t)
1076 (message "in %s, %c does not have punctuation syntax"
1077 (car item) char)))
1078 (cdr item))
1079
1080 (when (< wisi-punctuation-table-max-length (length (cdr item)))
1081 (setq wisi-punctuation-table-max-length (length (cdr item)))))
1082 )
1083 (when fail
1084 (error "aborting due to punctuation errors")))
1085
1086 (setq wisi-keyword-table keyword-table)
1087 (setq wisi-parse-table parse-table)
1088
1089 (setq wisi-indent-calculate-functions indent-calculate)
1090 (set (make-local-variable 'indent-line-function) 'wisi-indent-line)
1091
1092 (setq wisi-post-parse-fail-hook post-parse-fail)
1093 (setq wisi-indent-failed nil)
1094
1095 (add-hook 'before-change-functions 'wisi-before-change nil t)
1096 (add-hook 'after-change-functions 'wisi-after-change nil t)
1097
1098 ;; WORKAROUND: sometimes the first time font-lock is run,
1099 ;; syntax-propertize is not run properly, so we run it here
1100 (syntax-propertize (point-max))
1101
1102 (wisi-invalidate-cache)
1103
1104 ;; FIXME: debug counter
1105 (setq wisi-change-jit-lock-mode 0)
1106 )
1107
1108 (provide 'wisi)
1109 ;;; wisi.el ends here