]> code.delx.au - gnu-emacs-elpa/blob - packages/wisi/wisi.el
d4cd8e6dffe0fad4fcc8c3789ece5dd239f82a5e
[gnu-emacs-elpa] / packages / wisi / wisi.el
1 ;;; wisi.el --- Utilities for implementing an indentation/navigation engine using a generalized LALR parser
2 ;;
3 ;; Copyright (C) 2012 - 2014 Free Software Foundation, Inc.
4 ;;
5 ;; Author: Stephen Leake <stephen_leake@member.fsf.org>
6 ;; Version: 1.0.5
7 ;; package-requires: ((cl-lib "0.4") (emacs "24.2"))
8 ;; URL: http://stephe-leake.org/emacs/ada-mode/emacs-ada-mode.html
9 ;;
10 ;; This file is part of GNU Emacs.
11 ;;
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
16 ;;
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
21 ;;
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
24 ;;
25
26 ;;; Commentary:
27
28 ;;;; History: first experimental version Oct 2012
29 ;;
30 ;;;; indentation algorithm overview
31 ;;
32 ;; This design is inspired in part by experience writing a SMIE
33 ;; indentation engine for Ada, and the wisent parser.
34 ;;
35 ;; The general approach to indenting a given token is to find the
36 ;; start of the statement it is part of, or some other relevant point
37 ;; in the statement, and indent relative to that. So we need a parser
38 ;; that lets us find statement indent points from arbitrary places in
39 ;; the code.
40 ;;
41 ;; For example, the grammar for Ada as represented by the EBNF in LRM
42 ;; Annex P is not LALR(1), so we use a generalized LALR(1) parser (see
43 ;; wisi-parse, wisi-compile).
44 ;;
45 ;; The parser actions cache indentation and other information as text
46 ;; properties of tokens in statements.
47 ;;
48 ;; An indentation engine moves text in the buffer, as does user
49 ;; editing, so we can't rely on character positions remaining
50 ;; constant. So the parser actions use markers to store
51 ;; positions. Text properties also move with the text.
52 ;;
53 ;; The stored information includes a marker at each statement indent
54 ;; point. Thus, the indentation algorithm is: find the previous token
55 ;; with cached information, and either indent from it, or fetch from
56 ;; it the marker for a previous statement indent point, and indent
57 ;; relative to that.
58 ;;
59 ;; Since we have a cache (the text properties), we need to consider
60 ;; when to invalidate it. Ideally, we invalidate only when a change to
61 ;; the buffer would change the result of a parse that crosses that
62 ;; change, or starts after that change. Changes in whitespace
63 ;; (indentation and newlines) do not affect an Ada parse. Other
64 ;; languages are sensitive to newlines (Bash for example) or
65 ;; indentation (Python). Adding comments does not change a parse,
66 ;; unless code is commented out. For now we invalidate the cache after
67 ;; the edit point if the change involves anything other than
68 ;; whitespace.
69 ;;
70 ;;;; comparison to the SMIE parser
71 ;;
72 ;; The central problem to be solved in building the SMIE parser is
73 ;; grammar precedence conflicts; the general solution is refining
74 ;; keywords so that each new keyword can be assigned a unique
75 ;; precedence. This means ad hoc code must be written to determine the
76 ;; correct refinement for each language keyword from the surrounding
77 ;; tokens. In effect, for a complex language like Ada, the knowledge
78 ;; of the language grammar is mostly embedded in the refinement code;
79 ;; only a small amount is in the refined grammar. Implementing a SMIE
80 ;; parser for a new language involves the same amount of work as the
81 ;; first language.
82 ;;
83 ;; Using a generalized LALR parser avoids that particular problem;
84 ;; assuming the language is already defined by a grammar, it is only a
85 ;; matter of a format change to teach the wisi parser the
86 ;; language. The problem in a wisi indentation engine is caching the
87 ;; output of the parser in a useful way, since we can't start the
88 ;; parser from arbitrary places in the code (as we can with the SMIE
89 ;; parser). A second problem is determining when to invalidate the
90 ;; cache. But these problems are independent of the language being
91 ;; parsed, so once we have one wisi indentation engine working,
92 ;; adapting it to new languages should be quite simple.
93 ;;
94 ;; The SMIE parser does not find the start of each statement, only the
95 ;; first language keyword in each statement; additional code must be
96 ;; written to find the statement start and indent points. The wisi
97 ;; parser finds the statement start and indent points directly.
98 ;;
99 ;; In SMIE, it is best if each grammar rule is a complete statement,
100 ;; so forward-sexp will traverse the entire statement. If nested
101 ;; non-terminals are used, forward-sexp may stop inside one of the
102 ;; nested non-terminals. This problem does not occur with the wisi
103 ;; parser.
104 ;;
105 ;; A downside of the wisi parser is conflicts in the grammar; they can
106 ;; be much more difficult to resolve than in the SMIE parser. The
107 ;; generalized parser helps by handling conflicts, but it does so by
108 ;; running multiple parsers in parallel, persuing each choice in the
109 ;; conflict. If the conflict is due to a genuine ambiguity, both paths
110 ;; will succeed, which causes the parse to fail, since it is not clear
111 ;; which set of text properties to store. Even if one branch
112 ;; ultimately fails, running parallel parsers over large sections of
113 ;; code is slow. Finally, this approach can lead to exponential growth
114 ;; in the number of parsers. So grammar conflicts must still be
115 ;; analyzed and minimized.
116 ;;
117 ;; In addition, the complete grammar must be specified; in smie, it is
118 ;; often possible to specify a subset of the grammar.
119 ;;
120 ;;;; grammar compiler and parser
121 ;;
122 ;; Since we are using a generalized LALR(1) parser, we cannot use any
123 ;; of the wisent grammar functions. We use OpenToken wisi-generate
124 ;; to compile BNF to Elisp source (similar to
125 ;; semantic-grammar-create-package), and wisi-compile-grammar to
126 ;; compile that to the parser table.
127 ;;
128 ;; Semantic provides a complex lexer, more complicated than we need
129 ;; for indentation. So we use the elisp lexer, which consists of
130 ;; `forward-comment', `skip-syntax-forward', and `scan-sexp'. We wrap
131 ;; that in functions that return tokens in the form wisi-parse
132 ;; expects.
133 ;;
134 ;;;; code style
135 ;;
136 ;; 'wisi' was originally short for "wisent indentation engine", but
137 ;; now is just a name.
138 ;;
139 ;; not using lexical-binding because we support Emacs 23
140 ;;
141 ;;;;;
142
143 ;;; Code:
144
145 (require 'cl-lib)
146 (require 'wisi-parse)
147
148 ;; WORKAROUND: for some reason, this condition doesn't work in batch mode!
149 ;; (when (and (= emacs-major-version 24)
150 ;; (= emacs-minor-version 2))
151 (require 'wisi-compat-24.2)
152 ;;)
153
154 ;;;; lexer
155
156 (defvar-local wisi-class-list nil)
157 (defvar-local wisi-keyword-table nil)
158 (defvar-local wisi-punctuation-table nil)
159 (defvar-local wisi-punctuation-table-max-length 0)
160 (defvar-local wisi-string-double-term nil);; string delimited by double quotes
161 (defvar-local wisi-string-quote-escape-doubled nil
162 "Non-nil if a string delimiter is escaped by doubling it (as in Ada).")
163 (defvar-local wisi-string-quote-escape nil
164 "Cons '(delim . character) where 'character' escapes quotes in strings delimited by 'delim'.")
165 (defvar-local wisi-string-single-term nil) ;; string delimited by single quotes
166 (defvar-local wisi-symbol-term nil)
167
168 (defun wisi-forward-token (&optional text-only)
169 "Move point forward across one token, skipping leading whitespace and comments.
170 Return the corresponding token, in a format determined by TEXT-ONLY:
171 TEXT-ONLY t: text
172 TEXT-ONLY nil: (token text start . end)
173 where:
174 `token' is a token symbol (not string) from `wisi-punctuation-table',
175 `wisi-keyword-table', `wisi-string-double-term', `wisi-string-double-term' or `wisi-symbol-term'.
176
177 `text' is the token text from the buffer
178
179 `start, end' are the character positions in the buffer of the start
180 and end of the token text.
181
182 If at end of buffer, returns `wisent-eoi-term'."
183 (forward-comment (point-max))
184 ;; skips leading whitespace, comment, trailing whitespace.
185
186 (let ((start (point))
187 ;; (info "(elisp)Syntax Table Internals" "*info elisp syntax*")
188 (syntax (syntax-class (syntax-after (point))))
189 token-id token-text)
190 (cond
191 ((eobp)
192 (setq token-text "")
193 (setq token-id wisent-eoi-term))
194
195 ((eq syntax 1)
196 ;; punctuation. Find the longest matching string in wisi-punctuation-table
197 (forward-char 1)
198 (let ((next-point (point))
199 temp-text temp-id done)
200 (while (not done)
201 (setq temp-text (buffer-substring-no-properties start (point)))
202 (setq temp-id (car (rassoc temp-text wisi-punctuation-table)))
203 (when temp-id
204 (setq token-text temp-text
205 token-id temp-id
206 next-point (point)))
207 (if (or
208 (eobp)
209 (= (- (point) start) wisi-punctuation-table-max-length))
210 (setq done t)
211 (forward-char 1))
212 )
213 (goto-char next-point)))
214
215 ((memq syntax '(4 5)) ;; open, close parenthesis
216 (forward-char 1)
217 (setq token-text (buffer-substring-no-properties start (point)))
218 (setq token-id (symbol-value (intern-soft token-text wisi-keyword-table))))
219
220 ((eq syntax 7)
221 ;; string quote, either single or double. we assume point is before the start quote, not the end quote
222 (let ((delim (char-after (point)))
223 (forward-sexp-function nil))
224 (forward-sexp)
225 ;; point is now after the end quote; check for an escaped quote
226 (while (or
227 (and wisi-string-quote-escape-doubled
228 (eq (char-after (point)) delim))
229 (and (eq delim (car wisi-string-quote-escape))
230 (eq (char-before (1- (point))) (cdr wisi-string-quote-escape))))
231 (forward-sexp))
232 (setq token-text (buffer-substring-no-properties start (point)))
233 (setq token-id (if (= delim ?\") wisi-string-double-term wisi-string-single-term))))
234
235 (t ;; assuming word syntax
236 (skip-syntax-forward "w_'")
237 (setq token-text (buffer-substring-no-properties start (point)))
238 (setq token-id
239 (or (symbol-value (intern-soft (downcase token-text) wisi-keyword-table))
240 wisi-symbol-term)))
241 );; cond
242
243 (unless token-id
244 (error (wisi-error-msg "unrecognized token '%s'" (buffer-substring-no-properties start (point)))))
245
246 (if text-only
247 token-text
248 (cons token-id (cons token-text (cons start (point)))))
249 ))
250
251 (defun wisi-backward-token ()
252 "Move point backward across one token, skipping whitespace and comments.
253 Return (nil text start . end) - same structure as
254 wisi-forward-token, but does not look up symbol."
255 (forward-comment (- (point)))
256 ;; skips leading whitespace, comment, trailing whitespace.
257
258 ;; (info "(elisp)Syntax Table Internals" "*info elisp syntax*")
259 (let ((end (point))
260 (syntax (syntax-class (syntax-after (1- (point))))))
261 (cond
262 ((bobp) nil)
263
264 ((memq syntax '(4 5)) ;; open, close parenthesis
265 (backward-char 1))
266
267 ((eq syntax 7)
268 ;; a string quote. we assume we are after the end quote, not the start quote
269 (let ((forward-sexp-function nil))
270 (forward-sexp -1)))
271
272 (t
273 (if (zerop (skip-syntax-backward "."))
274 (skip-syntax-backward "w_'")))
275 )
276 (cons nil (cons (buffer-substring-no-properties (point) end) (cons (point) end)))
277 ))
278
279 ;;;; token info cache
280 ;;
281 ;; the cache stores the results of parsing as text properties on
282 ;; keywords, for use by the indention and motion engines.
283
284 (cl-defstruct
285 (wisi-cache
286 (:constructor wisi-cache-create)
287 (:copier nil))
288 nonterm;; nonterminal from parse (set by wisi-statement-action)
289
290 token
291 ;; terminal symbol from wisi-keyword-table or
292 ;; wisi-punctuation-table, or lower-level nonterminal from parse
293 ;; (set by wisi-statement-action)
294
295 last ;; pos of last char in token, relative to first (0 indexed)
296
297 class
298 ;; arbitrary lisp symbol, used for indentation and navigation.
299 ;; some classes are defined by wisi:
300 ;;
301 ;; 'block-middle - a block keyword (ie: if then else end), not at the start of a statement
302 ;;
303 ;; 'block-start - a block keyword at the start of a statement
304 ;;
305 ;; 'statement-start - the start of a statement
306 ;;
307 ;; 'open-paren
308 ;;
309 ;; others are language-specific
310
311 containing
312 ;; Marker at the containing keyword for this token.
313 ;; A containing keyword is an indent point; the start of a
314 ;; statement, or 'begin', 'then' or 'else' for a block of
315 ;; statements, etc.
316 ;; nil only for first token in buffer
317
318 prev ;; marker at previous motion token in statement; nil if none
319 next ;; marker at next motion token in statement; nil if none
320 end ;; marker at token at end of current statement
321 )
322
323 (defvar-local wisi-cache-max 0
324 "Maximimum position in buffer where wisi token cache is valid.")
325
326 (defvar-local wisi-parse-table nil)
327
328 (defvar-local wisi-parse-failed nil
329 "Non-nil when a recent parse has failed - cleared when parse succeeds.")
330
331 (defvar-local wisi-parse-try nil
332 "Non-nil when parse is needed - cleared when parse succeeds.")
333
334 (defvar-local wisi-change-need-invalidate nil)
335
336 (defvar wisi-end-caches nil
337 "List of buffer positions of caches in current statement that need wisi-cache-end set.")
338
339 (defun wisi-invalidate-cache()
340 "Invalidate the wisi token cache for the current buffer.
341 Also invalidate the Emacs syntax cache."
342 (interactive)
343 (setq wisi-cache-max 0)
344 (setq wisi-parse-try t)
345 (setq wisi-end-caches nil)
346 (syntax-ppss-flush-cache (point-min))
347 (with-silent-modifications
348 (remove-text-properties (point-min) (point-max) '(wisi-cache))))
349
350 (defun wisi-before-change (begin end)
351 "For `before-change-functions'."
352 ;; begin . end is range of text being deleted
353
354 ;; If jit-lock-after-change is before wisi-after-change in
355 ;; after-change-functions, it might use any invalid caches in the
356 ;; inserted text.
357 ;;
358 ;; So we check for that here, and ensure it is after
359 ;; wisi-after-change, which deletes the invalid caches
360 (when (boundp 'jit-lock-mode)
361 (when (memq 'wisi-after-change (memq 'jit-lock-after-change after-change-functions))
362 (setq after-change-functions (delete 'wisi-after-change after-change-functions))
363 (add-hook 'after-change-functions 'wisi-after-change nil t))
364 )
365
366 (save-excursion
367 ;; don't invalidate parse for whitespace, string, or comment changes
368 (let (;; (info "(elisp)Parser State")
369 (state (syntax-ppss begin)))
370 ;; syntax-ppss has moved point to "begin".
371 (cond
372 ((or
373 (nth 3 state); in string
374 (nth 4 state)); in comment
375 ;; FIXME: check that entire range is in comment or string
376 (setq wisi-change-need-invalidate nil))
377
378 ((progn
379 (skip-syntax-forward " " end);; does not skip newline
380 (eq (point) end))
381 (setq wisi-change-need-invalidate nil))
382
383 (t (setq wisi-change-need-invalidate t))
384 ))))
385
386 (defun wisi-after-change (begin end length)
387 "For `after-change-functions'."
388 ;; begin . end is range of text being inserted (may be empty)
389
390 ;; (syntax-ppss-flush-cache begin) is in before-change-functions
391
392 (cond
393 (wisi-parse-failed
394 ;; The parse was failing, probably due to bad syntax; this change
395 ;; may have fixed it, so try reparse.
396 (setq wisi-parse-try t)
397
398 ;; remove 'wisi-cache on inserted text, which could have caches
399 ;; from before the failed parse, and are in any case invalid.
400 (with-silent-modifications
401 (remove-text-properties begin end '(wisi-cache)))
402 )
403
404 ((>= wisi-cache-max begin)
405 ;; The parse had succeeded past the start of the inserted
406 ;; text.
407 (save-excursion
408 (let ((need-invalidate t)
409 ;; (info "(elisp)Parser State")
410 (state (syntax-ppss begin)))
411 ;; syntax-ppss has moved point to "begin".
412 (cond
413 (wisi-change-need-invalidate
414 ;; wisi-before change determined the removed text alters the
415 ;; parse
416 nil)
417
418 ((or
419 (nth 3 state); in string
420 (nth 4 state)); in comment
421 ;; FIXME: insert newline in comment to create non-comment!?
422 ;; or paste a chunk of code
423 ;; => check that all of change region is comment or string
424 (setq need-invalidate nil)
425 ;; no caches to remove
426 )
427
428 ((progn
429 (skip-syntax-forward " " end);; does not skip newlines
430 (eq (point) end))
431 (setq need-invalidate nil))
432
433 (t nil)
434 )
435
436 (if need-invalidate
437 ;; The inserted or deleted text could alter the parse;
438 ;; wisi-invalidate-cache removes all 'wisi-cache.
439 (wisi-invalidate-cache)
440
441 ;; else move cache-max by the net change length. We don't
442 ;; need to delete 'wisi-cache in the inserted text, because
443 ;; if there were any it would not pass the above.
444 (setq wisi-cache-max
445 (+ wisi-cache-max (- end begin length))))
446 )
447 ))
448
449 (t
450 ;; parse never attempted, or only done to before BEGIN. Just
451 ;; remove 'wisi-cache
452 (with-silent-modifications
453 (remove-text-properties begin end '(wisi-cache)))
454 )
455 ))
456
457 (defun wisi-get-cache (pos)
458 "Return `wisi-cache' struct from the `wisi-cache' text property at POS.
459 If accessing cache at a marker for a token as set by `wisi-cache-tokens', POS must be (1- mark)."
460 (get-text-property pos 'wisi-cache))
461
462 (defvar-local wisi-parse-error-msg nil)
463
464 (defun wisi-goto-error ()
465 "Move point to position in last error message (if any)."
466 (when (string-match ":\\([0-9]+\\):\\([0-9]+\\):" wisi-parse-error-msg)
467 (let ((line (string-to-number (match-string 1 wisi-parse-error-msg)))
468 (col (string-to-number (match-string 2 wisi-parse-error-msg))))
469 (goto-char (point-min))
470 (forward-line (1- line))
471 (forward-char col))))
472
473 (defun wisi-show-parse-error ()
474 "Show last wisi-parse error."
475 (interactive)
476 (if wisi-parse-failed
477 (progn
478 (message wisi-parse-error-msg)
479 (wisi-goto-error))
480 (message "parse succeeded")))
481
482 (defun wisi-validate-cache (pos)
483 "Ensure cached data is valid at least up to POS in current buffer."
484 (let ((msg (format "wisi: parsing %s:%d ..." (buffer-name) (line-number-at-pos))))
485 (when (and wisi-parse-try
486 (< wisi-cache-max pos))
487 (when (> wisi-debug 0)
488 (message msg))
489
490 (setq wisi-parse-try nil)
491 (setq wisi-parse-error-msg nil)
492 (save-excursion
493 (goto-char wisi-cache-max)
494 (if (> wisi-debug 1)
495 ;; let debugger stop in wisi-parse
496 (progn
497 (wisi-parse wisi-parse-table 'wisi-forward-token)
498 (setq wisi-cache-max (point))
499 (setq wisi-parse-failed nil))
500 ;; else capture errors from bad syntax, so higher level functions can try to continue
501 (condition-case err
502 (progn
503 (wisi-parse wisi-parse-table 'wisi-forward-token)
504 (setq wisi-cache-max (point))
505 (setq wisi-parse-failed nil))
506 (wisi-parse-error
507 (setq wisi-parse-failed t)
508 (setq wisi-parse-error-msg (cdr err)))
509 )))
510 (if wisi-parse-error-msg
511 ;; error
512 (when (> wisi-debug 0)
513 (message "%s error" msg)
514 (wisi-goto-error)
515 (error wisi-parse-error-msg))
516 ;; no msg; success
517 (when (> wisi-debug 0)
518 (message "%s done" msg)))
519 )))
520
521 (defun wisi-get-containing-cache (cache)
522 "Return cache from (wisi-cache-containing CACHE)."
523 (let ((containing (wisi-cache-containing cache)))
524 (and containing
525 (wisi-get-cache (1- containing)))))
526
527 (defun wisi-cache-text (cache)
528 "Return property-less buffer substring designated by cache.
529 Point must be at cache."
530 (buffer-substring-no-properties (point) (+ (point) (wisi-cache-last cache))))
531
532 ;;;; parse actions
533
534 (defun wisi-set-end (start-mark end-mark)
535 "Set END-MARK on all caches in `wisi-end-caches' in range START-MARK END-MARK,
536 delete from `wisi-end-caches'."
537 (let ((i 0)
538 pos cache)
539 (while (< i (length wisi-end-caches))
540 (setq pos (nth i wisi-end-caches))
541 (setq cache (wisi-get-cache pos))
542
543 (if (and (>= pos start-mark)
544 (< pos end-mark))
545 (progn
546 (setf (wisi-cache-end cache) end-mark)
547 (setq wisi-end-caches (delq pos wisi-end-caches)))
548
549 ;; else not in range
550 (setq i (1+ i)))
551 )))
552
553 (defvar wisi-tokens nil)
554 ;; keep byte-compiler happy; `wisi-tokens' is bound in action created
555 ;; by wisi-semantic-action
556
557 (defun wisi-statement-action (&rest pairs)
558 "Cache information in text properties of tokens.
559 Intended as a grammar non-terminal action.
560
561 PAIRS is of the form [TOKEN-NUMBER CLASS] ... where TOKEN-NUMBER
562 is the (1 indexed) token number in the production, CLASS is the wisi class of
563 that token. Use in a grammar action as:
564 (wisi-statement-action 1 'statement-start 7 'statement-end)"
565 (save-excursion
566 (let ((first-item t)
567 first-keyword-mark
568 (override-start nil))
569 (while pairs
570 (let* ((number (1- (pop pairs)))
571 (region (cddr (nth number wisi-tokens)));; wisi-tokens is let-bound in wisi-parse-reduce
572 (token (car (nth number wisi-tokens)))
573 (class (pop pairs))
574 (mark
575 ;; Marker one char into token, so indent-line-to
576 ;; inserts space before the mark, not after
577 (when region (copy-marker (1+ (car region)))))
578 cache)
579
580 (unless (memq class wisi-class-list)
581 (error "%s not in wisi-class-list" class))
582
583 (if region
584 (progn
585 (if (setq cache (wisi-get-cache (car region)))
586 ;; We are processing a previously set non-terminal; ie generic_formal_part in
587 ;;
588 ;; generic_package_declaration : generic_formal_part package_specification SEMICOLON
589 ;; (wisi-statement-action 1 'block-start 2 'block-middle 3 'statement-end)
590 ;;
591 ;; or simple_statement in
592 ;;
593 ;; statement : label_opt simple_statement
594 ;;
595 ;; override nonterm, class and containing
596 (progn
597 (cl-case (wisi-cache-class cache)
598 (block-start
599 (setf (wisi-cache-class cache)
600 (cond
601 ((eq override-start nil)
602 (cond
603 ((memq class '(block-start statement-start)) 'block-start)
604 (t 'block-middle)))
605
606 ((memq override-start '(block-start statement-start)) 'block-start)
607
608 (t (error "unexpected override-start"))
609 )))
610 (t
611 (setf (wisi-cache-class cache) (or override-start class)))
612 )
613 (setf (wisi-cache-nonterm cache) $nterm)
614 (setf (wisi-cache-containing cache) first-keyword-mark))
615
616 ;; else create new cache
617 (with-silent-modifications
618 (put-text-property
619 (car region)
620 (1+ (car region))
621 'wisi-cache
622 (wisi-cache-create
623 :nonterm $nterm;; $nterm defined in wisi-semantic-action
624 :token token
625 :last (- (cdr region) (car region))
626 :class (or override-start class)
627 :containing first-keyword-mark)
628 ))
629 (if wisi-end-caches
630 (push (car region) wisi-end-caches)
631 (setq wisi-end-caches (list (car region)))
632 ))
633
634 (when first-item
635 (setq first-item nil)
636 (when (or override-start
637 ;; FIXME: why block-middle here?
638 (memq class '(block-middle block-start statement-start)))
639 (setq override-start nil)
640 (setq first-keyword-mark mark)))
641
642 (when (eq class 'statement-end)
643 (wisi-set-end (1- first-keyword-mark) (copy-marker (1+ (car region)))))
644 )
645
646 ;; region is nil when a production is empty; if the first
647 ;; token is a start, override the class on the next token.
648 (when (and first-item
649 (memq class '(block-middle block-start statement-start)))
650 (setq override-start class)))
651 ))
652 )))
653
654 (defun wisi-containing-action (containing-token contained-token)
655 "Set containing marks in all tokens in CONTAINED-TOKEN with null containing mark to marker pointing to CONTAINING-TOKEN.
656 If CONTAINING-TOKEN is empty, the next token number is used."
657 ;; wisi-tokens is is bound in action created by wisi-semantic-action
658 (let* ((containing-region (cddr (nth (1- containing-token) wisi-tokens)))
659 (contained-region (cddr (nth (1- contained-token) wisi-tokens))))
660
661 (unless containing-region ;;
662 (signal 'wisi-parse-error
663 (wisi-error-msg
664 "wisi-containing-action: containing-region '%s' is empty. grammar error; bad action"
665 (nth 1 (nth (1- containing-token) wisi-tokens)))))
666
667 (unless (or (not contained-region) ;; contained-token is empty
668 (wisi-get-cache (car containing-region)))
669 (signal 'wisi-parse-error
670 (wisi-error-msg
671 "wisi-containing-action: containing-token '%s' has no cache. grammar error; missing action"
672 (nth 1 (nth (1- containing-token) wisi-tokens)))))
673
674 (while (not containing-region)
675 ;; containing-token is empty; use next
676 (setq containing-region (cddr (nth containing-token wisi-tokens))))
677
678 (when contained-region
679 ;; nil when empty production, may not contain any caches
680 (save-excursion
681 (goto-char (cdr contained-region))
682 (let ((cache (wisi-backward-cache))
683 (mark (copy-marker (1+ (car containing-region)))))
684 (while cache
685
686 ;; skip blocks that are already marked
687 (while (and (>= (point) (car contained-region))
688 (markerp (wisi-cache-containing cache)))
689 (goto-char (1- (wisi-cache-containing cache)))
690 (setq cache (wisi-get-cache (point))))
691
692 (if (or (and (= (car containing-region) (car contained-region))
693 (<= (point) (car contained-region)))
694 (< (point) (car contained-region)))
695 ;; done
696 (setq cache nil)
697
698 ;; else set mark, loop
699 (setf (wisi-cache-containing cache) mark)
700 (setq cache (wisi-backward-cache)))
701 ))))))
702
703 (defun wisi-motion-action (&rest token-numbers)
704 "Set prev/next marks in all tokens given by TOKEN-NUMBERS.
705 Each TOKEN-NUMBERS is one of:
706
707 number: the token number; mark that token
708
709 list (number token_id):
710 list (number (token_id token_id)):
711 mark all tokens with token_id in the nonterminal given by the number."
712 (save-excursion
713 (let (prev-keyword-mark
714 prev-cache
715 cache
716 mark)
717 (while token-numbers
718 (let ((token-number (pop token-numbers))
719 target-token
720 region)
721 (cond
722 ((numberp token-number)
723 (setq target-token nil)
724 (setq region (cddr (nth (1- token-number) wisi-tokens)))
725 (when region
726 (setq cache (wisi-get-cache (car region)))
727 (setq mark (copy-marker (1+ (car region))))
728
729 (when (and prev-keyword-mark
730 cache
731 (null (wisi-cache-prev cache)))
732 (setf (wisi-cache-prev cache) prev-keyword-mark)
733 (setf (wisi-cache-next prev-cache) mark))
734
735 (setq prev-keyword-mark mark)
736 (setq prev-cache cache)
737 ))
738
739 ((listp token-number)
740 ;; token-number may contain 0, 1, or more token_id; token_id may be a list
741 ;; the corresponding region may be empty
742 ;; there must have been a prev keyword
743 (setq target-token (cadr token-number))
744 (when (not (listp target-token))
745 (setq target-token (list target-token)))
746 (setq token-number (car token-number))
747 (setq region (cddr (nth (1- token-number) wisi-tokens)))
748 (when region ;; not an empty token
749 (goto-char (car region))
750 (while (wisi-forward-find-token target-token (cdr region) t)
751 (setq cache (wisi-get-cache (point)))
752 (setq mark (copy-marker (1+ (point))))
753
754 (when (null (wisi-cache-prev cache))
755 (setf (wisi-cache-prev cache) prev-keyword-mark)
756 (setf (wisi-cache-next prev-cache) mark)
757 (setq prev-keyword-mark mark)
758 (setq prev-cache cache))
759
760 (wisi-forward-token);; don't find same token again
761 ))
762 )
763
764 (t
765 (error "unexpected token-number %s" token-number))
766 )
767
768 ))
769 )))
770
771 ;;;; motion
772 (defun wisi-backward-cache ()
773 "Move point backward to the beginning of the first token preceding point that has a cache.
774 Returns cache, or nil if at beginning of buffer."
775 (let (cache pos)
776 (setq pos (previous-single-property-change (point) 'wisi-cache))
777 ;; There are three cases:
778 ;;
779 ;; 1) caches separated by non-cache chars: 'if ... then'
780 ;; pos is before 'f', cache is on 'i'
781 ;;
782 ;; 2) caches not separated: ');'
783 ;; pos is before ';', cache is on ';'
784 ;;
785 ;; 3) at bob; pos is nil
786 ;;
787 (if pos
788 (progn
789 (setq cache (get-text-property pos 'wisi-cache))
790 (if cache
791 ;; case 2
792 (goto-char pos)
793 ;; case 1
794 (setq cache (get-text-property (1- pos) 'wisi-cache))
795 (goto-char (1- pos))))
796 ;; at bob
797 (goto-char (point-min))
798 (setq cache nil))
799 cache
800 ))
801
802 (defun wisi-forward-cache ()
803 "Move point forward to the beginning of the first token after point that has a cache.
804 Returns cache, or nil if at end of buffer."
805 (let (cache pos)
806 (when (get-text-property (point) 'wisi-cache)
807 ;; on a cache; get past it
808 (goto-char (1+ (point))))
809
810 (setq cache (get-text-property (point) 'wisi-cache))
811 (if cache
812 nil
813
814 (setq pos (next-single-property-change (point) 'wisi-cache))
815 (if pos
816 (progn
817 (goto-char pos)
818 (setq cache (get-text-property pos 'wisi-cache)))
819 ;; at eob
820 (goto-char (point-max))
821 (setq cache nil))
822 )
823 cache
824 ))
825
826 (defun wisi-forward-find-class (class limit)
827 "Search forward for a token that has a cache with CLASS.
828 Return cache, or nil if at end of buffer.
829 If LIMIT (a buffer position) is reached, throw an error."
830 (let ((cache (wisi-forward-cache)))
831 (while (not (eq class (wisi-cache-class cache)))
832 (setq cache (wisi-forward-cache))
833 (when (>= (point) limit)
834 (error "cache with class %s not found" class)))
835 cache))
836
837 (defun wisi-forward-find-token (token limit &optional noerror)
838 "Search forward for a token that has a cache with TOKEN.
839 If point is at a matching token, return that token.
840 TOKEN may be a list; stop on any cache that has a member of the list.
841 Return cache, or nil if at end of buffer.
842 If LIMIT (a buffer position) is reached, then if NOERROR is nil, throw an
843 error, if non-nil, return nil."
844 (let ((token-list (cond
845 ((listp token) token)
846 (t (list token))))
847 (cache (wisi-get-cache (point)))
848 (done nil))
849 (while (not (or done
850 (and cache
851 (memq (wisi-cache-token cache) token-list))))
852 (setq cache (wisi-forward-cache))
853 (when (>= (point) limit)
854 (if noerror
855 (progn
856 (setq done t)
857 (setq cache nil))
858 (error "cache with token %s not found" token))))
859 cache))
860
861 (defun wisi-forward-find-nonterm (nonterm limit)
862 "Search forward for a token that has a cache with NONTERM.
863 NONTERM may be a list; stop on any cache that has a member of the list.
864 Return cache, or nil if at end of buffer.
865 If LIMIT (a buffer position) is reached, throw an error."
866 (let ((nonterm-list (cond
867 ((listp nonterm) nonterm)
868 (t (list nonterm))))
869 (cache (wisi-forward-cache)))
870 (while (not (memq (wisi-cache-nonterm cache) nonterm-list))
871 (setq cache (wisi-forward-cache))
872 (when (>= (point) limit)
873 (error "cache with nonterm %s not found" nonterm)))
874 cache))
875
876 (defun wisi-goto-cache-next (cache)
877 (goto-char (1- (wisi-cache-next cache)))
878 (wisi-get-cache (point))
879 )
880
881 (defun wisi-forward-statement-keyword ()
882 "If not at a cached token, move forward to next
883 cache. Otherwise move to cache-next, or next cache if nil.
884 Return cache found."
885 (wisi-validate-cache (point-max))
886 (let ((cache (wisi-get-cache (point))))
887 (if cache
888 (let ((next (wisi-cache-next cache)))
889 (if next
890 (goto-char (1- next))
891 (wisi-forward-token)
892 (wisi-forward-cache)))
893 (wisi-forward-cache))
894 )
895 (wisi-get-cache (point))
896 )
897
898 (defun wisi-backward-statement-keyword ()
899 "If not at a cached token, move backward to prev
900 cache. Otherwise move to cache-prev, or prev cache if nil."
901 (wisi-validate-cache (point-max))
902 (let ((cache (wisi-get-cache (point))))
903 (if cache
904 (let ((prev (wisi-cache-prev cache)))
905 (if prev
906 (goto-char (1- prev))
907 (wisi-backward-cache)))
908 (wisi-backward-cache))
909 ))
910
911 (defun wisi-goto-containing (cache &optional error)
912 "Move point to containing token for CACHE, return cache at that point."
913 (cond
914 ((markerp (wisi-cache-containing cache))
915 (goto-char (1- (wisi-cache-containing cache)))
916 (wisi-get-cache (point)))
917 (t
918 (when error
919 (error "already at outermost containing token")))
920 ))
921
922 (defun wisi-goto-containing-paren (cache)
923 "Move point to just after the open-paren containing CACHE.
924 Return cache for paren, or nil if no containing paren."
925 (while (and cache
926 (not (eq (wisi-cache-class cache) 'open-paren)))
927 (setq cache (wisi-goto-containing cache)))
928 (when cache
929 (forward-char 1))
930 cache)
931
932 (defun wisi-goto-start (cache)
933 "Move point to containing ancestor of CACHE that has class block-start or statement-start.
934 Return start cache."
935 (when
936 ;; cache nil at bob
937 (while (and cache
938 (not (memq (wisi-cache-class cache) '(block-start statement-start))))
939 (setq cache (wisi-goto-containing cache)))
940 )
941 cache)
942
943 (defun wisi-goto-end-1 (cache)
944 (goto-char (1- (wisi-cache-end cache))))
945
946 (defun wisi-goto-end ()
947 "Move point to token at end of statement point is in or before."
948 (interactive)
949 (wisi-validate-cache (point-max))
950 (let ((cache (or (wisi-get-cache (point))
951 (wisi-forward-cache))))
952 (when (wisi-cache-end cache)
953 ;; nil when cache is statement-end
954 (wisi-goto-end-1 cache))
955 ))
956
957 (defun wisi-next-statement-cache (cache)
958 "Move point to CACHE-next, return cache; error if nil."
959 (when (not (markerp (wisi-cache-next cache)))
960 (error "no next statement cache"))
961 (goto-char (1- (wisi-cache-next cache)))
962 (wisi-get-cache (point)))
963
964 (defun wisi-prev-statement-cache (cache)
965 "Move point to CACHE-next, return cache; error if nil."
966 (when (not (markerp (wisi-cache-prev cache)))
967 (error "no prev statement cache"))
968 (goto-char (1- (wisi-cache-prev cache)))
969 (wisi-get-cache (point)))
970
971 ;;;; indentation
972
973 (defun wisi-comment-indent ()
974 "For `comment-indent-function'. Indent single line comment to
975 the comment on the previous line."
976 ;; This should only be called by comment-indent-new-line or
977 ;; fill-comment-paragraph, so there will be a preceding comment line
978 ;; that we can trust.
979 (save-excursion
980 (forward-comment -1)
981 (if (looking-at comment-start)
982 (current-column)
983 (error "wisi-comment-indent called after non-comment"))))
984
985 (defun wisi-indent-current (offset)
986 "Return indentation OFFSET relative to indentation of current line."
987 (+ (current-indentation) offset)
988 )
989
990 (defun wisi-indent-paren (offset)
991 "Return indentation OFFSET relative to preceding open paren."
992 (save-excursion
993 (goto-char (nth 1 (syntax-ppss)))
994 (+ (current-column) offset)))
995
996 (defun wisi-indent-start (offset cache)
997 "Return indentation of OFFSET relative to containing ancestor
998 of CACHE with class statement-start or block-start."
999 (wisi-goto-start cache)
1000 (+ (current-indentation) offset))
1001
1002 (defun wisi-indent-statement ()
1003 "Indent region given by `wisi-goto-start' on cache at or before point, then wisi-cache-end."
1004 ;; force reparse, in case parser got confused
1005 (let ((wisi-parse-try t))
1006 (wisi-validate-cache (point)))
1007
1008 (save-excursion
1009 (let ((cache (or (wisi-get-cache (point))
1010 (wisi-backward-cache))))
1011 (when cache
1012 ;; can be nil if in header comment
1013 (let ((start (progn (wisi-goto-start cache) (point)))
1014 (end (progn
1015 (when (wisi-cache-end cache)
1016 ;; nil when cache is statement-end
1017 (goto-char (1- (wisi-cache-end cache))))
1018 (point))))
1019 (indent-region start end)
1020 ))
1021 )))
1022
1023 (defvar-local wisi-indent-calculate-functions nil
1024 "Functions to calculate indentation. Each called with point
1025 before a token at the beginning of a line (at current
1026 indentation); return indentation column for that token, or
1027 nil. May move point. Calling stops when first function returns
1028 non-nil.")
1029
1030 (defvar-local wisi-post-parse-fail-hook
1031 "Function to reindent portion of buffer.
1032 Called from `wisi-indent-line' when a parse succeeds after
1033 failing; assumes user was editing code that is now syntactically
1034 correct. Must leave point at indentation of current line.")
1035
1036 (defvar-local wisi-indent-failed nil
1037 "Non-nil when wisi-indent-line fails due to parse failing; cleared when indent succeeds.")
1038
1039 (defun wisi-indent-line ()
1040 "Indent current line using the wisi indentation engine."
1041 (interactive)
1042
1043 (let* ((savep (point))
1044 (indent
1045 (or (save-excursion
1046 (wisi-validate-cache (point))
1047 (back-to-indentation)
1048 (when (>= (point) savep) (setq savep nil))
1049 (if wisi-parse-failed
1050 (progn
1051 ;; parse failed. Assume user is editing; indent to previous line, fix it after parse succeeds
1052 (setq wisi-indent-failed t)
1053 (forward-line -1);; safe at bob
1054 (back-to-indentation)
1055 (current-column))
1056
1057 ;; else parse succeeded
1058 (when wisi-indent-failed
1059 (setq wisi-indent-failed nil)
1060 (run-hooks 'wisi-post-parse-fail-hook))
1061 (with-demoted-errors
1062 (or (run-hook-with-args-until-success 'wisi-indent-calculate-functions) 0))
1063 )))))
1064 (if savep
1065 ;; point was inside line text; leave it there
1066 (save-excursion (indent-line-to indent))
1067 ;; point was before line text; move to start of text
1068 (indent-line-to indent))
1069 ))
1070
1071 ;;;; debug
1072 (defun wisi-parse-buffer ()
1073 (interactive)
1074 (syntax-propertize (point-max))
1075 (wisi-invalidate-cache)
1076 (wisi-validate-cache (point-max)))
1077
1078 (defun wisi-show-cache ()
1079 "Show cache at point."
1080 (interactive)
1081 (message "%s" (wisi-get-cache (point))))
1082
1083 (defun wisi-show-token ()
1084 "Move forward across one keyword, show token_id."
1085 (interactive)
1086 (let ((token (wisi-forward-token)))
1087 (message "%s" (car token))))
1088
1089 (defun wisi-show-containing-or-previous-cache ()
1090 (interactive)
1091 (let ((cache (wisi-get-cache (point))))
1092 (if cache
1093 (message "containing %s" (wisi-goto-containing cache t))
1094 (message "previous %s" (wisi-backward-cache)))
1095 ))
1096
1097 ;;;;; setup
1098
1099 (defun wisi-setup (indent-calculate post-parse-fail class-list keyword-table token-table parse-table)
1100 "Set up a buffer for parsing files with wisi."
1101 (setq wisi-class-list class-list)
1102 (setq wisi-string-double-term (car (symbol-value (intern-soft "string-double" token-table))))
1103 (setq wisi-string-single-term (car (symbol-value (intern-soft "string-single" token-table))))
1104 (setq wisi-symbol-term (car (symbol-value (intern-soft "symbol" token-table))))
1105
1106 (setq wisi-punctuation-table (symbol-value (intern-soft "punctuation" token-table)))
1107 (setq wisi-punctuation-table-max-length 0)
1108 (let (fail)
1109 (dolist (item wisi-punctuation-table)
1110 (when item ;; default matcher can be nil
1111
1112 ;; check that all chars used in punctuation tokens have punctuation syntax
1113 (mapc (lambda (char)
1114 (when (not (= ?. (char-syntax char)))
1115 (setq fail t)
1116 (message "in %s, %c does not have punctuation syntax"
1117 (car item) char)))
1118 (cdr item))
1119
1120 (when (< wisi-punctuation-table-max-length (length (cdr item)))
1121 (setq wisi-punctuation-table-max-length (length (cdr item)))))
1122 )
1123 (when fail
1124 (error "aborting due to punctuation errors")))
1125
1126 (setq wisi-keyword-table keyword-table)
1127 (setq wisi-parse-table parse-table)
1128
1129 (setq wisi-indent-calculate-functions indent-calculate)
1130 (set (make-local-variable 'indent-line-function) 'wisi-indent-line)
1131
1132 (setq wisi-post-parse-fail-hook post-parse-fail)
1133 (setq wisi-indent-failed nil)
1134
1135 (add-hook 'before-change-functions 'wisi-before-change nil t)
1136 (add-hook 'after-change-functions 'wisi-after-change nil t)
1137
1138 ;; WORKAROUND: sometimes the first time font-lock is run,
1139 ;; syntax-propertize is not run properly, so we run it here
1140 (syntax-propertize (point-max))
1141
1142 (wisi-invalidate-cache)
1143 )
1144
1145 (provide 'wisi)
1146 ;;; wisi.el ends here