-;;; smie.el --- Simple Minded Indentation Engine
+;;; smie.el --- Simple Minded Indentation Engine -*- lexical-binding: t -*-
-;; Copyright (C) 2010 Free Software Foundation, Inc.
+;; Copyright (C) 2010-2011 Free Software Foundation, Inc.
;; Author: Stefan Monnier <monnier@iro.umontreal.ca>
;; Keywords: languages, lisp, internal, parsing, indentation
;; TODO & BUGS:
;;
-;; - FIXME: I think the behavior on empty lines is wrong. It shouldn't
-;; look at the next token on subsequent lines.
;; - Using the structural information SMIE gives us, it should be possible to
;; implement a `smie-align' command that would automatically figure out what
;; there is to align and how to do it (something like: align the token of
;; - Maybe accept two juxtaposed non-terminals in the BNF under the condition
;; that the first always ends with a terminal, or that the second always
;; starts with a terminal.
+;; - Permit EBNF-style notation.
+;; - If the grammar has conflicts, the only way is to make the lexer return
+;; different tokens for the different cases. This extra work performed by
+;; the lexer can be costly and unnecessary: we perform this extra work every
+;; time we find the conflicting token, regardless of whether or not the
+;; difference between the various situations is relevant to the current
+;; situation. E.g. we may try to determine whether a ";" is a ";-operator"
+;; or a ";-separator" in a case where we're skipping over a "begin..end" pair
+;; where the difference doesn't matter. For frequently occurring tokens and
+;; rarely occurring conflicts, this can be a significant performance problem.
+;; We could try and let the lexer return a "set of possible tokens
+;; plus a refinement function" and then let parser call the refinement
+;; function if needed.
+;; - Make it possible to better specify the behavior in the face of
+;; syntax errors. IOW provide some control over the choice of precedence
+;; levels within the limits of the constraints. E.g. make it possible for
+;; the grammar to specify that "begin..end" has lower precedence than
+;; "Module..EndModule", so that if a "begin" is missing, scanning from the
+;; "end" will stop at "Module" rather than going past it (and similarly,
+;; scanning from "Module" should not stop at a spurious "end").
;;; Code:
;; Maybe also add (or <elem1> <elem2>...) for things like
;; (exp (exp (or "+" "*" "=" ..) exp)).
;; Basically, make it EBNF (except for the specification of a separator in
- ;; the repetition).
+ ;; the repetition, maybe).
(let ((nts (mapcar 'car bnf)) ;Non-terminals
(first-ops-table ())
(last-ops-table ())
;; the trouble, and it lets the writer of the BNF
;; be a bit more sloppy by skipping uninteresting base
;; cases which are terminals but not OPs.
- (assert (not (member (cadr rhs) nts)))
+ (when (member (cadr rhs) nts)
+ (error "Adjacent non-terminals: %s %s"
+ (car rhs) (cadr rhs)))
(pushnew (cadr rhs) first-ops)))
(let ((shr (reverse rhs)))
(if (not (member (car shr) nts))
(pushnew (car shr) last-ops)
(pushnew (car shr) last-nts)
(when (consp (cdr shr))
- (assert (not (member (cadr shr) nts)))
+ (when (member (cadr shr) nts)
+ (error "Adjacent non-terminals: %s %s"
+ (cadr shr) (car shr)))
(pushnew (cadr shr) last-ops)))))
(push (cons nt first-ops) first-ops-table)
(push (cons nt last-ops) last-ops-table)
"Return a table classifying terminals.
Each terminal can either be an `opener', a `closer', or neither."
(let ((table (make-hash-table :test #'equal))
+ (nts (mapcar #'car bnf))
(alist '()))
(dolist (category bnf)
(puthash (car category) 'neither table) ;Remove non-terminals.
(let ((first (pop rhs)))
(puthash first
(if (memq (gethash first table) '(nil opener))
- 'opener 'neither)
+ 'opener
+ (unless (member first nts)
+ (error "SMIE: token %s is both opener and non-opener"
+ first))
+ 'neither)
table))
(while (cdr rhs)
(puthash (pop rhs) 'neither table)) ;Remove internals.
(let ((last (pop rhs)))
(puthash last
(if (memq (gethash last table) '(nil closer))
- 'closer 'neither)
+ 'closer
+ (unless (member last nts)
+ (error "SMIE: token %s is both closer and non-closer"
+ last))
+ 'neither)
table)))))
(maphash (lambda (tok v)
(when (memq v '(closer opener))
(to (cdar eqs)))
(setq eqs (cdr eqs))
(if (eq to from)
- nil ;Nothing to do.
+ nil ;Nothing to do.
(dolist (other-eq eqs)
(if (eq from (cdr other-eq)) (setcdr other-eq to))
(when (eq from (car other-eq))
(setcar (car eq) (cadr eq))
;; (smie-check-grammar table prec2 'step2)
)
- ;; Finally, fill in the remaining vars (which only appeared on the
- ;; right side of the < constraints).
- (let ((classification-table (gethash :smie-open/close-alist prec2)))
- (dolist (x table)
- ;; When both sides are nil, it means this operator binds very
- ;; very tight, but it's still just an operator, so we give it
- ;; the highest precedence.
- ;; OTOH if only one side is nil, it usually means it's like an
- ;; open-paren, which is very important for indentation purposes,
- ;; so we keep it nil if so, to make it easier to recognize.
- (unless (or (nth 1 x)
- (eq 'opener (cdr (assoc (car x) classification-table))))
- (setf (nth 1 x) i)
- (incf i)) ;See other (incf i) above.
- (unless (or (nth 2 x)
- (eq 'closer (cdr (assoc (car x) classification-table))))
- (setf (nth 2 x) i)
- (incf i))))) ;See other (incf i) above.
+ ;; Finally, fill in the remaining vars (which did not appear on the
+ ;; left side of any < constraint).
+ (dolist (x table)
+ (unless (nth 1 x)
+ (setf (nth 1 x) i)
+ (incf i)) ;See other (incf i) above.
+ (unless (nth 2 x)
+ (setf (nth 2 x) i)
+ (incf i)))) ;See other (incf i) above.
+ ;; Mark closers and openers.
+ (dolist (x (gethash :smie-open/close-alist prec2))
+ (let* ((token (car x))
+ (cons (case (cdr x)
+ (closer (cddr (assoc token table)))
+ (opener (cdr (assoc token table))))))
+ (assert (numberp (car cons)))
+ (setf (car cons) (list (car cons)))))
(let ((ca (gethash :smie-closer-alist prec2)))
(when ca (push (cons :smie-closer-alist ca) table)))
;; (smie-check-grammar table prec2 'step3)
This list is normally built by `smie-prec2->grammar'.
Each element is of the form (TOKEN LEFT-LEVEL RIGHT-LEVEL).
Parsing is done using an operator precedence parser.
-LEFT-LEVEL and RIGHT-LEVEL can be either numbers or nil, where nil
+LEFT-LEVEL and RIGHT-LEVEL can be either numbers or a list, where a list
means that this operator does not bind on the corresponding side,
-i.e. a LEFT-LEVEL of nil means this is a token that behaves somewhat like
+e.g. a LEFT-LEVEL of nil means this is a token that behaves somewhat like
an open-paren, whereas a RIGHT-LEVEL of nil would correspond to something
like a close-paren.")
OP-BACK is the accessor to the backward level of the level data.
HALFSEXP if non-nil, means skip over a partial sexp if needed. I.e. if the
first token we see is an operator, skip over its left-hand-side argument.
+HALFSEXP can also be a token, in which case it means to parse as if
+we had just successfully passed this token.
Possible return values:
(FORW-LEVEL POS TOKEN): we couldn't skip TOKEN because its back-level
is too high. FORW-LEVEL is the forw-level of TOKEN,
(nil POS TOKEN): we skipped over a paren-like pair.
nil: we skipped over an identifier, matched parentheses, ..."
(catch 'return
- (let ((levels ()))
+ (let ((levels
+ (if (stringp halfsexp)
+ (prog1 (list (cdr (assoc halfsexp smie-grammar)))
+ (setq halfsexp nil)))))
(while
(let* ((pos (point))
(token (funcall next-token))
(if (eq pos (point))
;; We did not move, so let's abort the loop.
(throw 'return (list t (point))))))
- ((null (funcall op-back toklevels))
+ ((not (numberp (funcall op-back toklevels)))
;; A token like a paren-close.
- (assert (funcall op-forw toklevels)) ;Otherwise, why mention it?
+ (assert (numberp ; Otherwise, why mention it in smie-grammar.
+ (funcall op-forw toklevels)))
(push toklevels levels))
(t
(while (and levels (< (funcall op-back toklevels)
(setq levels (cdr levels)))
(cond
((null levels)
- (if (and halfsexp (funcall op-forw toklevels))
+ (if (and halfsexp (numberp (funcall op-forw toklevels)))
(push toklevels levels)
(throw 'return
(prog1 (list (or (car toklevels) t) (point) token)
;; Keep looking as long as we haven't matched the
;; topmost operator.
(levels
- (if (funcall op-forw toklevels)
+ (if (numberp (funcall op-forw toklevels))
(push toklevels levels)))
;; We matched the topmost operator. If the new operator
;; is the last in the corresponding BNF rule, we're done.
- ((null (funcall op-forw toklevels))
+ ((not (numberp (funcall op-forw toklevels)))
;; It is the last element, let's stop here.
(throw 'return (list nil (point) token)))
;; If the new operator is not the last in the BNF rule,
"Skip over one sexp.
HALFSEXP if non-nil, means skip over a partial sexp if needed. I.e. if the
first token we see is an operator, skip over its left-hand-side argument.
+HALFSEXP can also be a token, in which case we should skip the text
+assuming it is the left-hand-side argument of that token.
Possible return values:
(LEFT-LEVEL POS TOKEN): we couldn't skip TOKEN because its right-level
is too high. LEFT-LEVEL is the left-level of TOKEN,
(defun smie-forward-sexp (&optional halfsexp)
"Skip over one sexp.
HALFSEXP if non-nil, means skip over a partial sexp if needed. I.e. if the
-first token we see is an operator, skip over its left-hand-side argument.
+first token we see is an operator, skip over its right-hand-side argument.
+HALFSEXP can also be a token, in which case we should skip the text
+assuming it is the right-hand-side argument of that token.
Possible return values:
(RIGHT-LEVEL POS TOKEN): we couldn't skip TOKEN because its left-level
is too high. RIGHT-LEVEL is the right-level of TOKEN,
;; intervention, e.g. for Octave's use of `until'
;; as a pseudo-closer of `do'.
(closer)
- ((or (equal levels '(nil)) (nth 1 (car levels)))
+ ((or (equal levels '(nil)) (numberp (nth 1 (car levels))))
(error "Doesn't look like a block"))
(t
;; Now that smie-setup automatically sets smie-closer-alist
(when (and (eq (nth 2 level) (nth 1 other))
(not (memq other seen)))
(push other seen)
- (if (nth 2 other)
+ (if (numberp (nth 2 other))
(push other levels)
(push (car other) found))))))
(cond
((null found) (error "No known closer for opener %s" open))
- ;; FIXME: what should we do if there are various closers?
+ ;; What should we do if there are various closers?
(t (car found))))))))))
(unless (save-excursion (skip-chars-backward " \t") (bolp))
(newline))
(progn (goto-char start) (down-list inc) nil)
(forward-sexp inc)
(/= (point) pos)))
- ((and levels (null (nth (+ 1 offset) levels))) nil)
- ((and levels (null (nth (- 2 offset) levels)))
+ ((and levels (not (numberp (nth (+ 1 offset) levels)))) nil)
+ ((and levels (not (numberp (nth (- 2 offset) levels))))
(let ((end (point)))
(goto-char start)
(signal 'scan-error
;; anything else than this trigger char, lest we'd blink
;; both when inserting the trigger char and when
;; inserting a subsequent trigger char like SPC.
- (or (eq (point) pos)
+ (or (eq (char-before) last-command-event)
(not (memq (char-before)
smie-blink-matching-triggers)))
(or smie-blink-matching-inners
- (null (nth 2 (assoc token smie-grammar)))))
+ (not (numberp (nth 2 (assoc token smie-grammar))))))
;; The major mode might set blink-matching-check-function
;; buffer-locally so that interactive calls to
;; blink-matching-open work right, but let's not presume
(save-excursion
(let* ((pos (point))
(tok (funcall smie-forward-token-function)))
- (unless (cadr (assoc tok smie-grammar))
+ (unless (numberp (cadr (assoc tok smie-grammar)))
(goto-char pos))
(setq smie--parent
- (smie-backward-sexp 'halfsexp))))))
+ (or (smie-backward-sexp 'halfsexp)
+ (let (res)
+ (while (null (setq res (smie-backward-sexp))))
+ (list nil (point) (nth 2 res)))))))))
(defun smie-rule-parent-p (&rest parents)
"Return non-nil if the current token's parent is among PARENTS.
;; rules-function, so it gives it a chance to tweak
;; indentation (e.g. by forcing indentation relative to
;; its own parent, as in fn a => fn b => fn c =>).
- (if (or (null (car smie--parent)) (smie-indent--hanging-p))
+ (if (or (listp (car smie--parent)) (smie-indent--hanging-p))
(smie-indent-virtual) (current-column))))))
(defvar smie-rule-separator-outdent 2)
;; line, in which case we want to align it with its enclosing parent.
(cond
((and (eq method :before) (smie-rule-bolp) (not (smie-rule-sibling-p)))
- ;; FIXME: Rather than consult the number of spaces, we could *set* the
- ;; number of spaces so as to align the separator with the close-paren
- ;; while aligning the content with the rest.
(let ((parent-col (cdr (smie-rule-parent)))
(parent-pos-col ;FIXME: we knew this when computing smie--parent.
(save-excursion
(smie-indent-virtual)) ;:not-hanging
(scan-error nil)))))
-(defun smie-indent-keyword ()
- ;; Align closing token with the corresponding opening one.
- ;; (e.g. "of" with "case", or "in" with "let").
+(defun smie-indent-keyword (&optional token)
+ "Indent point based on the token that follows it immediately.
+If TOKEN is non-nil, assume that that is the token that follows point.
+Returns either a column number or nil if it considers that indentation
+should not be computed on the basis of the following token."
(save-excursion
(let* ((pos (point))
- (toklevels (smie-indent-forward-token))
- (token (pop toklevels)))
- (if (null (car toklevels))
- (save-excursion
- (goto-char pos)
- ;; Different cases:
- ;; - smie-indent--bolp: "indent according to others".
- ;; - common hanging: "indent according to others".
- ;; - SML-let hanging: "indent like parent".
- ;; - if-after-else: "indent-like parent".
- ;; - middle-of-line: "trust current position".
- (cond
- ((null (cdr toklevels)) nil) ;Not a keyword.
- ((smie-indent--rule :before token))
- ((smie-indent--bolp) ;I.e. non-virtual indent.
- ;; For an open-paren-like thingy at BOL, always indent only
- ;; based on other rules (typically smie-indent-after-keyword).
- nil)
- (t
- ;; By default use point unless we're hanging.
- (unless (smie-indent--hanging-p) (current-column)))))
-
+ (toklevels
+ (if token
+ (assoc token smie-grammar)
+ (let* ((res (smie-indent-forward-token)))
+ ;; Ignore tokens on subsequent lines.
+ (if (and (< pos (line-beginning-position))
+ ;; Make sure `token' also *starts* on another line.
+ (save-excursion
+ (smie-indent-backward-token)
+ (< pos (line-beginning-position))))
+ nil
+ (goto-char pos)
+ res)))))
+ (setq token (pop toklevels))
+ (cond
+ ((null (cdr toklevels)) nil) ;Not a keyword.
+ ((not (numberp (car toklevels)))
+ ;; Different cases:
+ ;; - smie-indent--bolp: "indent according to others".
+ ;; - common hanging: "indent according to others".
+ ;; - SML-let hanging: "indent like parent".
+ ;; - if-after-else: "indent-like parent".
+ ;; - middle-of-line: "trust current position".
+ (cond
+ ((smie-indent--rule :before token))
+ ((smie-indent--bolp) ;I.e. non-virtual indent.
+ ;; For an open-paren-like thingy at BOL, always indent only
+ ;; based on other rules (typically smie-indent-after-keyword).
+ nil)
+ (t
+ ;; By default use point unless we're hanging.
+ (unless (smie-indent--hanging-p) (current-column)))))
+ (t
;; FIXME: This still looks too much like black magic!!
- (let* ((parent (smie-backward-sexp 'halfsexp)))
+ (let* ((parent (smie-backward-sexp token)))
;; Different behaviors:
;; - align with parent.
;; - parent + offset.
;; So we use a heuristic here, which is that we only use virtual
;; if the parent is tightly linked to the child token (they're
;; part of the same BNF rule).
- (if (car parent) (current-column) (smie-indent-virtual))))))))))
+ (if (car parent) (current-column) (smie-indent-virtual)))))))))))
(defun smie-indent-comment ()
"Compute indentation of a comment."
(and (nth 4 (syntax-ppss))
'noindent))
+(defun smie-indent-inside-string ()
+ (and (nth 3 (syntax-ppss))
+ 'noindent))
+
(defun smie-indent-after-keyword ()
;; Indentation right after a special keyword.
(save-excursion
;; The default indentation after a keyword/operator is
;; 0 for infix, t for prefix, and use another rule
;; for postfix.
- ((null (nth 2 toklevel)) nil) ;A closer.
- ((or (null (nth 1 toklevel)) ;An opener.
- (rassoc tok smie-closer-alist)) ;An inner.
+ ((not (numberp (nth 2 toklevel))) nil) ;A closer.
+ ((or (not (numberp (nth 1 toklevel))) ;An opener.
+ (rassoc tok smie-closer-alist)) ;An inner.
(+ (smie-indent-virtual) (smie-indent--offset 'basic))) ;
- (t (smie-indent-virtual)))))) ;An infix.
+ (t (smie-indent-virtual)))))) ;An infix.
(defun smie-indent-exps ()
;; Indentation of sequences of simple expressions without
(defvar smie-indent-functions
'(smie-indent-fixindent smie-indent-bob smie-indent-close
- smie-indent-comment smie-indent-comment-continue smie-indent-comment-close
- smie-indent-comment-inside smie-indent-keyword smie-indent-after-keyword
+ smie-indent-comment smie-indent-comment-continue smie-indent-comment-close
+ smie-indent-comment-inside smie-indent-inside-string
+ smie-indent-keyword smie-indent-after-keyword
smie-indent-exps)
"Functions to compute the indentation.
Each function is called with no argument, shouldn't move point, and should
(save-excursion (indent-line-to indent))
(indent-line-to indent)))))
+(defun smie-auto-fill ()
+ (let ((fc (current-fill-column))
+ (try-again nil))
+ (while (and fc (> (current-column) fc))
+ (cond
+ ((not (or (nth 8 (save-excursion
+ (syntax-ppss (line-beginning-position))))
+ (nth 8 (syntax-ppss))))
+ (save-excursion
+ (beginning-of-line)
+ (smie-indent-forward-token)
+ (let ((bsf (point))
+ (gain 0)
+ curcol)
+ (while (<= (setq curcol (current-column)) fc)
+ ;; FIXME? `smie-indent-calculate' can (and often will)
+ ;; return a result that actually depends on the presence/absence
+ ;; of a newline, so the gain computed here may not be accurate,
+ ;; but in practice it seems to works well enough.
+ (let* ((newcol (smie-indent-calculate))
+ (newgain (- curcol newcol)))
+ (when (> newgain gain)
+ (setq gain newgain)
+ (setq bsf (point))))
+ (smie-indent-forward-token))
+ (when (> gain 0)
+ (setq try-again)
+ (goto-char bsf)
+ (newline-and-indent)))))
+ (t (do-auto-fill))))))
+
+
(defun smie-setup (grammar rules-function &rest keywords)
"Setup SMIE navigation and indentation.
GRAMMAR is a grammar table generated by `smie-prec2->grammar'.
(set (make-local-variable 'smie-rules-function) rules-function)
(set (make-local-variable 'smie-grammar) grammar)
(set (make-local-variable 'indent-line-function) 'smie-indent-line)
+ (set (make-local-variable 'normal-auto-fill-function) 'smie-auto-fill)
(set (make-local-variable 'forward-sexp-function)
'smie-forward-sexp-command)
(while keywords