X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/93d7a3669284221c9272784875f69c047873fe04..f4ff3e5cc0e873be609cf6172386c56587a83f31:/lisp/progmodes/cc-awk.el diff --git a/lisp/progmodes/cc-awk.el b/lisp/progmodes/cc-awk.el index 419803a7ad..3346767117 100644 --- a/lisp/progmodes/cc-awk.el +++ b/lisp/progmodes/cc-awk.el @@ -1,6 +1,7 @@ ;;; cc-awk.el --- AWK specific code within cc-mode. -;; Copyright (C) 1988,94,96,2000,01,02,03 Free Software Foundation, Inc. +;; Copyright (C) 1988, 1994, 1996, 2000, 2001, 2002, 2003, 2004, 2005, +;; 2006, 2007 Free Software Foundation, Inc. ;; Author: Alan Mackenzie (originally based on awk-mode.el) ;; Maintainer: FSF @@ -10,7 +11,7 @@ ;; GNU Emacs is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) +;; the Free Software Foundation; either version 3, or (at your option) ;; any later version. ;; GNU Emacs is distributed in the hope that it will be useful, @@ -19,21 +20,22 @@ ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. +;; along with this program; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. ;;; Commentary: ;; This file contains (most of) the adaptations to cc-mode required for the ;; integration of AWK Mode. -;; It is organised thusly: +;; It is organised thusly, the sections being separated by page breaks: ;; 1. The AWK Mode syntax table. -;; 2. Indentation calculation stuff ("c-awk-NL-prop text-property"). -;; 3. Syntax-table property/font-locking stuff, but not including the +;; 2. Regular expressions for analysing AWK code. +;; 3. Indentation calculation stuff ("c-awk-NL-prop text-property"). +;; 4. Syntax-table property/font-locking stuff, including the ;; font-lock-keywords setting. -;; 4. The AWK Mode before/after-change-functions. -;; 5. AWK Mode specific versions of commands like beginning-of-defun. +;; 5. The AWK Mode before/after-change-functions. +;; 6. AWK Mode specific versions of commands like beginning-of-defun. ;; The AWK Mode keymap, abbreviation table, and the mode function itself are ;; in cc-mode.el. @@ -84,6 +86,175 @@ st) "Syntax table in use in AWK Mode buffers.") + +;; This section defines regular expressions used in the analysis of AWK code. + +;; N.B. In the following regexps, an EOL is either \n OR \r. This is because +;; Emacs has in the past used \r to mark hidden lines in some fashion (and +;; maybe still does). + +(defconst c-awk-esc-pair-re "\\\\\\(.\\|\n\\|\r\\|\\'\\)") +;; Matches any escaped (with \) character-pair, including an escaped newline. +(defconst c-awk-non-eol-esc-pair-re "\\\\\\(.\\|\\'\\)") +;; Matches any escaped (with \) character-pair, apart from an escaped newline. +(defconst c-awk-comment-without-nl "#.*") +;; Matches an AWK comment, not including the terminating NL (if any). Note +;; that the "enclosing" (elisp) regexp must ensure the # is real. +(defconst c-awk-nl-or-eob "\\(\n\\|\r\\|\\'\\)") +;; Matches a newline, or the end of buffer. + +;; "Space" regular expressions. +(eval-and-compile + (defconst c-awk-escaped-nl "\\\\[\n\r]")) +;; Matches an escaped newline. +(eval-and-compile + (defconst c-awk-escaped-nls* (concat "\\(" c-awk-escaped-nl "\\)*"))) +;; Matches a possibly empty sequence of escaped newlines. Used in +;; awk-font-lock-keywords. +;; (defconst c-awk-escaped-nls*-with-space* +;; (concat "\\(" c-awk-escaped-nls* "\\|" "[ \t]+" "\\)*")) +;; The above RE was very slow. It's runtime was doubling with each additional +;; space :-( Reformulate it as below: +(eval-and-compile + (defconst c-awk-escaped-nls*-with-space* + (concat "\\(" c-awk-escaped-nl "\\|" "[ \t]" "\\)*"))) +;; Matches a possibly empty sequence of escaped newlines with optional +;; interspersed spaces and tabs. Used in awk-font-lock-keywords. +(defconst c-awk-blank-or-comment-line-re + (concat "[ \t]*\\(#\\|\\\\?$\\)")) +;; Matche (the tail of) a line containing at most either a comment or an +;; escaped EOL. + +;; REGEXPS FOR "HARMLESS" STRINGS/LINES. +(defconst c-awk-harmless-char-re "[^_#/\"\\\\\n\r]") +;; Matches any character but a _, #, /, ", \, or newline. N.B. _" starts a +;; localisation string in gawk 3.1 +(defconst c-awk-harmless-_ "_\\([^\"]\\|\\'\\)") +;; Matches an underline NOT followed by ". +(defconst c-awk-harmless-string*-re + (concat "\\(" c-awk-harmless-char-re "\\|" c-awk-esc-pair-re "\\|" c-awk-harmless-_ "\\)*")) +;; Matches a (possibly empty) sequence of chars without unescaped /, ", \, +;; #, or newlines. +(defconst c-awk-harmless-string*-here-re + (concat "\\=" c-awk-harmless-string*-re)) +;; Matches the (possibly empty) sequence of chars without unescaped /, ", \, +;; at point. +(defconst c-awk-harmless-line-re + (concat c-awk-harmless-string*-re + "\\(" c-awk-comment-without-nl "\\)?" c-awk-nl-or-eob)) +;; Matches (the tail of) an AWK \"logical\" line not containing an unescaped +;; " or /. "logical" means "possibly containing escaped newlines". A comment +;; is matched as part of the line even if it contains a " or a /. The End of +;; buffer is also an end of line. +(defconst c-awk-harmless-lines+-here-re + (concat "\\=\\(" c-awk-harmless-line-re "\\)+")) +;; Matches a sequence of (at least one) \"harmless-line\" at point. + + +;; REGEXPS FOR AWK STRINGS. +(defconst c-awk-string-ch-re "[^\"\\\n\r]") +;; Matches any character which can appear unescaped in a string. +(defconst c-awk-string-innards-re + (concat "\\(" c-awk-string-ch-re "\\|" c-awk-esc-pair-re "\\)*")) +;; Matches the inside of an AWK string (i.e. without the enclosing quotes). +(defconst c-awk-string-without-end-here-re + (concat "\\=_?\"" c-awk-string-innards-re)) +;; Matches an AWK string at point up to, but not including, any terminator. +;; A gawk 3.1+ string may look like _"localisable string". +(defconst c-awk-one-line-possibly-open-string-re + (concat "\"\\(" c-awk-string-ch-re "\\|" c-awk-non-eol-esc-pair-re "\\)*" + "\\(\"\\|\\\\?$\\|\\'\\)")) + +;; REGEXPS FOR AWK REGEXPS. +(defconst c-awk-regexp-normal-re "[^[/\\\n\r]") +;; Matches any AWK regexp character which doesn't require special analysis. +(defconst c-awk-escaped-newlines*-re "\\(\\\\[\n\r]\\)*") +;; Matches a (possibly empty) sequence of escaped newlines. + +;; NOTE: In what follows, "[asdf]" in a regexp will be called a "character +;; list", and "[:alpha:]" inside a character list will be known as a +;; "character class". These terms for these things vary between regexp +;; descriptions . +(defconst c-awk-regexp-char-class-re + "\\[:[a-z]+:\\]") + ;; Matches a character class spec (e.g. [:alpha:]). +(defconst c-awk-regexp-char-list-re + (concat "\\[" c-awk-escaped-newlines*-re "^?" c-awk-escaped-newlines*-re "]?" + "\\(" c-awk-esc-pair-re "\\|" c-awk-regexp-char-class-re + "\\|" "[^]\n\r]" "\\)*" "\\(]\\|$\\)")) +;; Matches a regexp char list, up to (but not including) EOL if the ] is +;; missing. +(defconst c-awk-regexp-one-line-possibly-open-char-list-re + (concat "\\[\\]?\\(" c-awk-non-eol-esc-pair-re "\\|" "[^]\n\r]" "\\)*" + "\\(]\\|\\\\?$\\|\\'\\)")) +;; Matches the head (or all) of a regexp char class, up to (but not +;; including) the first EOL. +(defconst c-awk-regexp-innards-re + (concat "\\(" c-awk-esc-pair-re "\\|" c-awk-regexp-char-list-re + "\\|" c-awk-regexp-normal-re "\\)*")) +;; Matches the inside of an AWK regexp (i.e. without the enclosing /s) +(defconst c-awk-regexp-without-end-re + (concat "/" c-awk-regexp-innards-re)) +;; Matches an AWK regexp up to, but not including, any terminating /. +(defconst c-awk-one-line-possibly-open-regexp-re + (concat "/\\(" c-awk-non-eol-esc-pair-re + "\\|" c-awk-regexp-one-line-possibly-open-char-list-re + "\\|" c-awk-regexp-normal-re "\\)*" + "\\(/\\|\\\\?$\\|\\'\\)")) +;; Matches as much of the head of an AWK regexp which fits on one line, +;; possibly all of it. + +;; REGEXPS used for scanning an AWK buffer in order to decide IF A '/' IS A +;; REGEXP OPENER OR A DIVISION SIGN. By "state" in the following is meant +;; whether a '/' at the current position would by a regexp opener or a +;; division sign. +(defconst c-awk-neutral-re +; "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)+") ; changed, 2003/6/7 + "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)") +;; A "neutral" char(pair). Doesn't change the "state" of a subsequent /. +;; This is space/tab, braces, an auto-increment/decrement operator or an +;; escaped character. Or one of the (illegal) characters @ or `. But NOT an +;; end of line (even if escaped). +(defconst c-awk-neutrals*-re + (concat "\\(" c-awk-neutral-re "\\)*")) +;; A (possibly empty) string of neutral characters (or character pairs). +(defconst c-awk-var-num-ket-re "[]\)0-9a-zA-Z_$.\x80-\xff]+") +;; Matches a char which is a constituent of a variable or number, or a ket +;; (i.e. closing bracKET), round or square. Assume that all characters \x80 to +;; \xff are "letters". +(defconst c-awk-div-sign-re + (concat c-awk-var-num-ket-re c-awk-neutrals*-re "/")) +;; Will match a piece of AWK buffer ending in / which is a division sign, in +;; a context where an immediate / would be a regexp bracket. It follows a +;; variable or number (with optional intervening "neutral" characters). This +;; will only work when there won't be a preceding " or / before the sought / +;; to foul things up. +(defconst c-awk-non-arith-op-bra-re + "[[\(&=:!><,?;'~|]") +;; Matches an openeing BRAcket ,round or square, or any operator character +;; apart from +,-,/,*,%. For the purpose at hand (detecting a / which is a +;; regexp bracket) these arith ops are unnecessary and a pain, because of "++" +;; and "--". +(defconst c-awk-regexp-sign-re + (concat c-awk-non-arith-op-bra-re c-awk-neutrals*-re "/")) +;; Will match a piece of AWK buffer ending in / which is an opening regexp +;; bracket, in a context where an immediate / would be a division sign. This +;; will only work when there won't be a preceding " or / before the sought / +;; to foul things up. + +;; REGEXPS USED FOR FINDING THE POSITION OF A "virtual semicolon" +(defconst c-awk-_-harmless-nonws-char-re "[^#/\"\\\\\n\r \t]") +;;;; NEW VERSION! (which will be restricted to the current line) +(defconst c-awk-one-line-non-syn-ws*-re + (concat "\\([ \t]*" + "\\(" c-awk-_-harmless-nonws-char-re "\\|" + c-awk-non-eol-esc-pair-re "\\|" + c-awk-one-line-possibly-open-string-re "\\|" + c-awk-one-line-possibly-open-regexp-re + "\\)" + "\\)*")) + + ;; ACM, 2002/5/29: ;; ;; The next section of code is about determining whether or not an AWK @@ -106,6 +277,9 @@ ;; after-change function) must be constantly updated for the mode to work ;; properly). ;; +;; This text property is also used for "syntactic whitespace" movement, this +;; being where the distinction between the values '$' and '}' is significant. +;; ;; The valid values for c-awk-NL-prop are: ;; ;; nil The property is not currently set for this line. @@ -120,11 +294,12 @@ ;; essential to the syntax of the program. (i.e. if it had been a ;; frivolous \, it would have been ignored and the line been given one of ;; the other property values.) -;; ';' A statement is completed as the last thing (aside from ws) on the line - -;; i.e. there is (at least part of) a statement on this line, and the last -;; statement on the line is complete, OR (2002/10/25) the line is -;; content-free but terminates a statement from the preceding (continued) -;; line (which has property \). +;; '$' A non-empty statement is terminated on the line by an EOL (a "virtual +;; semicolon"). This might be a content-free line terminating a statement +;; from the preceding (continued) line (which has property \). +;; '}' A statement, being the last thing (aside from ws/comments) is +;; explicitly terminated on this line by a closing brace (or sometimes a +;; semicolon). ;; ;; This set of values has been chosen so that the property's value on a line ;; is completely determined by the contents of the line and the property on @@ -140,6 +315,8 @@ ;; ;; DO-LIM sets a limit on how far back we search for the "do" of a possible ;; do-while. + ;; + ;; This function might do hidden buffer changes. (and (eq (char-before) ?\)) (save-excursion @@ -154,6 +331,8 @@ (defun c-awk-after-function-decl-param-list () ;; Are we just after the ) in "function foo (bar)" ? + ;; + ;; This function might do hidden buffer changes. (and (eq (char-before) ?\)) (save-excursion (let ((par-pos (c-safe (scan-lists (point) -1 0)))) @@ -168,6 +347,8 @@ (defun c-awk-after-continue-token () ;; Are we just after a token which can be continued onto the next line without ;; a backslash? +;; +;; This function might do hidden buffer changes. (save-excursion (c-backward-token-1) ; FIXME 2002/10/27. What if this fails? (if (and (looking-at "[&|]") (not (bobp))) @@ -177,6 +358,8 @@ (defun c-awk-after-rbrace-or-statement-semicolon () ;; Are we just after a } or a ; which closes a statement? ;; Be careful about ;s in for loop control bits. They don't count! + ;; + ;; This function might do hidden buffer changes. (or (eq (char-before) ?\}) (and (eq (char-before) ?\;) @@ -192,17 +375,19 @@ ;; Move back to just after the first found of either (i) an EOL which has ;; the c-awk-NL-prop text-property set; or (ii) non-ws text; or (iii) BOB. ;; We return either the value of c-awk-NL-prop (in case (i)) or nil. - ;; Calling function can best distinguish cases (ii) and (iii) with (bolp). + ;; Calling functions can best distinguish cases (ii) and (iii) with (bolp). ;; ;; Note that an escaped eol counts as whitespace here. ;; ;; Kludge: If c-backward-syntactic-ws gets stuck at a BOL, it is likely ;; that the previous line contains an unterminated string (without \). In - ;; this case, assume that the previous line's c-awk-NL-prop is a ;. + ;; this case, assume that the previous line's c-awk-NL-prop is a $. ;; ;; POINT MUST BE AT THE START OF A LINE when calling this function. This ;; is to ensure that the various backward-comment functions will work ;; properly. + ;; + ;; This function might do hidden buffer changes. (let ((nl-prop nil) bol-pos bsws-pos) ; starting pos for a backward-syntactic-ws call. (while ;; We are at a BOL here. Go back one line each iteration. @@ -213,9 +398,12 @@ (setq bsws-pos (point)) ;; N.B. the following function will not go back past an EOL if ;; there is an open string (without \) on the previous line. + ;; If we find such, set the c-awk-NL-prop on it, too + ;; (2004/3/29). (c-backward-syntactic-ws bol-pos) (or (/= (point) bsws-pos) - (progn (setq nl-prop ?\;) + (progn (setq nl-prop ?\$) + (c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop) nil))) ;; If we had a backslash at EOL, c-backward-syntactic-ws will ;; have gone backwards over it. Check the backslash was "real". @@ -245,6 +433,8 @@ ;; line. Return nil if we hit BOB. ;; ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM. + ;; + ;; This function might do hidden buffer changes. (save-excursion (save-match-data (beginning-of-line) @@ -266,7 +456,9 @@ ((and (looking-at "[ \t]*\\\\$") (not (c-awk-after-rbrace-or-statement-semicolon))) ?\\) - (t ?\;))) ; A statement was completed on this line + ;; A statement was completed on this line. How? + ((memq (char-before) '(?\; ?\})) ?\}) ; Real ; or } + (t ?\$))) ; A virtual semicolon. (end-of-line) (c-put-char-property (point) 'c-awk-NL-prop nl-prop) (forward-line)) @@ -275,9 +467,9 @@ ;; Set c-awk-NL-prop on each of these lines's EOL. (while (< (point) pos) ; one content-free line each iteration. (cond ; recalculate nl-prop from previous line's value. - ((memq nl-prop '(?\; nil)) (setq nl-prop ?\#)) + ((memq nl-prop '(?\} ?\$ nil)) (setq nl-prop ?\#)) ((eq nl-prop ?\\) - (if (not (looking-at "[ \t]*\\\\$")) (setq nl-prop ?\;))) ; was ?\# 2002/10/25 + (if (not (looking-at "[ \t]*\\\\$")) (setq nl-prop ?\$))) ;; ?\# (empty line) and ?\{ (open stmt) don't change. ) (forward-line) @@ -286,8 +478,10 @@ (defun c-awk-get-NL-prop-prev-line (&optional do-lim) ;; Get the c-awk-NL-prop text-property from the previous line, calculating - ;; it if necessary. Return nil iff we're already at BOB. + ;; it if necessary. Return nil if we're at BOB. ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM. + ;; + ;; This function might do hidden buffer changes. (if (bobp) nil (or (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop) @@ -300,6 +494,8 @@ ;; function returns the property value an EOL would have got.) ;; ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM. + ;; + ;; This function might do hidden buffer changes. (save-excursion (let ((extra-nl nil)) (end-of-line) ; Necessary for the following test to work. @@ -309,67 +505,49 @@ (prog1 (c-awk-get-NL-prop-prev-line do-lim) (if extra-nl (delete-backward-char 1)))))) -(defun c-awk-prev-line-incomplete-p (&optional do-lim) +(defsubst c-awk-prev-line-incomplete-p (&optional do-lim) ;; Is there an incomplete statement at the end of the previous line? ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM. + ;; + ;; This function might do hidden buffer changes. (memq (c-awk-get-NL-prop-prev-line do-lim) '(?\\ ?\{))) -(defun c-awk-cur-line-incomplete-p (&optional do-lim) +(defsubst c-awk-cur-line-incomplete-p (&optional do-lim) ;; Is there an incomplete statement at the end of the current line? ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM. + ;; + ;; This function might do hidden buffer changes. (memq (c-awk-get-NL-prop-cur-line do-lim) '(?\\ ?\{))) -(defun c-awk-completed-stmt-ws-ends-prev-line-p (&optional do-lim) - ;; Is there a termination of a statement as the last thing (apart from an - ;; optional comment) on the previous line? - ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM. - (eq (c-awk-get-NL-prop-prev-line do-lim) ?\;)) - -(defun c-awk-completed-stmt-ws-ends-line-p (&optional pos do-lim) - ;; Same as previous function, but for the line containing position POS (or - ;; the current line if POS is omitted). - ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM. +;;;; NOTES ON "VIRTUAL SEMICOLONS" +;;;; +;;;; A "virtual semicolon" is what terminates a statement when there is no ; +;;;; or } to do the job. Like point, it is considered to lie _between_ two +;;;; characters. As from mid-March 2004, it is considered to lie just after +;;;; the last non-syntactic-whitespace character on the line; (previously, it +;;;; was considered an attribute of the EOL on the line). A real semicolon +;;;; never counts as a virtual one. + +(defun c-awk-at-vsemi-p (&optional pos) + ;; Is there a virtual semicolon at POS (or POINT)? (save-excursion - (if pos (goto-char pos)) - (eq (c-awk-get-NL-prop-cur-line do-lim) ?\;))) - -(defun c-awk-after-logical-semicolon (&optional do-lim) -;; Are we at BOL, the preceding EOL being a "logical semicolon"? -;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM. - (and (bolp) - (eq (c-awk-get-NL-prop-prev-line do-lim) ?\;))) - -(defun c-awk-backward-syntactic-ws (&optional lim) -;; Skip backwards over awk-syntactic whitespace. This is whitespace -;; characters, comments, and NEWLINES WHICH AREN'T "VIRTUAL SEMICOLONS". For -;; this function, a newline isn't a "virtual semicolon" if that line ends with -;; a real semicolon (or closing brace). -;; However if point starts inside a comment or preprocessor directive, the -;; content of it is not treated as whitespace. LIM (optional) sets a limit on -;; the backward movement. - (let ((lim (or lim (point-min))) - after-real-br) - (c-backward-syntactic-ws (max lim (c-point 'bol))) - (while ; go back one WS line each time round this loop. - (and (bolp) - (> (point) lim) - (/= (c-awk-get-NL-prop-prev-line) ?\;) - (/= (point) - ;; The following function requires point at BONL [not EOL] to - ;; recognise a preceding comment,. - (progn (c-backward-syntactic-ws (max lim (c-point 'bopl))) - (point))))) - ;; Does the previous line end with a real ; or }? If so, go back to it. - (if (and (bolp) - (eq (c-awk-get-NL-prop-prev-line) ?\;) - (save-excursion - (c-backward-syntactic-ws (max lim (c-point 'bopl))) - (setq after-real-br (point)) - (c-awk-after-rbrace-or-statement-semicolon))) - (goto-char after-real-br)))) - -(defun c-awk-NL-prop-not-set () - ;; Is the NL-prop on the current line either nil or unset? + (let (nl-prop + (pos-or-point (progn (if pos (goto-char pos)) (point)))) + (forward-line 0) + (search-forward-regexp c-awk-one-line-non-syn-ws*-re) + (and (eq (point) pos-or-point) + (progn + (while (and (eq (setq nl-prop (c-awk-get-NL-prop-cur-line)) ?\\) + (eq (forward-line) 0) + (looking-at c-awk-blank-or-comment-line-re))) + (eq nl-prop ?\$)))))) + +(defun c-awk-vsemi-status-unknown-p () + ;; Are we unsure whether there is a virtual semicolon on the current line? + ;; DO NOT under any circumstances attempt to calculate this; that would + ;; defeat the (admittedly kludgey) purpose of this function, which is to + ;; prevent an infinite recursion in c-beginning-of-statement-1 when point + ;; starts at a `while' token. (not (c-get-char-property (c-point 'eol) 'c-awk-NL-prop))) (defun c-awk-clear-NL-props (beg end) @@ -377,6 +555,8 @@ ;; c-awk-NL-prop text property from beg to the end of the buffer (The END ;; parameter is ignored). This ensures that the indentation engine will ;; never use stale values for this property. + ;; + ;; This function might do hidden buffer changes. (save-restriction (widen) (c-clear-char-properties beg (point-max) 'c-awk-NL-prop))) @@ -408,7 +588,7 @@ ;awk-mode-map isn't yet defined. :-( ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - + ;; The following section of the code is to do with font-locking. The biggest ;; problem for font-locking is deciding whether a / is a regular expression ;; delimiter or a division sign - determining precisely where strings and @@ -435,6 +615,8 @@ ;; This is guaranteed to be "safe" for syntactic analysis, i.e. outwith any ;; comment, string or regexp. IT MAY WELL BE that this function should not be ;; executed on a narrowed buffer. +;; +;; This function might do hidden buffer changes. (if pos (goto-char pos)) (forward-line 0) (while (and (> (point) (point-min)) @@ -450,6 +632,8 @@ ;; This is guaranteed to be "safe" for syntactic analysis, i.e. outwith any ;; comment, string or regexp. IT MAY WELL BE that this function should not be ;; executed on a narrowed buffer. +;; +;; This function might do hidden buffer changes. (if pos (goto-char pos)) (end-of-line) (while (and (< (point) (point-max)) @@ -457,135 +641,6 @@ (end-of-line 2)) (point)) -;; N.B. In the following regexps, an EOL is either \n OR \r. This is because -;; Emacs has in the past used \r to mark hidden lines in some fashion (and -;; maybe still does). - -(defconst c-awk-esc-pair-re "\\\\\\(.\\|\n\\|\r\\|\\'\\)") -;; Matches any escaped (with \) character-pair, including an escaped newline. -(defconst c-awk-comment-without-nl "#.*") -;; Matches an AWK comment, not including the terminating NL (if any). Note -;; that the "enclosing" (elisp) regexp must ensure the # is real. -(defconst c-awk-nl-or-eob "\\(\n\\|\r\\|\\'\\)") -;; Matches a newline, or the end of buffer. - -;; "Space" regular expressions. -(defconst c-awk-escaped-nl "\\\\[\n\r]") -;; Matches an escaped newline. -(defconst c-awk-escaped-nls* (concat "\\(" c-awk-escaped-nl "\\)*")) -;; Matches a possibly empty sequence of escaped newlines. Used in -;; awk-font-lock-keywords. -;; (defconst c-awk-escaped-nls*-with-space* -;; (concat "\\(" c-awk-escaped-nls* "\\|" "[ \t]+" "\\)*")) -;; The above RE was very slow. It's runtime was doubling with each additional -;; space :-( Reformulate it as below: -(defconst c-awk-escaped-nls*-with-space* - (concat "\\(" c-awk-escaped-nl "\\|" "[ \t]" "\\)*")) -;; Matches a possibly empty sequence of escaped newlines with optional -;; interspersed spaces and tabs. Used in awk-font-lock-keywords. - -;; REGEXPS FOR "HARMLESS" STRINGS/LINES. -(defconst c-awk-harmless-char-re "[^_#/\"\\\\\n\r]") -;; Matches any character but a _, #, /, ", \, or newline. N.B. _" starts a -;; localisation string in gawk 3.1 -(defconst c-awk-harmless-_ "_\\([^\"]\\|\\'\\)") -;; Matches an underline NOT followed by ". -(defconst c-awk-harmless-string*-re - (concat "\\(" c-awk-harmless-char-re "\\|" c-awk-esc-pair-re "\\|" c-awk-harmless-_ "\\)*")) -;; Matches a (possibly empty) sequence of chars without unescaped /, ", \, -;; #, or newlines. -(defconst c-awk-harmless-string*-here-re - (concat "\\=" c-awk-harmless-string*-re)) -;; Matches the (possibly empty) sequence of chars without unescaped /, ", \, -;; at point. -(defconst c-awk-harmless-line-re - (concat c-awk-harmless-string*-re - "\\(" c-awk-comment-without-nl "\\)?" c-awk-nl-or-eob)) -;; Matches (the tail of) an AWK \"logical\" line not containing an unescaped -;; " or /. "logical" means "possibly containing escaped newlines". A comment -;; is matched as part of the line even if it contains a " or a /. The End of -;; buffer is also an end of line. -(defconst c-awk-harmless-lines+-here-re - (concat "\\=\\(" c-awk-harmless-line-re "\\)+")) -;; Matches a sequence of (at least one) \"harmless-line\" at point. - - -;; REGEXPS FOR AWK STRINGS. -(defconst c-awk-string-ch-re "[^\"\\\n\r]") -;; Matches any character which can appear unescaped in a string. -(defconst c-awk-string-innards-re - (concat "\\(" c-awk-string-ch-re "\\|" c-awk-esc-pair-re "\\)*")) -;; Matches the inside of an AWK string (i.e. without the enclosing quotes). -(defconst c-awk-string-without-end-here-re - (concat "\\=_?\"" c-awk-string-innards-re)) -;; Matches an AWK string at point up to, but not including, any terminator. -;; A gawk 3.1+ string may look like _"localisable string". - -;; REGEXPS FOR AWK REGEXPS. -(defconst c-awk-regexp-normal-re "[^[/\\\n\r]") -;; Matches any AWK regexp character which doesn't require special analysis. -(defconst c-awk-escaped-newlines*-re "\\(\\\\[\n\r]\\)*") -;; Matches a (possibly empty) sequence of escaped newlines. - -;; NOTE: In what follows, "[asdf]" in a regexp will be called a "character -;; list", and "[:alpha:]" inside a character list will be known as a -;; "character class". These terms for these things vary between regexp -;; descriptions . -(defconst c-awk-regexp-char-class-re - "\\[:[a-z]+:\\]") - ;; Matches a character class spec (e.g. [:alpha:]). -(defconst c-awk-regexp-char-list-re - (concat "\\[" c-awk-escaped-newlines*-re "^?" c-awk-escaped-newlines*-re "]?" - "\\(" c-awk-esc-pair-re "\\|" c-awk-regexp-char-class-re - "\\|" "[^]\n\r]" "\\)*" "\\(]\\|$\\)")) -;; Matches a regexp char list, up to (but not including) EOL if the ] is -;; missing. -(defconst c-awk-regexp-innards-re - (concat "\\(" c-awk-esc-pair-re "\\|" c-awk-regexp-char-list-re - "\\|" c-awk-regexp-normal-re "\\)*")) -;; Matches the inside of an AWK regexp (i.e. without the enclosing /s) -(defconst c-awk-regexp-without-end-re - (concat "/" c-awk-regexp-innards-re)) -;; Matches an AWK regexp up to, but not including, any terminating /. - -;; REGEXPS used for scanning an AWK buffer in order to decide IF A '/' IS A -;; REGEXP OPENER OR A DIVISION SIGN. By "state" in the following is meant -;; whether a '/' at the current position would by a regexp opener or a -;; division sign. -(defconst c-awk-neutral-re -; "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)+") ; changed, 2003/6/7 - "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)") -;; A "neutral" char(pair). Doesn't change the "state" of a subsequent /. -;; This is space/tab, braces, an auto-increment/decrement operator or an -;; escaped character. Or one of the (illegal) characters @ or `. But NOT an -;; end of line (even if escaped). -(defconst c-awk-neutrals*-re - (concat "\\(" c-awk-neutral-re "\\)*")) -;; A (possibly empty) string of neutral characters (or character pairs). -(defconst c-awk-var-num-ket-re "[]\)0-9a-zA-Z_$.\x80-\xff]+") -;; Matches a char which is a constituent of a variable or number, or a ket -;; (i.e. closing bracKET), round or square. Assume that all characters \x80 to -;; \xff are "letters". -(defconst c-awk-div-sign-re - (concat c-awk-var-num-ket-re c-awk-neutrals*-re "/")) -;; Will match a piece of AWK buffer ending in / which is a division sign, in -;; a context where an immediate / would be a regexp bracket. It follows a -;; variable or number (with optional intervening "neutral" characters). This -;; will only work when there won't be a preceding " or / before the sought / -;; to foul things up. -(defconst c-awk-non-arith-op-bra-re - "[[\(&=:!><,?;'~|]") -;; Matches an openeing BRAcket ,round or square, or any operator character -;; apart from +,-,/,*,%. For the purpose at hand (detecting a / which is a -;; regexp bracket) these arith ops are unnecessary and a pain, because of "++" -;; and "--". -(defconst c-awk-regexp-sign-re - (concat c-awk-non-arith-op-bra-re c-awk-neutrals*-re "/")) -;; Will match a piece of AWK buffer ending in / which is an opening regexp -;; bracket, in a context where an immediate / would be a division sign. This -;; will only work when there won't be a preceding " or / before the sought / -;; to foul things up. - ;; ACM, 2002/02/15: The idea of the next function is to put the "Error font" ;; on strings/regexps which are missing their closing delimiter. ;; 2002/4/28. The default syntax for / has been changed from "string" to @@ -604,24 +659,26 @@ ;; ;; If the closing delimiter is missing (i.e., there is an EOL there) set the ;; STRING-FENCE property on the opening " or / and closing EOL. +;; +;; This function does hidden buffer changes. (if (eq (char-after beg) ?_) (setq beg (1+ beg))) ;; First put the properties on the delimiters. (cond ((eq end (point-max)) ; string/regexp terminated by EOB - (put-text-property beg (1+ beg) 'syntax-table '(15))) ; (15) = "string fence" + (c-put-char-property beg 'syntax-table '(15))) ; (15) = "string fence" ((/= (char-after beg) (char-after end)) ; missing end delimiter - (put-text-property beg (1+ beg) 'syntax-table '(15)) - (put-text-property end (1+ end) 'syntax-table '(15))) + (c-put-char-property beg 'syntax-table '(15)) + (c-put-char-property end 'syntax-table '(15))) ((eq (char-after beg) ?/) ; Properly bracketed regexp - (put-text-property beg (1+ beg) 'syntax-table '(7)) ; (7) = "string" - (put-text-property end (1+ end) 'syntax-table '(7))) + (c-put-char-property beg 'syntax-table '(7)) ; (7) = "string" + (c-put-char-property end 'syntax-table '(7))) (t)) ; Properly bracketed string: Nothing to do. ;; Now change the properties of any escaped "s in the string to punctuation. (save-excursion (goto-char (1+ beg)) (or (eobp) (while (search-forward "\"" end t) - (put-text-property (1- (point)) (point) 'syntax-table '(1)))))) + (c-put-char-property (1- (point)) 'syntax-table '(1)))))) (defun c-awk-syntax-tablify-string () ;; Point is at the opening " or _" of a string. Set the syntax-table @@ -629,6 +686,8 @@ ;; ;; The result is nil if a / immediately after the string would be a regexp ;; opener, t if it would be a division sign. + ;; + ;; This function does hidden buffer changes. (search-forward-regexp c-awk-string-without-end-here-re nil t) ; a (possibly unterminated) string (c-awk-set-string-regexp-syntax-table-properties (match-beginning 0) (match-end 0)) @@ -652,6 +711,8 @@ ;; point is. ;; ;; The result is what ANCHOR-STATE-/DIV (see above) is where point is left. + ;; + ;; This function might do hidden buffer changes. (let ((/point (point))) (goto-char anchor) ;; Analyse the line to find out what the / is. @@ -659,7 +720,7 @@ (not (search-forward-regexp c-awk-regexp-sign-re (1+ /point) t)) (search-forward-regexp c-awk-div-sign-re (1+ /point) t)) ;; A division sign. - (progn (goto-char (1+ /point)) nil) + (progn (goto-char (1+ /point)) nil) ;; A regexp opener ;; Jump over the regexp innards, setting the match data. (goto-char /point) @@ -697,14 +758,18 @@ ;; given the property "punctuation". This will later allow other routines ;; to use the regexp "\\S\"*" to skip over the string innards. ;; (iv) Inside a comment, all syntax-table properties are cleared. +;; +;; This function does hidden buffer changes. (let (anchor (anchor-state-/div nil)) ; t means a following / would be a div sign. (c-awk-beginning-of-logical-line) ; ACM 2002/7/21. This is probably redundant. - (put-text-property (point) lim 'syntax-table nil) - (search-forward-regexp c-awk-harmless-lines+-here-re nil t) ; skip harmless lines. - + (c-clear-char-properties (point) lim 'syntax-table) ;; Once round the next loop for each string, regexp, or div sign - (while (< (point) lim) + (while (progn + ;; Skip any "harmless" lines before the next tricky one. + (if (search-forward-regexp c-awk-harmless-lines+-here-re nil t) + (setq anchor-state-/div nil)) + (< (point) lim)) (setq anchor (point)) (search-forward-regexp c-awk-harmless-string*-here-re nil t) ;; We are now looking at either a " or a /. @@ -712,11 +777,7 @@ (setq anchor-state-/div (if (looking-at "_?\"") (c-awk-syntax-tablify-string) - (c-awk-syntax-tablify-/ anchor anchor-state-/div))) - - ;; Skip any further "harmless" lines before the next tricky one. - (if (search-forward-regexp c-awk-harmless-lines+-here-re nil t) - (setq anchor-state-/div nil))) + (c-awk-syntax-tablify-/ anchor anchor-state-/div)))) nil)) @@ -733,6 +794,8 @@ ;; This function is called exclusively from the before-change-functions hook. ;; It does two things: Finds the end of the (logical) line on which END lies, ;; and clears c-awk-NL-prop text properties from this point onwards. +;; +;; This function might do hidden buffer changes. (save-restriction (save-excursion (setq c-awk-old-EOLL (c-awk-end-of-logical-line end)) @@ -742,8 +805,10 @@ (defun c-awk-end-of-change-region (beg end old-len) ;; Find the end of the region which needs to be font-locked after a change. ;; This is the end of the logical line on which the change happened, either - ;; as it was before the change, or as it is now, which ever is later. + ;; as it was before the change, or as it is now, whichever is later. ;; N.B. point is left undefined. + ;; + ;; This function might do hidden buffer changes. (max (+ (- c-awk-old-EOLL old-len) (- end beg)) (c-awk-end-of-logical-line end))) @@ -753,14 +818,17 @@ ;; changed region. However, if font-lock is enabled, this function does ;; nothing, since an enabled font-lock after-change function will always do ;; this. +;; +;; This function might do hidden buffer changes. (unless (and (boundp 'font-lock-mode) font-lock-mode) (save-restriction (save-excursion - (setq end (c-awk-end-of-change-region beg end old-len)) - (c-awk-beginning-of-logical-line beg) - (c-save-buffer-state nil ; So that read-only status isn't affected. + (save-match-data + (setq end (c-awk-end-of-change-region beg end old-len)) + (c-awk-beginning-of-logical-line beg) + (c-save-buffer-state nil ; So that read-only status isn't affected. ; (e.g. when first loading the buffer) - (c-awk-set-syntax-table-properties end)))))) + (c-awk-set-syntax-table-properties end))))))) ;; ACM 2002/5/25. When font-locking is invoked by a buffer change, the region ;; specified by the font-lock after-change function must be expanded to @@ -785,10 +853,104 @@ (c-awk-advise-fl-for-awk-region lazy-lock-defer-rest-after-change) (c-awk-advise-fl-for-awk-region lazy-lock-defer-line-after-change) -;; ACM 2002/9/29. Functions for C-M-a and C-M-e - +;; Awk regexps written with help from Peter Galbraith +;; . +;; Take GNU Emacs's 'words out of the following regexp-opts. They dont work +;; in Xemacs 21.4.4. acm 2002/9/19. +(defconst awk-font-lock-keywords + (eval-when-compile + (list + ;; Function names. + '("^\\s *\\(func\\(tion\\)?\\)\\>\\s *\\(\\sw+\\)?" + (1 font-lock-keyword-face) (3 font-lock-function-name-face nil t)) + ;; + ;; Variable names. + (cons + (concat "\\<" + (regexp-opt + '("ARGC" "ARGIND" "ARGV" "BINMODE" "CONVFMT" "ENVIRON" + "ERRNO" "FIELDWIDTHS" "FILENAME" "FNR" "FS" "IGNORECASE" + "LINT" "NF" "NR" "OFMT" "OFS" "ORS" "PROCINFO" "RLENGTH" + "RS" "RSTART" "RT" "SUBSEP" "TEXTDOMAIN") t) "\\>") + 'font-lock-variable-name-face) + + ;; Special file names. (acm, 2002/7/22) + ;; The following regexp was created by first evaluating this in GNU Emacs 21.1: + ;; (regexp-opt '("/dev/stdin" "/dev/stdout" "/dev/stderr" "/dev/fd/n" "/dev/pid" + ;; "/dev/ppid" "/dev/pgrpid" "/dev/user") 'words) + ;; , removing the "?:" from each "\\(?:" (for backward compatibility with older Emacsen) + ;; , replacing the "n" in "dev/fd/n" with "[0-9]+" + ;; , removing the unwanted \\< at the beginning, and finally filling out the + ;; regexp so that a " must come before, and either a " or heuristic stuff after. + ;; The surrounding quotes are fontified along with the filename, since, semantically, + ;; they are an indivisible unit. + '("\\(\"/dev/\\(fd/[0-9]+\\|p\\(\\(\\(gr\\)?p\\)?id\\)\\|\ +std\\(err\\|in\\|out\\)\\|user\\)\\)\\>\ +\\(\\(\"\\)\\|\\([^\"/\n\r][^\"\n\r]*\\)?$\\)" + (1 font-lock-variable-name-face t) + (8 font-lock-variable-name-face t t)) + ;; Do the same (almost) with + ;; (regexp-opt '("/inet/tcp/lport/rhost/rport" "/inet/udp/lport/rhost/rport" + ;; "/inet/raw/lport/rhost/rport") 'words) + ;; This cannot be combined with the above pattern, because the match number + ;; for the (optional) closing \" would then exceed 9. + '("\\(\"/inet/\\(\\(raw\\|\\(tc\\|ud\\)p\\)/lport/rhost/rport\\)\\)\\>\ +\\(\\(\"\\)\\|\\([^\"/\n\r][^\"\n\r]*\\)?$\\)" + (1 font-lock-variable-name-face t) + (6 font-lock-variable-name-face t t)) + + ;; Keywords. + (concat "\\<" + (regexp-opt + '("BEGIN" "END" "break" "continue" "delete" "do" "else" + "exit" "for" "getline" "if" "in" "next" "nextfile" + "return" "while") + t) "\\>") + + ;; Builtins. + `(eval . (list + ,(concat + "\\<" + (regexp-opt + '("adump" "and" "asort" "atan2" "bindtextdomain" "close" + "compl" "cos" "dcgettext" "exp" "extension" "fflush" + "gensub" "gsub" "index" "int" "length" "log" "lshift" + "match" "mktime" "or" "print" "printf" "rand" "rshift" + "sin" "split" "sprintf" "sqrt" "srand" "stopme" + "strftime" "strtonum" "sub" "substr" "system" + "systime" "tolower" "toupper" "xor") t) + "\\>") + 0 c-preprocessor-face-name)) + + ;; gawk debugging keywords. (acm, 2002/7/21) + ;; (Removed, 2003/6/6. These functions are now fontified as built-ins) + ;; (list (concat "\\<" (regexp-opt '("adump" "stopme") t) "\\>") + ;; 0 'font-lock-warning-face) + + ;; User defined functions with an apparent spurious space before the + ;; opening parenthesis. acm, 2002/5/30. + `(,(concat "\\(\\w\\|_\\)" c-awk-escaped-nls* "\\s " + c-awk-escaped-nls*-with-space* "(") + (0 'font-lock-warning-face)) + + ;; Space after \ in what looks like an escaped newline. 2002/5/31 + '("\\\\\\s +$" 0 font-lock-warning-face t) + + ;; Unbalanced string (") or regexp (/) delimiters. 2002/02/16. + '("\\s|" 0 font-lock-warning-face t nil) + ;; gawk 3.1 localizable strings ( _"translate me!"). 2002/5/21 + '("\\(_\\)\\s|" 1 font-lock-warning-face) + '("\\(_\\)\\s\"" 1 font-lock-string-face) ; FIXME! not for XEmacs. 2002/10/6 + )) + "Default expressions to highlight in AWK mode.") + +;; ACM 2002/9/29. Movement functions, e.g. for C-M-a and C-M-e + +;; The following three regexps differ from those earlier on in cc-awk.el in +;; that they assume the syntax-table properties have been set. They are thus +;; not useful for code which sets these properties. (defconst c-awk-terminated-regexp-or-string-here-re "\\=\\s\"\\S\"*\\s\"") -;; Matches a terminated string/regexp (utilising syntax-table properties). +;; Matches a terminated string/regexp. (defconst c-awk-unterminated-regexp-or-string-here-re "\\=\\s|\\S|*$") ;; Matches an unterminated string/regexp, NOT including the eol at the end. @@ -797,17 +959,36 @@ (concat "\\([^{;#/\"\\\\\n\r]\\|" c-awk-esc-pair-re "\\)*")) ;; Matches any "harmless" character in a pattern or an escaped character pair. +(defun c-awk-at-statement-end-p () + ;; Point is not inside a comment or string. Is it AT the end of a + ;; statement? This means immediately after the last non-ws character of the + ;; statement. The caller is responsible for widening the buffer, if + ;; appropriate. + (and (not (bobp)) + (save-excursion + (backward-char) + (or (looking-at "[};]") + (and (memq (c-awk-get-NL-prop-cur-line) '(?\$ ?\\)) + (looking-at + (eval-when-compile + (concat "[^ \t\n\r\\]" c-awk-escaped-nls*-with-space* + "[#\n\r]")))))))) + (defun c-awk-beginning-of-defun (&optional arg) "Move backward to the beginning of an AWK \"defun\". With ARG, do it that many times. Negative arg -N means move forward to Nth following beginning of defun. Returns t unless search stops due to beginning or end of buffer. By a \"defun\" is meant either a pattern-action pair or a function. The start -of a defun is recognised as code starting at column zero which is neither a +of a defun is recognized as code starting at column zero which is neither a closing brace nor a comment nor a continuation of the previous line. Unlike in some other modes, having an opening brace at column 0 is neither necessary -nor helpful." +nor helpful. + +Note that this function might do hidden buffer changes. See the +comment at the start of cc-engine.el for more info." (interactive "p") + (or arg (setq arg 1)) (save-match-data (c-save-buffer-state ; ensures the buffer is writable. nil @@ -819,14 +1000,14 @@ nor helpful." ;; is genuinely a beginning-of-defun. (while (and (setq found (search-backward-regexp "^[^#} \t\n\r]" (point-min) 'stop-at-limit)) - (not (memq (c-awk-get-NL-prop-prev-line) '(?\; ?\#))))) + (not (memq (c-awk-get-NL-prop-prev-line) '(?\$ ?\} ?\#))))) (setq arg (1- arg))) ;; The same for a -ve arg. (if (not (eq (point) (point-max))) (forward-char 1)) (while (and found (< arg 0) (not (eq (point) (point-max)))) ; The same for -ve arg. (while (and (setq found (search-forward-regexp "^[^#} \t\n\r]" (point-max) 'stop-at-limit)) - (not (memq (c-awk-get-NL-prop-prev-line) '(?\; ?\#))))) + (not (memq (c-awk-get-NL-prop-prev-line) '(?\$ ?\} ?\#))))) (setq arg (1+ arg))) (if found (goto-char (match-beginning 0)))) (eq arg 0))))) @@ -837,6 +1018,8 @@ nor helpful." ;; comment. Typically, we stop at the { which denotes the corresponding AWK ;; action/function body. Otherwise we stop at the EOL (or ;) marking the ;; absence of an explicit action. + ;; + ;; This function might do hidden buffer changes. (while (progn (search-forward-regexp c-awk-harmless-pattern-characters*) @@ -854,6 +1037,8 @@ nor helpful." (defun c-awk-end-of-defun1 () ;; point is at the start of a "defun". Move to its end. Return end position. + ;; + ;; This function might do hidden buffer changes. (c-awk-forward-awk-pattern) (cond ((looking-at "{") (goto-char (scan-sexps (point) 1))) @@ -865,6 +1050,8 @@ nor helpful." (defun c-awk-beginning-of-defun-p () ;; Are we already at the beginning of a defun? (i.e. at code in column 0 ;; which isn't a }, and isn't a continuation line of any sort. + ;; + ;; This function might do hidden buffer changes. (and (looking-at "^[^#} \t\n\r]") (not (c-awk-prev-line-incomplete-p)))) @@ -874,7 +1061,10 @@ Negative argument -N means move back to Nth preceding end of defun. An end of a defun occurs right after the closing brace that matches the opening brace at its start, or immediately after the AWK pattern when there is -no explicit action; see function `c-awk-beginning-of-defun'." +no explicit action; see function `c-awk-beginning-of-defun'. + +Note that this function might do hidden buffer changes. See the +comment at the start of cc-engine.el for more info." (interactive "p") (or arg (setq arg 1)) (save-match-data @@ -910,6 +1100,7 @@ no explicit action; see function `c-awk-beginning-of-defun'." (< arg 0))) (goto-char (min start-point end-point))))))) + (cc-provide 'cc-awk) ; Changed from 'awk-mode, ACM 2002/5/21 ;;; arch-tag: c4836289-3aa4-4a59-9934-9ccc2bacccf3