+\f
+;; This section defines regular expressions used in the analysis of AWK code.
+
+;; N.B. In the following regexps, an EOL is either \n OR \r. This is because
+;; Emacs has in the past used \r to mark hidden lines in some fashion (and
+;; maybe still does).
+
+(defconst c-awk-esc-pair-re "\\\\\\(.\\|\n\\|\r\\|\\'\\)")
+;; Matches any escaped (with \) character-pair, including an escaped newline
+;; (\n or \r). A lone \ at the very end of the buffer also matches.
+(defconst c-awk-non-eol-esc-pair-re "\\\\\\(.\\|\\'\\)")
+;; Matches any escaped (with \) character-pair, apart from an escaped newline.
+;; A lone \ at the very end of the buffer also matches.
+;; NOTE(review): `.' excludes only \n, so a \ followed by \r is matched here
+;; too - confirm that this is intended.
+(defconst c-awk-comment-without-nl "#.*")
+;; Matches an AWK comment from its # up to, but not including, the
+;; terminating NL (if any). Note that the "enclosing" (elisp) regexp must
+;; ensure the # is real.
+(defconst c-awk-nl-or-eob "\\(\n\\|\r\\|\\'\\)")
+;; Matches a newline (\n or \r), or the end of buffer.
+
+;; "Space" regular expressions.
+(eval-and-compile
+ (defconst c-awk-escaped-nl "\\\\[\n\r]"))
+;; Matches an escaped newline, i.e. a \ immediately followed by \n or \r.
+;; The `eval-and-compile' makes the value available at byte-compile time as
+;; well as at load time.
+(eval-and-compile
+ (defconst c-awk-escaped-nls* (concat "\\(" c-awk-escaped-nl "\\)*")))
+;; Matches a possibly empty sequence of escaped newlines (zero or more
+;; repetitions of `c-awk-escaped-nl'). Used in awk-font-lock-keywords.
+;; (defconst c-awk-escaped-nls*-with-space*
+;; (concat "\\(" c-awk-escaped-nls* "\\|" "[ \t]+" "\\)*"))
+;; The above RE was very slow. Its run time was doubling with each additional
+;; space :-( Reformulate it as below:
+(eval-and-compile
+  (defconst c-awk-escaped-nls*-with-space*
+    (concat "\\("
+            ;; One item of the sequence: an escaped newline or one blank.
+            (mapconcat #'identity
+                       (list c-awk-escaped-nl "[ \t]")
+                       "\\|")
+            "\\)*")))
+;; Zero or more items, each either an escaped newline or a single space or
+;; tab. I.e. a possibly empty sequence of escaped newlines with optional
+;; interspersed spaces and tabs. Used in awk-font-lock-keywords.
+(defconst c-awk-blank-or-comment-line-re
+  "[ \t]*\\(#\\|\\\\?$\\)")
+;; Matches (the tail of) a line containing at most either a comment or an
+;; escaped EOL: optional spaces/tabs followed by either a # or an optional
+;; \ at the end of the line.
+
+;; REGEXPS FOR "HARMLESS" STRINGS/LINES.
+(defconst c-awk-harmless-char-re "[^_#/\"\\\\\n\r]")
+;; Matches any single character but a _, #, /, ", \, or newline (\n or \r).
+;; N.B. _" starts a localisation string in gawk 3.1
+(defconst c-awk-harmless-_ "_\\([^\"]\\|\\'\\)")
+;; Matches an underline NOT followed by ". The character after the _ (if
+;; any) is part of the match; a _ at the end of the buffer matches too.
+;; NOTE(review): the character following the _ is consumed whatever it is
+;; (other than "), including /, #, \ or a newline - confirm that the users
+;; of this regexp tolerate that.
+(defconst c-awk-harmless-string*-re
+  (concat "\\("
+          ;; The alternatives making up one "harmless" item.
+          (mapconcat #'identity
+                     (list c-awk-harmless-char-re
+                           c-awk-esc-pair-re
+                           c-awk-harmless-_)
+                     "\\|")
+          "\\)*"))
+;; Zero or more "harmless" items, each being a harmless character, an
+;; escaped character-pair, or a _ not followed by ". Intended to match a
+;; (possibly empty) sequence of chars without unescaped /, ", \, #, or
+;; newlines.
+(defconst c-awk-harmless-string*-here-re
+ (concat "\\=" c-awk-harmless-string*-re))
+;; Matches the (possibly empty) sequence of chars without unescaped /, ", \,
+;; #, or newlines which starts at point (the match is anchored with \=).
+(defconst c-awk-harmless-line-re
+ (concat c-awk-harmless-string*-re
+ "\\(" c-awk-comment-without-nl "\\)?" c-awk-nl-or-eob))
+;; Matches (the tail of) an AWK "logical" line not containing an unescaped
+;; " or /. "logical" means "possibly containing escaped newlines". A comment
+;; is matched as part of the line even if it contains a " or a /. The match
+;; includes the terminating newline; the end of buffer is also an end of
+;; line.
+(defconst c-awk-harmless-lines+-here-re
+ (concat "\\=\\(" c-awk-harmless-line-re "\\)+"))
+;; Matches a sequence of (at least one) "harmless-line" starting at point
+;; (the match is anchored with \=).
+
+
+;; REGEXPS FOR AWK STRINGS.
+(defconst c-awk-string-ch-re "[^\"\\\n\r]")
+;; Matches any character which can appear unescaped in a string, i.e.
+;; anything but a ", a \, or a newline (\n or \r).
+(defconst c-awk-string-innards-re
+  (concat "\\("
+          ;; One element of a string body.
+          (mapconcat #'identity
+                     (list c-awk-string-ch-re c-awk-esc-pair-re)
+                     "\\|")
+          "\\)*"))
+;; Matches the inside of an AWK string (i.e. without the enclosing quotes):
+;; zero or more plain string characters or escaped character-pairs.
+(defconst c-awk-string-without-end-here-re
+ (concat "\\=_?\"" c-awk-string-innards-re))
+;; Matches an AWK string at point up to, but not including, any terminator.
+;; The optional leading _ is there because a gawk 3.1+ string may look like
+;; _"localisable string".
+(defconst c-awk-one-line-possibly-open-string-re
+ (concat "\"\\(" c-awk-string-ch-re "\\|" c-awk-non-eol-esc-pair-re "\\)*"
+ "\\(\"\\|\\\\?$\\|\\'\\)"))
+;; Matches an opening " followed by plain string characters and non-EOL
+;; escaped pairs, ending at the first of: the closing ", the end of the line
+;; (optionally preceded by a \), or the end of the buffer.
+
+;; REGEXPS FOR AWK REGEXPS.
+(defconst c-awk-regexp-normal-re "[^[/\\\n\r]")
+;; Matches any AWK regexp character which doesn't require special analysis,
+;; i.e. anything but a [, a /, a \, or a newline (\n or \r).
+(defconst c-awk-escaped-newlines*-re "\\(\\\\[\n\r]\\)*")
+;; Matches a (possibly empty) sequence of escaped newlines. This is the same
+;; pattern as the value of `c-awk-escaped-nls*'.
+
+;; NOTE: In what follows, "[asdf]" in a regexp will be called a "character
+;; list", and "[:alpha:]" inside a character list will be known as a
+;; "character class". The terms for these things vary between regexp
+;; descriptions.
+(defconst c-awk-regexp-char-class-re
+ "\\[:[a-z]+:\\]")
+;; Matches a character class spec (e.g. [:alpha:]): "[:", one or more
+;; lower-case ASCII letters, then ":]".
+(defconst c-awk-regexp-char-list-re
+ (concat "\\[" c-awk-escaped-newlines*-re "^?" c-awk-escaped-newlines*-re "]?"
+ "\\(" c-awk-esc-pair-re "\\|" c-awk-regexp-char-class-re
+ "\\|" "[^]\n\r]" "\\)*" "\\(]\\|$\\)"))
+;; Matches a regexp char list, up to (but not including) EOL if the ] is
+;; missing. After the opening [ come an optional ^ and an optional ] (which
+;; is then a member of the list rather than its terminator), each of which
+;; may be preceded by escaped newlines. The body consists of escaped pairs,
+;; character classes and any other characters except ] and newlines.
+(defconst c-awk-regexp-one-line-possibly-open-char-list-re
+  ;; A ] is an ordinary member of the list when it comes first, either
+  ;; directly after the [ or directly after a negating ^. Both positions
+  ;; must be allowed for here, as they are in `c-awk-regexp-char-list-re',
+  ;; otherwise a list such as [^]/] would be cut short at its first ].
+  (concat "\\[\\^?\\]?\\(" c-awk-non-eol-esc-pair-re "\\|" "[^]\n\r]" "\\)*"
+          "\\(]\\|\\\\?$\\|\\'\\)"))
+;; Matches the head (or all) of a regexp char list, up to (but not
+;; including) the first EOL. The match ends at the closing ], at the end of
+;; the line (optionally preceded by a \), or at the end of the buffer.
+(defconst c-awk-regexp-innards-re
+  (concat "\\("
+          ;; One element of a regexp body.
+          (mapconcat #'identity
+                     (list c-awk-esc-pair-re
+                           c-awk-regexp-char-list-re
+                           c-awk-regexp-normal-re)
+                     "\\|")
+          "\\)*"))
+;; Matches the inside of an AWK regexp (i.e. without the enclosing /s): zero
+;; or more escaped pairs, char lists, or "normal" regexp characters.
+(defconst c-awk-regexp-without-end-re
+ (concat "/" c-awk-regexp-innards-re))
+;; Matches an AWK regexp, starting with its opening /, up to, but not
+;; including, any terminating /.
+(defconst c-awk-one-line-possibly-open-regexp-re
+ (concat "/\\(" c-awk-non-eol-esc-pair-re
+ "\\|" c-awk-regexp-one-line-possibly-open-char-list-re
+ "\\|" c-awk-regexp-normal-re "\\)*"
+ "\\(/\\|\\\\?$\\|\\'\\)"))
+;; Matches as much of the head of an AWK regexp as fits on one line,
+;; possibly all of it. The match starts at the opening / and ends at the
+;; closing /, at the end of the line (optionally preceded by a \), or at the
+;; end of the buffer.
+
+;; REGEXPS used for scanning an AWK buffer in order to decide IF A '/' IS A
+;; REGEXP OPENER OR A DIVISION SIGN. By "state" in the following is meant
+;; whether a '/' at the current position would be a regexp opener or a
+;; division sign.
+(defconst c-awk-neutral-re
+; "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)+") ; changed, 2003/6/7
+ "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)")
+;; A single "neutral" char(pair). Doesn't change the "state" of a subsequent
+;; /. This is space/tab, braces, an auto-increment/decrement operator or an
+;; escaped character. Or one of the (illegal) characters @ or `. But NOT an
+;; end of line (even if escaped). (The commented-out line above is the
+;; earlier version, which matched one or more of these.)
+(defconst c-awk-neutrals*-re
+ (concat "\\(" c-awk-neutral-re "\\)*"))
+;; A (possibly empty) string of neutral characters (or character pairs), as
+;; matched by `c-awk-neutral-re'.
+(defconst c-awk-var-num-ket-re "[]\)0-9a-zA-Z_$.\x80-\xff]+")
+;; Matches one or more chars, each of which is a constituent of a variable
+;; or number, or a ket (i.e. closing bracKET), round or square. Assume that
+;; all characters \x80 to \xff are "letters".
+(defconst c-awk-div-sign-re
+ (concat c-awk-var-num-ket-re c-awk-neutrals*-re "/"))
+;; Will match a piece of AWK buffer ending in / which is a division sign, in
+;; a context where an immediate / would be a regexp bracket. The / follows a
+;; variable, number or ket (with optional intervening "neutral" characters).
+;; This will only work when there won't be a preceding " or / before the
+;; sought / to foul things up.
+(defconst c-awk-non-arith-op-bra-re
+ "[[\(&=:!><,?;'~|]")
+;; Matches an opening BRAcket, round or square, or any operator character
+;; apart from +,-,/,*,%. For the purpose at hand (detecting a / which is a
+;; regexp bracket) these arith ops are unnecessary and a pain, because of "++"
+;; and "--".
+(defconst c-awk-regexp-sign-re
+ (concat c-awk-non-arith-op-bra-re c-awk-neutrals*-re "/"))
+;; Will match a piece of AWK buffer ending in / which is an opening regexp
+;; bracket, in a context where an immediate / would be a division sign. The
+;; / follows an opening bracket or non-arithmetic operator character (with
+;; optional intervening "neutral" characters). This will only work when
+;; there won't be a preceding " or / before the sought / to foul things up.
+
+;; REGEXPS USED FOR FINDING THE POSITION OF A "virtual semicolon"
+(defconst c-awk-_-harmless-nonws-char-re "[^#/\"\\\\\n\r \t]")
+;; Matches any single character but a #, /, ", \, newline (\n or \r), space
+;; or tab. Unlike `c-awk-harmless-char-re', a _ IS matched.
+;;;; NEW VERSION! (which will be restricted to the current line)
+(defconst c-awk-one-line-non-syn-ws*-re
+ (concat "\\([ \t]*"
+ "\\(" c-awk-_-harmless-nonws-char-re "\\|"
+ c-awk-non-eol-esc-pair-re "\\|"
+ c-awk-one-line-possibly-open-string-re "\\|"
+ c-awk-one-line-possibly-open-regexp-re
+ "\\)"
+ "\\)*"))
+;; Matches a (possibly empty) sequence of items, each consisting of optional
+;; spaces/tabs followed by one of: a harmless non-whitespace character, a
+;; non-EOL escaped pair, a one-line (possibly open) string, or a one-line
+;; (possibly open) regexp. Trailing spaces/tabs not followed by such an
+;; item are not part of the match.
+
+\f