X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/b2e6b10fe2d40020a75ab0025af98a4abf339cd2..14fe7b530dc927a88169a841afc0cd806593dea8:/src/syntax.c diff --git a/src/syntax.c b/src/syntax.c index c6cc8da078..bfdf0e5ee6 100644 --- a/src/syntax.c +++ b/src/syntax.c @@ -1,14 +1,14 @@ /* GNU Emacs routines to deal with syntax tables; also word and list parsing. Copyright (C) 1985, 1987, 1993, 1994, 1995, 1997, 1998, 1999, 2001, - 2002, 2003, 2004, 2005, 2006, 2007 + 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. This file is part of GNU Emacs. -GNU Emacs is free software; you can redistribute it and/or modify +GNU Emacs is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. GNU Emacs is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,13 +16,12 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with GNU Emacs; see the file COPYING. If not, write to -the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -Boston, MA 02110-1301, USA. */ +along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ #include #include +#include #include "lisp.h" #include "commands.h" #include "buffer.h" @@ -67,18 +66,20 @@ int open_paren_in_column_0_is_defun_start; struct lisp_parse_state { - int depth; /* Depth at end of parsing. */ - int instring; /* -1 if not within string, else desired terminator. */ - int incomment; /* -1 if in unnestable comment else comment nesting */ - int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. 
*/ - int quoted; /* Nonzero if just after an escape char at end of parsing */ - int thislevelstart; /* Char number of most recent start-of-expression at current level */ - int prevlevelstart; /* Char number of start of containing expression */ - int location; /* Char number at which parsing stopped. */ - int mindepth; /* Minimum depth seen while scanning. */ - int comstr_start; /* Position just after last comment/string starter. */ - Lisp_Object levelstarts; /* Char numbers of starts-of-expression - of levels (starting from outermost). */ + int depth; /* Depth at end of parsing. */ + int instring; /* -1 if not within string, else desired terminator. */ + int incomment; /* -1 if in unnestable comment else comment nesting */ + int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */ + int quoted; /* Nonzero if just after an escape char at end of parsing */ + int mindepth; /* Minimum depth seen while scanning. */ + /* Char number of most recent start-of-expression at current level */ + EMACS_INT thislevelstart; + /* Char number of start of containing expression */ + EMACS_INT prevlevelstart; + EMACS_INT location; /* Char number at which parsing stopped. */ + EMACS_INT comstr_start; /* Position of last comment/string starter. */ + Lisp_Object levelstarts; /* Char numbers of starts-of-expression + of levels (starting from outermost). */ }; /* These variables are a cache for finding the start of a defun. @@ -89,23 +90,19 @@ struct lisp_parse_state find_start_begv is the BEGV value when it was found. find_start_modiff is the value of MODIFF when it was found. 
*/ -static int find_start_pos; -static int find_start_value; -static int find_start_value_byte; +static EMACS_INT find_start_pos; +static EMACS_INT find_start_value; +static EMACS_INT find_start_value_byte; static struct buffer *find_start_buffer; -static int find_start_begv; +static EMACS_INT find_start_begv; static int find_start_modiff; -static int find_defun_start P_ ((int, int)); -static int back_comment P_ ((EMACS_INT, EMACS_INT, EMACS_INT, int, int, - EMACS_INT *, EMACS_INT *)); -static int char_quoted P_ ((int, int)); static Lisp_Object skip_chars P_ ((int, Lisp_Object, Lisp_Object, int)); static Lisp_Object skip_syntaxes P_ ((int, Lisp_Object, Lisp_Object)); static Lisp_Object scan_lists P_ ((EMACS_INT, EMACS_INT, EMACS_INT, int)); static void scan_sexps_forward P_ ((struct lisp_parse_state *, - int, int, int, int, + EMACS_INT, EMACS_INT, EMACS_INT, int, int, Lisp_Object, int)); static int in_classes P_ ((int, Lisp_Object)); @@ -292,19 +289,17 @@ update_syntax_table (charpos, count, init, object) or after. On return global syntax data is good for lookup at CHARPOS. */ static int -char_quoted (charpos, bytepos) - register int charpos, bytepos; +char_quoted (EMACS_INT charpos, EMACS_INT bytepos) { register enum syntaxcode code; - register int beg = BEGV; + register EMACS_INT beg = BEGV; register int quoted = 0; - int orig = charpos; - - DEC_BOTH (charpos, bytepos); + EMACS_INT orig = charpos; - while (charpos >= beg) + while (charpos > beg) { int c; + DEC_BOTH (charpos, bytepos); UPDATE_SYNTAX_TABLE_BACKWARD (charpos); c = FETCH_CHAR_AS_MULTIBYTE (bytepos); @@ -312,7 +307,6 @@ char_quoted (charpos, bytepos) if (! (code == Scharquote || code == Sescape)) break; - DEC_BOTH (charpos, bytepos); quoted = !quoted; } @@ -323,9 +317,9 @@ char_quoted (charpos, bytepos) /* Return the bytepos one character after BYTEPOS. We assume that BYTEPOS is not at the end of the buffer. 
*/ -INLINE int +INLINE EMACS_INT inc_bytepos (bytepos) - int bytepos; + EMACS_INT bytepos; { if (NILP (current_buffer->enable_multibyte_characters)) return bytepos + 1; @@ -337,9 +331,9 @@ inc_bytepos (bytepos) /* Return the bytepos one character before BYTEPOS. We assume that BYTEPOS is not at the start of the buffer. */ -INLINE int +INLINE EMACS_INT dec_bytepos (bytepos) - int bytepos; + EMACS_INT bytepos; { if (NILP (current_buffer->enable_multibyte_characters)) return bytepos - 1; @@ -348,7 +342,7 @@ dec_bytepos (bytepos) return bytepos; } -/* Return a defun-start position before before POS and not too far before. +/* Return a defun-start position before POS and not too far before. It should be the last one before POS, or nearly the last. When open_paren_in_column_0_is_defun_start is nonzero, @@ -362,11 +356,11 @@ dec_bytepos (bytepos) valid on return from the subroutine, so the caller should explicitly update the global data. */ -static int +static EMACS_INT find_defun_start (pos, pos_byte) - int pos, pos_byte; + EMACS_INT pos, pos_byte; { - int opoint = PT, opoint_byte = PT_BYTE; + EMACS_INT opoint = PT, opoint_byte = PT_BYTE; if (!open_paren_in_column_0_is_defun_start) { @@ -495,14 +489,14 @@ back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_p inside another comment). Test case: { a (* b } c (* d *) */ int comment_lossage = 0; - int comment_end = from; - int comment_end_byte = from_byte; - int comstart_pos = 0; - int comstart_byte; + EMACS_INT comment_end = from; + EMACS_INT comment_end_byte = from_byte; + EMACS_INT comstart_pos = 0; + EMACS_INT comstart_byte; /* Place where the containing defun starts, or 0 if we didn't come across it yet. 
*/ - int defun_start = 0; - int defun_start_byte = 0; + EMACS_INT defun_start = 0; + EMACS_INT defun_start_byte = 0; register enum syntaxcode code; int nesting = 1; /* current comment nesting */ int c; @@ -518,6 +512,7 @@ back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_p { int temp_byte, prev_syntax; int com2start, com2end; + int comstart; /* Move back and examine a character. */ DEC_BOTH (from, from_byte); @@ -536,7 +531,8 @@ back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_p || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested); com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax) && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax)); - + comstart = (com2start || code == Scomment); + /* Nasty cases with overlapping 2-char comment markers: - snmp-mode: -- c -- foo -- c -- --- c -- @@ -547,15 +543,16 @@ back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_p /// */ /* If a 2-char comment sequence partly overlaps with another, - we don't try to be clever. */ - if (from > stop && (com2end || com2start)) + we don't try to be clever. E.g. |*| in C, or }% in modes that + have %..\n and %{..}%. */ + if (from > stop && (com2end || comstart)) { int next = from, next_byte = from_byte, next_c, next_syntax; DEC_BOTH (next, next_byte); UPDATE_SYNTAX_TABLE_BACKWARD (next); next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte); next_syntax = SYNTAX_WITH_FLAGS (next_c); - if (((com2start || comnested) + if (((comstart || comnested) && SYNTAX_FLAGS_COMEND_SECOND (syntax) && SYNTAX_FLAGS_COMEND_FIRST (next_syntax)) || ((com2end || comnested) @@ -858,8 +855,8 @@ static Lisp_Object Vsyntax_code_object; DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0, doc: /* Return the syntax code of CHARACTER, described by a character. -For example, if CHARACTER is a word constituent, -the character `w' is returned. +For example, if CHARACTER is a word constituent, the +character `w' (119) is returned. 
The characters that correspond to various syntax codes are listed in the documentation of `modify-syntax-entry'. */) (character) @@ -917,8 +914,7 @@ text property. */) if (*p) { int len; - int character = (STRING_CHAR_AND_LENGTH - (p, SBYTES (string) - 1, len)); + int character = STRING_CHAR_AND_LENGTH (p, len); XSETINT (match, character); if (XFASTINT (match) == ' ') match = Qnil; @@ -960,7 +956,7 @@ text property. */) break; } - if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match)) + if (val < XVECTOR_SIZE (Vsyntax_code_object) && NILP (match)) return XVECTOR (Vsyntax_code_object)->contents[val]; else /* Since we can't use a shared object, let's make a new one. */ @@ -975,7 +971,7 @@ DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3, The syntax is changed only for table SYNTAX-TABLE, which defaults to the current buffer's syntax table. CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters -in the range MIN and MAX are changed. +in the range MIN to MAX are changed. The first character of NEWENTRY should be one of the following: Space or - whitespace syntax. w word constituent. _ symbol constituent. . punctuation. @@ -1008,7 +1004,7 @@ this flag: p means CHAR is a prefix character for `backward-prefix-chars'; such characters are treated as whitespace when they occur between expressions. -usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */) +usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */) (c, newentry, syntax_table) Lisp_Object c, newentry, syntax_table; { @@ -1255,7 +1251,7 @@ scan_words (from, count) if ((code != Sword && (! words_include_escapes || (code != Sescape && code != Scharquote))) - || ! EQ (CHAR_TABLE_REF (Vchar_script_table, ch1), script)) + || word_boundary_p (ch0, ch1)) break; INC_BOTH (from, from_byte); ch0 = ch1; @@ -1308,7 +1304,7 @@ scan_words (from, count) if ((code != Sword && (! 
words_include_escapes || (code != Sescape && code != Scharquote))) - || ! EQ (CHAR_TABLE_REF (Vchar_script_table, ch0), script)) + || word_boundary_p (ch0, ch1)) { INC_BOTH (from, from_byte); break; @@ -1324,7 +1320,7 @@ scan_words (from, count) return from; } -DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "p", +DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p", doc: /* Move point forward ARG words (backward if ARG is negative). Normally returns t. If an edge of the buffer or a field boundary is reached, point is left there @@ -1360,7 +1356,7 @@ DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0, doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM. STRING is like the inside of a `[...]' in a regular expression except that `]' is never special and `\\' quotes `^', `-' or `\\' - (but not as the end of a range; quoting is never needed there). + (but not at the end of a range; quoting is never needed there). Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter. With arg "^a-zA-Z", skips nonletters stopping before first letter. Char classes, e.g. `[:alpha:]', are supported. @@ -1562,14 +1558,14 @@ skip_chars (forwardp, string, lim, handle_iso_classes) bzero (fastmap + 0200, 0200); /* We are sure that this loop stops. */ for (i = 0200; ! 
fastmap2[i]; i++); - c = unibyte_char_to_multibyte (i); + c = BYTE8_TO_CHAR (i); fastmap[CHAR_LEADING_CODE (c)] = 1; range_start_byte = i; range_start_char = c; char_ranges = (int *) alloca (sizeof (int) * 128 * 2); for (i = 129; i < 0400; i++) { - c = unibyte_char_to_multibyte (i); + c = BYTE8_TO_CHAR (i); fastmap[CHAR_LEADING_CODE (c)] = 1; if (i - range_start_byte != c - range_start_char) { @@ -1594,12 +1590,12 @@ skip_chars (forwardp, string, lim, handle_iso_classes) unsigned char leading_code; leading_code = str[i_byte]; - c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len); + c = STRING_CHAR_AND_LENGTH (str + i_byte, len); i_byte += len; if (handle_iso_classes && c == '[' && i_byte < size_byte - && STRING_CHAR (str + i_byte, size_byte - i_byte) == ':') + && STRING_CHAR (str + i_byte) == ':') { const unsigned char *class_beg = str + i_byte + 1; const unsigned char *class_end = class_beg; @@ -1639,8 +1635,7 @@ skip_chars (forwardp, string, lim, handle_iso_classes) break; leading_code = str[i_byte]; - c = STRING_CHAR_AND_LENGTH (str + i_byte, - size_byte - i_byte, len); + c = STRING_CHAR_AND_LENGTH (str + i_byte, len); i_byte += len; } /* Treat `-' as range character only if another character @@ -1656,15 +1651,14 @@ skip_chars (forwardp, string, lim, handle_iso_classes) /* Get the end of the range. 
*/ leading_code2 = str[i_byte]; - c2 = STRING_CHAR_AND_LENGTH (str + i_byte, - size_byte - i_byte, len); + c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len); i_byte += len; if (c2 == '\\' && i_byte < size_byte) { leading_code2 = str[i_byte]; - c2 =STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len); + c2 =STRING_CHAR_AND_LENGTH (str + i_byte, len); i_byte += len; } @@ -1713,7 +1707,11 @@ skip_chars (forwardp, string, lim, handle_iso_classes) int c2 = char_ranges[i + 1]; for (; c1 <= c2; c1++) - fastmap[CHAR_TO_BYTE8 (c1)] = 1; + { + int b = CHAR_TO_BYTE_SAFE (c1); + if (b >= 0) + fastmap[b] = 1; + } } } } @@ -1752,6 +1750,12 @@ skip_chars (forwardp, string, lim, handle_iso_classes) } immediate_quit = 1; + /* This code may look up syntax tables using macros that rely on the + gl_state object. To make sure this object is not out of date, + let's initialize it manually. + We ignore syntax-table text-properties for now, since that's + what we've done in the past. */ + SETUP_SYNTAX_TABLE (BEGV, 0); if (forwardp) { if (multibyte) @@ -1766,7 +1770,7 @@ skip_chars (forwardp, string, lim, handle_iso_classes) p = GAP_END_ADDR; stop = endp; } - c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes); + c = STRING_CHAR_AND_LENGTH (p, nbytes); if (! NILP (iso_classes) && in_classes (c, iso_classes)) { if (negate) @@ -1837,7 +1841,7 @@ skip_chars (forwardp, string, lim, handle_iso_classes) } prev_p = p; while (--p >= stop && ! CHAR_HEAD_P (*p)); - c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH); + c = STRING_CHAR (p); if (! NILP (iso_classes) && in_classes (c, iso_classes)) { @@ -1991,7 +1995,7 @@ skip_syntaxes (forwardp, string, lim) p = GAP_END_ADDR; stop = endp; } - c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes); + c = STRING_CHAR_AND_LENGTH (p, nbytes); if (! 
fastmap[(int) SYNTAX (c)]) break; p += nbytes, pos++, pos_byte += nbytes; @@ -2034,7 +2038,7 @@ skip_syntaxes (forwardp, string, lim) UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1); prev_p = p; while (--p >= stop && ! CHAR_HEAD_P (*p)); - c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH); + c = STRING_CHAR (p); if (! fastmap[(int) SYNTAX (c)]) break; pos--, pos_byte -= prev_p - p; @@ -2077,7 +2081,7 @@ in_classes (c, iso_classes) { int fits_class = 0; - while (! NILP (iso_classes)) + while (CONSP (iso_classes)) { Lisp_Object elt; elt = XCAR (iso_classes); @@ -2501,7 +2505,8 @@ scan_lists (from, count, depth, sexpflag) { case Sescape: case Scharquote: - if (from == stop) goto lose; + if (from == stop) + goto lose; INC_BOTH (from, from_byte); /* treat following character as a word constituent */ case Sword: @@ -2520,7 +2525,8 @@ scan_lists (from, count, depth, sexpflag) case Scharquote: case Sescape: INC_BOTH (from, from_byte); - if (from == stop) goto lose; + if (from == stop) + goto lose; break; case Sword: case Ssymbol: @@ -2586,7 +2592,8 @@ scan_lists (from, count, depth, sexpflag) stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos); while (1) { - if (from >= stop) goto lose; + if (from >= stop) + goto lose; UPDATE_SYNTAX_TABLE_FORWARD (from); c = FETCH_CHAR_AS_MULTIBYTE (from_byte); if (code == Sstring @@ -2615,7 +2622,8 @@ scan_lists (from, count, depth, sexpflag) } /* Reached end of buffer. 
Error if within object, return nil if between */ - if (depth) goto lose; + if (depth) + goto lose; immediate_quit = 0; return Qnil; @@ -2750,7 +2758,8 @@ scan_lists (from, count, depth, sexpflag) case Sstring_fence: while (1) { - if (from == stop) goto lose; + if (from == stop) + goto lose; DEC_BOTH (from, from_byte); UPDATE_SYNTAX_TABLE_BACKWARD (from); if (!char_quoted (from, from_byte) @@ -2765,7 +2774,8 @@ scan_lists (from, count, depth, sexpflag) stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte); while (1) { - if (from == stop) goto lose; + if (from == stop) + goto lose; DEC_BOTH (from, from_byte); UPDATE_SYNTAX_TABLE_BACKWARD (from); if (!char_quoted (from, from_byte) @@ -2783,7 +2793,8 @@ scan_lists (from, count, depth, sexpflag) } /* Reached start of buffer. Error if within object, return nil if between */ - if (depth) goto lose; + if (depth) + goto lose; immediate_quit = 0; return Qnil; @@ -2900,9 +2911,9 @@ static void scan_sexps_forward (stateptr, from, from_byte, end, targetdepth, stopbefore, oldstate, commentstop) struct lisp_parse_state *stateptr; - register int from; - int from_byte; - int end, targetdepth, stopbefore; + register EMACS_INT from; + EMACS_INT from_byte, end; + int targetdepth, stopbefore; Lisp_Object oldstate; int commentstop; { @@ -2921,8 +2932,8 @@ scan_sexps_forward (stateptr, from, from_byte, end, targetdepth, int mindepth; /* Lowest DEPTH value seen. */ int start_quoted = 0; /* Nonzero means starting after a char quote */ Lisp_Object tem; - int prev_from; /* Keep one character before FROM. */ - int prev_from_byte; + EMACS_INT prev_from; /* Keep one character before FROM. */ + EMACS_INT prev_from_byte; int prev_from_syntax; int boundary_stop = commentstop == -1; int nofence; @@ -3327,17 +3338,17 @@ init_syntax_once () Lisp_Object temp; /* This has to be done here, before we call Fmake_char_table. 
*/ - Qsyntax_table = intern ("syntax-table"); + Qsyntax_table = intern_c_string ("syntax-table"); staticpro (&Qsyntax_table); - /* Intern this now in case it isn't already done. + /* Intern_C_String this now in case it isn't already done. Setting this variable twice is harmless. But don't staticpro it here--that is done in alloc.c. */ - Qchar_table_extra_slots = intern ("char-table-extra-slots"); + Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots"); /* Create objects which can be shared among syntax tables. */ Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil); - for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++) + for (i = 0; i < XVECTOR_SIZE (Vsyntax_code_object); i++) XVECTOR (Vsyntax_code_object)->contents[i] = Fcons (make_number (i), Qnil); @@ -3413,7 +3424,7 @@ init_syntax_once () void syms_of_syntax () { - Qsyntax_table_p = intern ("syntax-table-p"); + Qsyntax_table_p = intern_c_string ("syntax-table-p"); staticpro (&Qsyntax_table_p); staticpro (&Vsyntax_code_object); @@ -3426,12 +3437,12 @@ syms_of_syntax () /* Defined in regex.c */ staticpro (&re_match_object); - Qscan_error = intern ("scan-error"); + Qscan_error = intern_c_string ("scan-error"); staticpro (&Qscan_error); Fput (Qscan_error, Qerror_conditions, - Fcons (Qscan_error, Fcons (Qerror, Qnil))); + pure_cons (Qscan_error, pure_cons (Qerror, Qnil))); Fput (Qscan_error, Qerror_message, - build_string ("Scan error")); + make_pure_c_string ("Scan error")); DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments, doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);