X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/341dd15a7bd9d0b4adff846e94289b3e1877eed1..6e104790e756226cbb5b7feaf01854103ded9f36:/src/syntax.c
diff --git a/src/syntax.c b/src/syntax.c
index f282ed3a26..a80f047e43 100644
--- a/src/syntax.c
+++ b/src/syntax.c
@@ -1,14 +1,14 @@
/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
Copyright (C) 1985, 1987, 1993, 1994, 1995, 1997, 1998, 1999, 2001,
- 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
Free Software Foundation, Inc.
This file is part of GNU Emacs.
-GNU Emacs is free software; you can redistribute it and/or modify
+GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,13 +16,12 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with GNU Emacs; see the file COPYING. If not, write to
-the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-Boston, MA 02110-1301, USA. */
+along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#include <ctype.h>
+#include <setjmp.h>
#include "lisp.h"
#include "commands.h"
#include "buffer.h"
@@ -67,18 +66,20 @@ int open_paren_in_column_0_is_defun_start;
struct lisp_parse_state
{
- int depth; /* Depth at end of parsing. */
- int instring; /* -1 if not within string, else desired terminator. */
- int incomment; /* -1 if in unnestable comment else comment nesting */
- int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
- int quoted; /* Nonzero if just after an escape char at end of parsing */
- int thislevelstart; /* Char number of most recent start-of-expression at current level */
- int prevlevelstart; /* Char number of start of containing expression */
- int location; /* Char number at which parsing stopped. */
- int mindepth; /* Minimum depth seen while scanning. */
- int comstr_start; /* Position just after last comment/string starter. */
- Lisp_Object levelstarts; /* Char numbers of starts-of-expression
- of levels (starting from outermost). */
+ int depth; /* Depth at end of parsing. */
+ int instring; /* -1 if not within string, else desired terminator. */
+ int incomment; /* -1 if in unnestable comment else comment nesting */
+ int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
+ int quoted; /* Nonzero if just after an escape char at end of parsing */
+ int mindepth; /* Minimum depth seen while scanning. */
+ /* Char number of most recent start-of-expression at current level */
+ EMACS_INT thislevelstart;
+ /* Char number of start of containing expression */
+ EMACS_INT prevlevelstart;
+ EMACS_INT location; /* Char number at which parsing stopped. */
+ EMACS_INT comstr_start; /* Position of last comment/string starter. */
+ Lisp_Object levelstarts; /* Char numbers of starts-of-expression
+ of levels (starting from outermost). */
};
/* These variables are a cache for finding the start of a defun.
@@ -89,23 +90,19 @@ struct lisp_parse_state
find_start_begv is the BEGV value when it was found.
find_start_modiff is the value of MODIFF when it was found. */
-static int find_start_pos;
-static int find_start_value;
-static int find_start_value_byte;
+static EMACS_INT find_start_pos;
+static EMACS_INT find_start_value;
+static EMACS_INT find_start_value_byte;
static struct buffer *find_start_buffer;
-static int find_start_begv;
+static EMACS_INT find_start_begv;
static int find_start_modiff;
-static int find_defun_start P_ ((int, int));
-static int back_comment P_ ((EMACS_INT, EMACS_INT, EMACS_INT, int, int,
- EMACS_INT *, EMACS_INT *));
-static int char_quoted P_ ((int, int));
static Lisp_Object skip_chars P_ ((int, Lisp_Object, Lisp_Object, int));
static Lisp_Object skip_syntaxes P_ ((int, Lisp_Object, Lisp_Object));
static Lisp_Object scan_lists P_ ((EMACS_INT, EMACS_INT, EMACS_INT, int));
static void scan_sexps_forward P_ ((struct lisp_parse_state *,
- int, int, int, int,
+ EMACS_INT, EMACS_INT, EMACS_INT, int,
int, Lisp_Object, int));
static int in_classes P_ ((int, Lisp_Object));
@@ -292,19 +289,17 @@ update_syntax_table (charpos, count, init, object)
or after. On return global syntax data is good for lookup at CHARPOS. */
static int
-char_quoted (charpos, bytepos)
- register int charpos, bytepos;
+char_quoted (EMACS_INT charpos, EMACS_INT bytepos)
{
register enum syntaxcode code;
- register int beg = BEGV;
+ register EMACS_INT beg = BEGV;
register int quoted = 0;
- int orig = charpos;
-
- DEC_BOTH (charpos, bytepos);
+ EMACS_INT orig = charpos;
- while (charpos >= beg)
+ while (charpos > beg)
{
int c;
+ DEC_BOTH (charpos, bytepos);
UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
@@ -312,7 +307,6 @@ char_quoted (charpos, bytepos)
if (! (code == Scharquote || code == Sescape))
break;
- DEC_BOTH (charpos, bytepos);
quoted = !quoted;
}
@@ -323,9 +317,9 @@ char_quoted (charpos, bytepos)
/* Return the bytepos one character after BYTEPOS.
We assume that BYTEPOS is not at the end of the buffer. */
-INLINE int
+INLINE EMACS_INT
inc_bytepos (bytepos)
- int bytepos;
+ EMACS_INT bytepos;
{
if (NILP (current_buffer->enable_multibyte_characters))
return bytepos + 1;
@@ -337,9 +331,9 @@ inc_bytepos (bytepos)
/* Return the bytepos one character before BYTEPOS.
We assume that BYTEPOS is not at the start of the buffer. */
-INLINE int
+INLINE EMACS_INT
dec_bytepos (bytepos)
- int bytepos;
+ EMACS_INT bytepos;
{
if (NILP (current_buffer->enable_multibyte_characters))
return bytepos - 1;
@@ -348,7 +342,7 @@ dec_bytepos (bytepos)
return bytepos;
}
-/* Return a defun-start position before before POS and not too far before.
+/* Return a defun-start position before POS and not too far before.
It should be the last one before POS, or nearly the last.
When open_paren_in_column_0_is_defun_start is nonzero,
@@ -362,11 +356,11 @@ dec_bytepos (bytepos)
valid on return from the subroutine, so the caller should explicitly
update the global data. */
-static int
+static EMACS_INT
find_defun_start (pos, pos_byte)
- int pos, pos_byte;
+ EMACS_INT pos, pos_byte;
{
- int opoint = PT, opoint_byte = PT_BYTE;
+ EMACS_INT opoint = PT, opoint_byte = PT_BYTE;
if (!open_paren_in_column_0_is_defun_start)
{
@@ -391,8 +385,7 @@ find_defun_start (pos, pos_byte)
/* We optimize syntax-table lookup for rare updates. Thus we accept
only those `^\s(' which are good in global _and_ text-property
syntax-tables. */
- gl_state.current_syntax_table = current_buffer->syntax_table;
- gl_state.use_global = 0;
+ SETUP_BUFFER_SYNTAX_TABLE ();
while (PT > BEGV)
{
int c;
@@ -407,8 +400,7 @@ find_defun_start (pos, pos_byte)
if (SYNTAX (c) == Sopen)
break;
/* Now fallback to the default value. */
- gl_state.current_syntax_table = current_buffer->syntax_table;
- gl_state.use_global = 0;
+ SETUP_BUFFER_SYNTAX_TABLE ();
}
/* Move to beg of previous line. */
scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
@@ -495,14 +487,14 @@ back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_p
inside another comment).
Test case: { a (* b } c (* d *) */
int comment_lossage = 0;
- int comment_end = from;
- int comment_end_byte = from_byte;
- int comstart_pos = 0;
- int comstart_byte;
+ EMACS_INT comment_end = from;
+ EMACS_INT comment_end_byte = from_byte;
+ EMACS_INT comstart_pos = 0;
+ EMACS_INT comstart_byte;
/* Place where the containing defun starts,
or 0 if we didn't come across it yet. */
- int defun_start = 0;
- int defun_start_byte = 0;
+ EMACS_INT defun_start = 0;
+ EMACS_INT defun_start_byte = 0;
register enum syntaxcode code;
int nesting = 1; /* current comment nesting */
int c;
@@ -858,19 +850,17 @@ static Lisp_Object Vsyntax_code_object;
DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
doc: /* Return the syntax code of CHARACTER, described by a character.
-For example, if CHARACTER is a word constituent,
-the character `w' is returned.
+For example, if CHARACTER is a word constituent, the
+character `w' (119) is returned.
The characters that correspond to various syntax codes
are listed in the documentation of `modify-syntax-entry'. */)
(character)
Lisp_Object character;
{
int char_int;
- gl_state.current_syntax_table = current_buffer->syntax_table;
-
- gl_state.use_global = 0;
CHECK_NUMBER (character);
char_int = XINT (character);
+ SETUP_BUFFER_SYNTAX_TABLE ();
return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
}
@@ -880,10 +870,9 @@ DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
Lisp_Object character;
{
int char_int, code;
- gl_state.current_syntax_table = current_buffer->syntax_table;
- gl_state.use_global = 0;
CHECK_NUMBER (character);
char_int = XINT (character);
+ SETUP_BUFFER_SYNTAX_TABLE ();
code = SYNTAX (char_int);
if (code == Sopen || code == Sclose)
return SYNTAX_MATCH (char_int);
@@ -917,8 +906,7 @@ text property. */)
if (*p)
{
int len;
- int character = (STRING_CHAR_AND_LENGTH
- (p, SBYTES (string) - 1, len));
+ int character = STRING_CHAR_AND_LENGTH (p, len);
XSETINT (match, character);
if (XFASTINT (match) == ' ')
match = Qnil;
@@ -975,7 +963,7 @@ DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
The syntax is changed only for table SYNTAX-TABLE, which defaults to
the current buffer's syntax table.
CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
-in the range MIN and MAX are changed.
+in the range MIN to MAX are changed.
The first character of NEWENTRY should be one of the following:
Space or - whitespace syntax. w word constituent.
_ symbol constituent. . punctuation.
@@ -1008,7 +996,7 @@ this flag:
p means CHAR is a prefix character for `backward-prefix-chars';
such characters are treated as whitespace when they occur
between expressions.
-usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
+usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
(c, newentry, syntax_table)
Lisp_Object c, newentry, syntax_table;
{
@@ -1255,7 +1243,7 @@ scan_words (from, count)
if ((code != Sword
&& (! words_include_escapes
|| (code != Sescape && code != Scharquote)))
- || ! EQ (CHAR_TABLE_REF (Vchar_script_table, ch1), script))
+ || word_boundary_p (ch0, ch1))
break;
INC_BOTH (from, from_byte);
ch0 = ch1;
@@ -1308,7 +1296,7 @@ scan_words (from, count)
if ((code != Sword
&& (! words_include_escapes
|| (code != Sescape && code != Scharquote)))
- || ! EQ (CHAR_TABLE_REF (Vchar_script_table, ch0), script))
+ || word_boundary_p (ch0, ch1))
{
INC_BOTH (from, from_byte);
break;
@@ -1360,7 +1348,7 @@ DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
STRING is like the inside of a `[...]' in a regular expression
except that `]' is never special and `\\' quotes `^', `-' or `\\'
- (but not as the end of a range; quoting is never needed there).
+ (but not at the end of a range; quoting is never needed there).
Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
With arg "^a-zA-Z", skips nonletters stopping before first letter.
Char classes, e.g. `[:alpha:]', are supported.
@@ -1562,14 +1550,14 @@ skip_chars (forwardp, string, lim, handle_iso_classes)
bzero (fastmap + 0200, 0200);
/* We are sure that this loop stops. */
for (i = 0200; ! fastmap2[i]; i++);
- c = unibyte_char_to_multibyte (i);
+ c = BYTE8_TO_CHAR (i);
fastmap[CHAR_LEADING_CODE (c)] = 1;
range_start_byte = i;
range_start_char = c;
char_ranges = (int *) alloca (sizeof (int) * 128 * 2);
for (i = 129; i < 0400; i++)
{
- c = unibyte_char_to_multibyte (i);
+ c = BYTE8_TO_CHAR (i);
fastmap[CHAR_LEADING_CODE (c)] = 1;
if (i - range_start_byte != c - range_start_char)
{
@@ -1594,12 +1582,12 @@ skip_chars (forwardp, string, lim, handle_iso_classes)
unsigned char leading_code;
leading_code = str[i_byte];
- c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
+ c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
i_byte += len;
if (handle_iso_classes && c == '['
&& i_byte < size_byte
- && STRING_CHAR (str + i_byte, size_byte - i_byte) == ':')
+ && STRING_CHAR (str + i_byte) == ':')
{
const unsigned char *class_beg = str + i_byte + 1;
const unsigned char *class_end = class_beg;
@@ -1639,8 +1627,7 @@ skip_chars (forwardp, string, lim, handle_iso_classes)
break;
leading_code = str[i_byte];
- c = STRING_CHAR_AND_LENGTH (str + i_byte,
- size_byte - i_byte, len);
+ c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
i_byte += len;
}
/* Treat `-' as range character only if another character
@@ -1656,15 +1643,14 @@ skip_chars (forwardp, string, lim, handle_iso_classes)
/* Get the end of the range. */
leading_code2 = str[i_byte];
- c2 = STRING_CHAR_AND_LENGTH (str + i_byte,
- size_byte - i_byte, len);
+ c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
i_byte += len;
if (c2 == '\\'
&& i_byte < size_byte)
{
leading_code2 = str[i_byte];
- c2 =STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
+ c2 =STRING_CHAR_AND_LENGTH (str + i_byte, len);
i_byte += len;
}
@@ -1713,7 +1699,11 @@ skip_chars (forwardp, string, lim, handle_iso_classes)
int c2 = char_ranges[i + 1];
for (; c1 <= c2; c1++)
- fastmap[CHAR_TO_BYTE8 (c1)] = 1;
+ {
+ int b = CHAR_TO_BYTE_SAFE (c1);
+ if (b >= 0)
+ fastmap[b] = 1;
+ }
}
}
}
@@ -1752,6 +1742,12 @@ skip_chars (forwardp, string, lim, handle_iso_classes)
}
immediate_quit = 1;
+ /* This code may look up syntax tables using macros that rely on the
+ gl_state object. To make sure this object is not out of date,
+ let's initialize it manually.
+ We ignore syntax-table text-properties for now, since that's
+ what we've done in the past. */
+ SETUP_BUFFER_SYNTAX_TABLE ();
if (forwardp)
{
if (multibyte)
@@ -1766,7 +1762,7 @@ skip_chars (forwardp, string, lim, handle_iso_classes)
p = GAP_END_ADDR;
stop = endp;
}
- c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
+ c = STRING_CHAR_AND_LENGTH (p, nbytes);
if (! NILP (iso_classes) && in_classes (c, iso_classes))
{
if (negate)
@@ -1837,7 +1833,7 @@ skip_chars (forwardp, string, lim, handle_iso_classes)
}
prev_p = p;
while (--p >= stop && ! CHAR_HEAD_P (*p));
- c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
+ c = STRING_CHAR (p);
if (! NILP (iso_classes) && in_classes (c, iso_classes))
{
@@ -1991,7 +1987,7 @@ skip_syntaxes (forwardp, string, lim)
p = GAP_END_ADDR;
stop = endp;
}
- c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
+ c = STRING_CHAR_AND_LENGTH (p, nbytes);
if (! fastmap[(int) SYNTAX (c)])
break;
p += nbytes, pos++, pos_byte += nbytes;
@@ -2034,7 +2030,7 @@ skip_syntaxes (forwardp, string, lim)
UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
prev_p = p;
while (--p >= stop && ! CHAR_HEAD_P (*p));
- c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
+ c = STRING_CHAR (p);
if (! fastmap[(int) SYNTAX (c)])
break;
pos--, pos_byte -= prev_p - p;
@@ -2077,7 +2073,7 @@ in_classes (c, iso_classes)
{
int fits_class = 0;
- while (! NILP (iso_classes))
+ while (CONSP (iso_classes))
{
Lisp_Object elt;
elt = XCAR (iso_classes);
@@ -2907,9 +2903,9 @@ static void
scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
stopbefore, oldstate, commentstop)
struct lisp_parse_state *stateptr;
- register int from;
- int from_byte;
- int end, targetdepth, stopbefore;
+ register EMACS_INT from;
+ EMACS_INT from_byte, end;
+ int targetdepth, stopbefore;
Lisp_Object oldstate;
int commentstop;
{
@@ -2928,8 +2924,8 @@ scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
int mindepth; /* Lowest DEPTH value seen. */
int start_quoted = 0; /* Nonzero means starting after a char quote */
Lisp_Object tem;
- int prev_from; /* Keep one character before FROM. */
- int prev_from_byte;
+ EMACS_INT prev_from; /* Keep one character before FROM. */
+ EMACS_INT prev_from_byte;
int prev_from_syntax;
int boundary_stop = commentstop == -1;
int nofence;
@@ -3334,13 +3330,13 @@ init_syntax_once ()
Lisp_Object temp;
/* This has to be done here, before we call Fmake_char_table. */
- Qsyntax_table = intern ("syntax-table");
+ Qsyntax_table = intern_c_string ("syntax-table");
staticpro (&Qsyntax_table);
- /* Intern this now in case it isn't already done.
+ /* Intern this now in case it isn't already done.
Setting this variable twice is harmless.
But don't staticpro it here--that is done in alloc.c. */
- Qchar_table_extra_slots = intern ("char-table-extra-slots");
+ Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
/* Create objects which can be shared among syntax tables. */
Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
@@ -3420,7 +3416,7 @@ init_syntax_once ()
void
syms_of_syntax ()
{
- Qsyntax_table_p = intern ("syntax-table-p");
+ Qsyntax_table_p = intern_c_string ("syntax-table-p");
staticpro (&Qsyntax_table_p);
staticpro (&Vsyntax_code_object);
@@ -3433,12 +3429,12 @@ syms_of_syntax ()
/* Defined in regex.c */
staticpro (&re_match_object);
- Qscan_error = intern ("scan-error");
+ Qscan_error = intern_c_string ("scan-error");
staticpro (&Qscan_error);
Fput (Qscan_error, Qerror_conditions,
- Fcons (Qscan_error, Fcons (Qerror, Qnil)));
+ pure_cons (Qscan_error, pure_cons (Qerror, Qnil)));
Fput (Qscan_error, Qerror_message,
- build_string ("Scan error"));
+ make_pure_c_string ("Scan error"));
DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);