/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
- Copyright (C) 1985, 1987, 1993, 1994, 1995, 1997, 1998, 1999, 2002,
- 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+ Copyright (C) 1985, 1987, 1993, 1994, 1995, 1997, 1998, 1999, 2001,
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ Free Software Foundation, Inc.
This file is part of GNU Emacs.
-GNU Emacs is free software; you can redistribute it and/or modify
+GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with GNU Emacs; see the file COPYING. If not, write to
-the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-Boston, MA 02110-1301, USA. */
+along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
struct lisp_parse_state
{
- int depth; /* Depth at end of parsing. */
- int instring; /* -1 if not within string, else desired terminator. */
- int incomment; /* -1 if in unnestable comment else comment nesting */
- int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
- int quoted; /* Nonzero if just after an escape char at end of parsing */
- int thislevelstart; /* Char number of most recent start-of-expression at current level */
- int prevlevelstart; /* Char number of start of containing expression */
- int location; /* Char number at which parsing stopped. */
- int mindepth; /* Minimum depth seen while scanning. */
- int comstr_start; /* Position just after last comment/string starter. */
- Lisp_Object levelstarts; /* Char numbers of starts-of-expression
- of levels (starting from outermost). */
+ int depth; /* Depth at end of parsing. */
+ int instring; /* -1 if not within string, else desired terminator. */
+ int incomment; /* -1 if in unnestable comment else comment nesting */
+ int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
+ int quoted; /* Nonzero if just after an escape char at end of parsing */
+ int mindepth; /* Minimum depth seen while scanning. */
+ /* Char number of most recent start-of-expression at current level */
+ EMACS_INT thislevelstart;
+ /* Char number of start of containing expression */
+ EMACS_INT prevlevelstart;
+ EMACS_INT location; /* Char number at which parsing stopped. */
+ EMACS_INT comstr_start; /* Position of last comment/string starter. */
+ Lisp_Object levelstarts; /* Char numbers of starts-of-expression
+ of levels (starting from outermost). */
};
\f
/* These variables are a cache for finding the start of a defun.
find_start_begv is the BEGV value when it was found.
find_start_modiff is the value of MODIFF when it was found. */
-static int find_start_pos;
-static int find_start_value;
-static int find_start_value_byte;
+static EMACS_INT find_start_pos;
+static EMACS_INT find_start_value;
+static EMACS_INT find_start_value_byte;
static struct buffer *find_start_buffer;
-static int find_start_begv;
+static EMACS_INT find_start_begv;
static int find_start_modiff;
-static int find_defun_start P_ ((int, int));
-static int back_comment P_ ((int, int, int, int, int, int *, int *));
-static int char_quoted P_ ((int, int));
static Lisp_Object skip_chars P_ ((int, Lisp_Object, Lisp_Object, int));
static Lisp_Object skip_syntaxes P_ ((int, Lisp_Object, Lisp_Object));
-static Lisp_Object scan_lists P_ ((int, int, int, int));
+static Lisp_Object scan_lists P_ ((EMACS_INT, EMACS_INT, EMACS_INT, int));
static void scan_sexps_forward P_ ((struct lisp_parse_state *,
- int, int, int, int,
+ EMACS_INT, EMACS_INT, EMACS_INT, int,
int, Lisp_Object, int));
static int in_classes P_ ((int, Lisp_Object));
\f
or after. On return global syntax data is good for lookup at CHARPOS. */
static int
-char_quoted (charpos, bytepos)
- register int charpos, bytepos;
+char_quoted (EMACS_INT charpos, EMACS_INT bytepos)
{
register enum syntaxcode code;
- register int beg = BEGV;
+ register EMACS_INT beg = BEGV;
register int quoted = 0;
- int orig = charpos;
-
- DEC_BOTH (charpos, bytepos);
+ EMACS_INT orig = charpos;
- while (charpos >= beg)
+ while (charpos > beg)
{
int c;
+ DEC_BOTH (charpos, bytepos);
UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
if (! (code == Scharquote || code == Sescape))
break;
- DEC_BOTH (charpos, bytepos);
quoted = !quoted;
}
/* Return the bytepos one character after BYTEPOS.
We assume that BYTEPOS is not at the end of the buffer. */
-INLINE int
+INLINE EMACS_INT
inc_bytepos (bytepos)
- int bytepos;
+ EMACS_INT bytepos;
{
if (NILP (current_buffer->enable_multibyte_characters))
return bytepos + 1;
/* Return the bytepos one character before BYTEPOS.
We assume that BYTEPOS is not at the start of the buffer. */
-INLINE int
+INLINE EMACS_INT
dec_bytepos (bytepos)
- int bytepos;
+ EMACS_INT bytepos;
{
if (NILP (current_buffer->enable_multibyte_characters))
return bytepos - 1;
valid on return from the subroutine, so the caller should explicitly
update the global data. */
-static int
+static EMACS_INT
find_defun_start (pos, pos_byte)
- int pos, pos_byte;
+ EMACS_INT pos, pos_byte;
{
- int opoint = PT, opoint_byte = PT_BYTE;
+ EMACS_INT opoint = PT, opoint_byte = PT_BYTE;
if (!open_paren_in_column_0_is_defun_start)
{
static int
back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
- int from, from_byte, stop;
+ EMACS_INT from, from_byte, stop;
int comnested, comstyle;
- int *charpos_ptr, *bytepos_ptr;
+ EMACS_INT *charpos_ptr, *bytepos_ptr;
{
/* Look back, counting the parity of string-quotes,
and recording the comment-starters seen.
inside another comment).
Test case: { a (* b } c (* d *) */
int comment_lossage = 0;
- int comment_end = from;
- int comment_end_byte = from_byte;
- int comstart_pos = 0;
- int comstart_byte;
+ EMACS_INT comment_end = from;
+ EMACS_INT comment_end_byte = from_byte;
+ EMACS_INT comstart_pos = 0;
+ EMACS_INT comstart_byte;
/* Place where the containing defun starts,
or 0 if we didn't come across it yet. */
- int defun_start = 0;
- int defun_start_byte = 0;
+ EMACS_INT defun_start = 0;
+ EMACS_INT defun_start_byte = 0;
register enum syntaxcode code;
int nesting = 1; /* current comment nesting */
int c;
check_syntax_table (obj)
Lisp_Object obj;
{
- if (!(CHAR_TABLE_P (obj)
- && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table)))
- wrong_type_argument (Qsyntax_table_p, obj);
+ CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
+ Qsyntax_table_p, obj);
}
DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
doc: /* Convert a syntax specification STRING into syntax cell form.
STRING should be a string as it is allowed as argument of
`modify-syntax-entry'. Value is the equivalent cons cell
-(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
+\(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
text property. */)
(string)
Lisp_Object string;
SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
else
SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
+
+ /* We clear the regexp cache, since character classes can now have
+ different values from those in the compiled regexps.  */
+ clear_regexp_cache ();
+
return Qnil;
}
\f
script = CHAR_TABLE_REF (Vchar_script_table, ch1);
while (1)
{
- int temp_byte;
-
if (from == beg)
break;
- temp_byte = dec_bytepos (from_byte);
+ DEC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (from);
- ch0 = FETCH_CHAR_AS_MULTIBYTE (temp_byte);
+ ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
code = SYNTAX (ch0);
if ((code != Sword
&& (! words_include_escapes
|| (code != Sescape && code != Scharquote)))
|| ! EQ (CHAR_TABLE_REF (Vchar_script_table, ch0), script))
- break;
- DEC_BOTH (from, from_byte);
+ {
+ INC_BOTH (from, from_byte);
+ break;
+ }
ch1 = ch0;
}
}
return from;
}
-DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "p",
+DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
doc: /* Move point forward ARG words (backward if ARG is negative).
Normally returns t.
If an edge of the buffer or a field boundary is reached, point is left there
const unsigned char *class_beg = str + i_byte + 1;
const unsigned char *class_end = class_beg;
const unsigned char *class_limit = str + size_byte - 2;
- /* Leave room for the null. */
+ /* Leave room for the null. */
unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
re_wctype_t cc;
int c2 = char_ranges[i + 1];
for (; c1 <= c2; c1++)
- fastmap[CHAR_TO_BYTE8 (c1)] = 1;
+ {
+ int b = CHAR_TO_BYTE_SAFE (c1);
+ if (b >= 0)
+ fastmap[b] = 1;
+ }
}
}
}
p = GPT_ADDR;
stop = endp;
}
+ UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
prev_p = p;
while (--p >= stop && ! CHAR_HEAD_P (*p));
c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
if (! fastmap[(int) SYNTAX (c)])
break;
pos--, pos_byte -= prev_p - p;
- UPDATE_SYNTAX_TABLE_BACKWARD (pos);
}
}
else
p = GPT_ADDR;
stop = endp;
}
+ UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
if (! fastmap[(int) SYNTAX (p[-1])])
break;
p--, pos--, pos_byte--;
- UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
}
}
}
static int
forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
charpos_ptr, bytepos_ptr, incomment_ptr)
- int from, from_byte, stop;
+ EMACS_INT from, from_byte, stop;
int nesting, style, prev_syntax;
- int *charpos_ptr, *bytepos_ptr, *incomment_ptr;
+ EMACS_INT *charpos_ptr, *bytepos_ptr;
+ int *incomment_ptr;
{
register int c, c1;
register enum syntaxcode code;
(count)
Lisp_Object count;
{
- register int from;
- int from_byte;
- register int stop;
+ register EMACS_INT from;
+ EMACS_INT from_byte;
+ register EMACS_INT stop;
register int c, c1;
register enum syntaxcode code;
int comstyle = 0; /* style of comment encountered */
int comnested = 0; /* whether the comment is nestable or not */
int found;
- int count1;
- int out_charpos, out_bytepos;
+ EMACS_INT count1;
+ EMACS_INT out_charpos, out_bytepos;
int dummy;
CHECK_NUMBER (count);
while (1)
{
DEC_BOTH (from, from_byte);
- if (from == stop)
- break;
UPDATE_SYNTAX_TABLE_BACKWARD (from);
c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
if (SYNTAX (c) == Scomment_fence
found = 1;
break;
}
+ else if (from == stop)
+ break;
}
if (found == 0)
{
from_byte = ini_byte;
goto leave;
}
+ else
+ /* We have skipped one comment. */
+ break;
}
else if (code == Sendcomment)
{
static Lisp_Object
scan_lists (from, count, depth, sexpflag)
- register int from;
- int count, depth, sexpflag;
+ register EMACS_INT from;
+ EMACS_INT count, depth;
+ int sexpflag;
{
Lisp_Object val;
- register int stop = count > 0 ? ZV : BEGV;
+ register EMACS_INT stop = count > 0 ? ZV : BEGV;
register int c, c1;
int stringterm;
int quoted;
int min_depth = depth; /* Err out if depth gets less than this. */
int comstyle = 0; /* style of comment encountered */
int comnested = 0; /* whether the comment is nestable or not */
- int temp_pos;
- int last_good = from;
+ EMACS_INT temp_pos;
+ EMACS_INT last_good = from;
int found;
- int from_byte;
- int out_bytepos, out_charpos;
+ EMACS_INT from_byte;
+ EMACS_INT out_bytepos, out_charpos;
int temp, dummy;
int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
{
case Sescape:
case Scharquote:
- if (from == stop) goto lose;
+ if (from == stop)
+ goto lose;
INC_BOTH (from, from_byte);
/* treat following character as a word constituent */
case Sword:
case Scharquote:
case Sescape:
INC_BOTH (from, from_byte);
- if (from == stop) goto lose;
+ if (from == stop)
+ goto lose;
break;
case Sword:
case Ssymbol:
close1:
if (!--depth) goto done;
if (depth < min_depth)
- Fsignal (Qscan_error,
- Fcons (build_string ("Containing expression ends prematurely"),
- Fcons (make_number (last_good),
- Fcons (make_number (from), Qnil))));
+ xsignal3 (Qscan_error,
+ build_string ("Containing expression ends prematurely"),
+ make_number (last_good), make_number (from));
break;
case Sstring:
stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
while (1)
{
- if (from >= stop) goto lose;
+ if (from >= stop)
+ goto lose;
UPDATE_SYNTAX_TABLE_FORWARD (from);
c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
if (code == Sstring
}
/* Reached end of buffer. Error if within object, return nil if between */
- if (depth) goto lose;
+ if (depth)
+ goto lose;
immediate_quit = 0;
return Qnil;
open2:
if (!--depth) goto done2;
if (depth < min_depth)
- Fsignal (Qscan_error,
- Fcons (build_string ("Containing expression ends prematurely"),
- Fcons (make_number (last_good),
- Fcons (make_number (from), Qnil))));
+ xsignal3 (Qscan_error,
+ build_string ("Containing expression ends prematurely"),
+ make_number (last_good), make_number (from));
break;
case Sendcomment:
case Sstring_fence:
while (1)
{
- if (from == stop) goto lose;
+ if (from == stop)
+ goto lose;
DEC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (from);
if (!char_quoted (from, from_byte)
stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
while (1)
{
- if (from == stop) goto lose;
+ if (from == stop)
+ goto lose;
DEC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (from);
if (!char_quoted (from, from_byte)
}
/* Reached start of buffer. Error if within object, return nil if between */
- if (depth) goto lose;
+ if (depth)
+ goto lose;
immediate_quit = 0;
return Qnil;
return val;
lose:
- Fsignal (Qscan_error,
- Fcons (build_string ("Unbalanced parentheses"),
- Fcons (make_number (last_good),
- Fcons (make_number (from), Qnil))));
- abort ();
- /* NOTREACHED */
+ xsignal3 (Qscan_error,
+ build_string ("Unbalanced parentheses"),
+ make_number (last_good), make_number (from));
}
DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
stopbefore, oldstate, commentstop)
struct lisp_parse_state *stateptr;
- register int from;
- int from_byte;
- int end, targetdepth, stopbefore;
+ register EMACS_INT from;
+ EMACS_INT from_byte, end;
+ int targetdepth, stopbefore;
Lisp_Object oldstate;
int commentstop;
{
int mindepth; /* Lowest DEPTH value seen. */
int start_quoted = 0; /* Nonzero means starting after a char quote */
Lisp_Object tem;
- int prev_from; /* Keep one character before FROM. */
- int prev_from_byte;
+ EMACS_INT prev_from; /* Keep one character before FROM. */
+ EMACS_INT prev_from_byte;
int prev_from_syntax;
int boundary_stop = commentstop == -1;
int nofence;
int found;
- int out_bytepos, out_charpos;
+ EMACS_INT out_bytepos, out_charpos;
int temp;
prev_from = from;
any character that starts a sexp.
Fifth arg OLDSTATE is a list like what this function returns.
It is used to initialize the state of the parse. Elements number 1, 2, 6
- and 8 are ignored; you can leave off element 8 (the last) entirely.
+ and 8 are ignored.
Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
If it is symbol `syntax-table', stop after the start of a comment or a
string, or after end of a comment or a string. */)
Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
+ /* Control characters should not be whitespace. */
+ temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
+ for (i = 0; i <= ' ' - 1; i++)
+ SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
+ SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
+
+ /* Except that a few really are whitespace. */
+ temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
+ SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
+ SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
+ SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
+ SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
+ SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
+
temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
for (i = 'a'; i <= 'z'; i++)
SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);