OFROM[I] is position of the earliest comment-starter seen
which is I+2X quotes from the comment-end.
PARITY is current parity of quotes from the comment end. */
- int parity = 0;
- int my_stringend = 0;
+ int string_style = -1; /* Presumed outside of any string. */
int string_lossage = 0;
+ /* Not a real lossage: indicates that we have passed a matching comment
+ starter plus a non-matching comment-ender, meaning that any matching
+ comment-starter we might see later could be a false positive (hidden
+ inside another comment).
+ Test case: { a (* b } c (* d *) */
+ int comment_lossage = 0;
int comment_end = from;
int comment_end_byte = from_byte;
int comstart_pos = 0;
int comstart_byte;
- /* Value that PARITY had, when we reached the position
- in COMSTART_POS. */
- int comstart_parity = 0;
int scanstart = from - 1;
/* Place where the containing defun starts,
or 0 if we didn't come across it yet. */
if (code != Sendcomment && char_quoted (from, from_byte))
continue;
- /* Track parity of quotes. */
- if (code == Sstring)
+ switch (code)
{
- parity ^= 1;
- if (my_stringend == 0)
- my_stringend = c;
- /* If we have two kinds of string delimiters.
- There's no way to grok this scanning backwards. */
- else if (my_stringend != c)
- string_lossage = 1;
- }
-
- if (code == Sstring_fence || code == Scomment_fence)
- {
- parity ^= 1;
- if (my_stringend == 0)
- my_stringend
- = code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE;
- /* If we have two kinds of string delimiters.
- There's no way to grok this scanning backwards. */
- else if (my_stringend != (code == Sstring_fence
- ? ST_STRING_STYLE : ST_COMMENT_STYLE))
+ case Sstring_fence:
+ case Scomment_fence:
+ c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
+ case Sstring:
+ /* Track parity of quotes. */
+ if (string_style == -1)
+ /* Entering a string. */
+ string_style = c;
+ else if (string_style == c)
+ /* Leaving the string. */
+ string_style = -1;
+ else
+ /* If we have two kinds of string delimiters.
+ There's no way to grok this scanning backwards. */
string_lossage = 1;
- }
+ break;
+
+ case Scomment:
+ /* We've already checked that it is the relevant comstyle. */
+ if (string_style != -1 || comment_lossage || string_lossage)
+ /* There are odd string quotes involved, so let's be careful.
+ Test case in Pascal: " { " a { " } */
+ goto lossage;
- if (code == Scomment)
- /* We've already checked that it is the relevant comstyle. */
- {
- if (comnested && --nesting <= 0 && parity == 0 && !string_lossage)
+ if (!comnested)
+ {
+ /* Record best comment-starter so far. */
+ comstart_pos = from;
+ comstart_byte = from_byte;
+ }
+ else if (--nesting <= 0)
/* nested comments have to be balanced, so we don't need to
keep looking for earlier ones. We use here the same (slightly
incorrect) reasoning as below: since it is followed by uniform
paired string quotes, this comment-start has to be outside of
strings, else the comment-end itself would be inside a string. */
goto done;
+ break;
- /* Record comment-starters according to that
- quote-parity to the comment-end. */
- comstart_parity = parity;
- comstart_pos = from;
- comstart_byte = from_byte;
- }
+ case Sendcomment:
+ if (SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)) == comstyle)
+ /* This is the same style of comment ender as ours. */
+ {
+ if (comnested)
+ nesting++;
+ else
+ /* Anything before that can't count because it would match
+ this comment-ender rather than ours. */
+ from = stop; /* Break out of the loop. */
+ }
+ else if (comstart_pos != 0 || c != '\n')
+ /* We're mixing comment styles here, so we'd better be careful.
+ The (comstart_pos != 0 || c != '\n') check is not quite correct
+ (we should just always set comment_lossage), but removing it
+ would imply that any multiline comment in C would go through
+ lossage, which seems overkill.
+ The failure should only happen in the rare cases such as
+ { (* } *) */
+ comment_lossage = 1;
+ break;
- /* If we find another earlier comment-ender,
- any comment-starts earlier than that don't count
- (because they go with the earlier comment-ender). */
- if (code == Sendcomment
- && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)) == comstyle)
- {
- if (comnested)
- nesting++;
- else
- break;
- }
+ case Sopen:
+ /* Assume a defun-start point is outside of strings. */
+ if (open_paren_in_column_0_is_defun_start
+ && (from == stop
+ || (temp_byte = dec_bytepos (from_byte),
+ FETCH_CHAR (temp_byte) == '\n')))
+ {
+ defun_start = from;
+ defun_start_byte = from_byte;
+ from = stop; /* Break out of the loop. */
+ }
+ break;
- /* Assume a defun-start point is outside of strings. */
- if (code == Sopen
- && (from == stop
- || (temp_byte = dec_bytepos (from_byte),
- FETCH_CHAR (temp_byte) == '\n')))
- {
- defun_start = from;
- defun_start_byte = from_byte;
+ default:
break;
}
}
from_byte = comment_end_byte;
UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
}
- /* If the earliest comment starter
- is followed by uniform paired string quotes or none,
- we know it can't be inside a string
- since if it were then the comment ender would be inside one.
- So it does start a comment. Skip back to it. */
- else if (!comnested && comstart_parity == 0 && !string_lossage)
+ /* If comstart_pos is set and we get here (i.e. didn't jump to `lossage'
+ or `done'), then we've found the beginning of the non-nested comment. */
+ else if (1) /* !comnested */
{
from = comstart_pos;
from_byte = comstart_byte;
}
else
{
+ struct lisp_parse_state state;
+ lossage:
/* We had two kinds of string delimiters mixed up
together. Decode this going forwards.
- Scan fwd from the previous comment ender
+ Scan fwd from a known safe place (beginning-of-defun)
to the one in question; this records where we
last passed a comment starter. */
- struct lisp_parse_state state;
/* If we did not already find the defun start, find it now. */
if (defun_start == 0)
{
defun_start = find_defun_start (comment_end, comment_end_byte);
defun_start_byte = find_start_value_byte;
}
- scan_sexps_forward (&state,
- defun_start, defun_start_byte,
- comment_end - 1, -10000, 0, Qnil, 0);
- if (state.incomment)
- {
- /* scan_sexps_forward changed the direction of search in
- global variables, so we need to update it completely. */
-
- from = state.comstr_start;
- }
- else
+ do
{
- from = comment_end;
- }
+ scan_sexps_forward (&state,
+ defun_start, defun_start_byte,
+ comment_end, -10000, 0, Qnil, 0);
+ defun_start = comment_end;
+ if (state.incomment == (comnested ? 1 : -1)
+ && state.comstyle == comstyle)
+ from = state.comstr_start;
+ else
+ {
+ from = comment_end;
+ if (state.incomment)
+ /* If comment_end is inside some other comment, maybe ours
+ is nested, so we need to try again from within the
+ surrounding comment. Example: { a (* " *) */
+ {
+ /* FIXME: We should advance by one or two chars. */
+ defun_start = state.comstr_start + 2;
+ defun_start_byte = CHAR_TO_BYTE (defun_start);
+ }
+ }
+ } while (defun_start < comment_end);
+
from_byte = CHAR_TO_BYTE (from);
UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
}
Lisp_Object value;
{
register enum syntaxcode code;
- char desc, start1, start2, end1, end2, prefix, comstyle;
+ char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
char str[2];
Lisp_Object first, match_lisp;
end2 = (XINT (first) >> 19) & 1;
prefix = (XINT (first) >> 20) & 1;
comstyle = (XINT (first) >> 21) & 1;
+ comnested = (XINT (first) >> 22) & 1;
if ((int) code < 0 || (int) code >= (int) Smax)
{
insert ("p", 1);
if (comstyle)
insert ("b", 1);
+ if (comnested)
+ insert ("n", 1);
insert_string ("\twhich means: ");
insert_string (",\n\t is the second character of a comment-end sequence");
if (comstyle)
insert_string (" (comment style b)");
+ if (comnested)
+ insert_string (" (nestable)");
if (prefix)
insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
/* Avoid jumping out of an input field. */
val = XFASTINT (Fconstrain_to_field (make_number (val), make_number (PT),
- Qt, Qnil));
+ Qt, Qnil, Qnil));
SET_PT (val);
return val == orig_val ? Qt : Qnil;
int multibyte = !NILP (current_buffer->enable_multibyte_characters);
int string_multibyte;
int size_byte;
+ unsigned char *str;
+ int len;
CHECK_STRING (string, 0);
char_ranges = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2);
string_multibyte = STRING_MULTIBYTE (string);
+ str = XSTRING (string)->data;
size_byte = STRING_BYTES (XSTRING (string));
+ /* Adjust the multibyteness of the string to that of the buffer. */
+ if (multibyte != string_multibyte)
+ {
+ int nbytes;
+
+ if (multibyte)
+ nbytes = count_size_as_multibyte (XSTRING (string)->data,
+ XSTRING (string)->size);
+ else
+ nbytes = XSTRING (string)->size;
+ if (nbytes != size_byte)
+ {
+ str = (unsigned char *) alloca (nbytes);
+ copy_text (XSTRING (string)->data, str, size_byte,
+ string_multibyte, multibyte);
+ size_byte = nbytes;
+ }
+ }
+
if (NILP (lim))
XSETINT (lim, forwardp ? ZV : BEGV);
else
bzero (fastmap, sizeof fastmap);
- i = 0, i_byte = 0;
+ i_byte = 0;
if (i_byte < size_byte
&& XSTRING (string)->data[0] == '^')
{
- negate = 1; i++, i_byte++;
+ negate = 1; i_byte++;
}
/* Find the characters specified and set their elements of fastmap.
while (i_byte < size_byte)
{
- int c_leading_code = XSTRING (string)->data[i_byte];
+ int c_leading_code = str[i_byte];
- FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
-
- /* Convert multibyteness between what the string has
- and what the buffer has. */
- if (multibyte)
- c = unibyte_char_to_multibyte (c);
- else
- c &= 0377;
+ c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte - i_byte, len);
+ i_byte += len;
if (syntaxp)
fastmap[syntax_spec_code[c & 0377]] = 1;
if (i_byte == size_byte)
break;
- c_leading_code = XSTRING (string)->data[i_byte];
- FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
+ c_leading_code = str[i_byte];
+ c = STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
+ i_byte += len;
}
if (i_byte < size_byte
- && XSTRING (string)->data[i_byte] == '-')
+ && str[i_byte] == '-')
{
unsigned int c2, c2_leading_code;
/* Skip over the dash. */
- i++, i_byte++;
+ i_byte++;
if (i_byte == size_byte)
break;
/* Get the end of the range. */
- c2_leading_code = XSTRING (string)->data[i_byte];
- FETCH_STRING_CHAR_ADVANCE (c2, string, i, i_byte);
+ c2_leading_code = str[i_byte];
+ c2 =STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
+ i_byte += len;
if (SINGLE_BYTE_CHAR_P (c))
{
if (! SINGLE_BYTE_CHAR_P (c2))
- error ("Invalid charcter range: %s",
- XSTRING (string)->data);
+ {
+ /* Handle a range such as \177-\377 in multibyte
+ mode. Split that into two ranges, the low
+ one ending at 0237, and the high one starting
+ at the smallest character in the charset of
+ C2 and ending at C2. */
+ int charset = CHAR_CHARSET (c2);
+ int c1 = MAKE_CHAR (charset, 0, 0);
+
+ fastmap[c2_leading_code] = 1;
+ char_ranges[n_char_ranges++] = c1;
+ char_ranges[n_char_ranges++] = c2;
+ c2 = 0237;
+ }
while (c <= c2)
{
fastmap[c] = 1;
c++;
}
}
- else
+ else if (! SINGLE_BYTE_CHAR_P (c2))
{
if (c_leading_code != c2_leading_code)
- error ("Invalid charcter range: %s",
+ error ("Invalid character range: %s",
XSTRING (string)->data);
- fastmap[c_leading_code] = 1;
if (c <= c2)
{
+ fastmap[c_leading_code] = 1;
char_ranges[n_char_ranges++] = c;
char_ranges[n_char_ranges++] = c2;
}
}
else
{
- fastmap[c_leading_code] = 1;
- if (!SINGLE_BYTE_CHAR_P (c))
+ if (SINGLE_BYTE_CHAR_P (c))
+ fastmap[c] = 1;
+ else
{
+ fastmap[c_leading_code] = 1;
char_ranges[n_char_ranges++] = c;
char_ranges[n_char_ranges++] = c;
}
if (multibyte)
while (pos < XINT (lim) && fastmap[(c = FETCH_BYTE (pos_byte))])
{
- if (!BASE_LEADING_CODE_P (c))
- INC_BOTH (pos, pos_byte);
- else if (n_char_ranges)
+ /* If we are looking at a multibyte character, we
+ must look up the character in the table
+ CHAR_RANGES. If there's no data in the table,
+ that character is not what we want to skip. */
+ if (BASE_LEADING_CODE_P (c)
+ && (c = FETCH_MULTIBYTE_CHAR (pos_byte),
+ ! SINGLE_BYTE_CHAR_P (c)))
{
- /* We much check CHAR_RANGES for a multibyte
- character. */
- ch = FETCH_MULTIBYTE_CHAR (pos_byte);
+ /* The following code does the right thing even if
+ n_char_ranges is zero (i.e. no data in
+ CHAR_RANGES). */
for (i = 0; i < n_char_ranges; i += 2)
- if ((ch >= char_ranges[i] && ch <= char_ranges[i + 1]))
+ if (c >= char_ranges[i] && c <= char_ranges[i + 1])
break;
if (!(negate ^ (i < n_char_ranges)))
break;
-
- INC_BOTH (pos, pos_byte);
- }
- else
- {
- if (!negate) break;
- INC_BOTH (pos, pos_byte);
}
+ INC_BOTH (pos, pos_byte);
}
else
while (pos < XINT (lim) && fastmap[FETCH_BYTE (pos)])
if (multibyte)
while (pos > XINT (lim))
{
- int savepos = pos_byte;
- DEC_BOTH (pos, pos_byte);
- if (fastmap[(c = FETCH_BYTE (pos_byte))])
- {
- if (!BASE_LEADING_CODE_P (c))
- ;
- else if (n_char_ranges)
- {
- /* We much check CHAR_RANGES for a multibyte
- character. */
- ch = FETCH_MULTIBYTE_CHAR (pos_byte);
- for (i = 0; i < n_char_ranges; i += 2)
- if (ch >= char_ranges[i] && ch <= char_ranges[i + 1])
- break;
- if (!(negate ^ (i < n_char_ranges)))
- {
- pos++;
- pos_byte = savepos;
- break;
- }
- }
- else
- if (!negate)
- {
- pos++;
- pos_byte = savepos;
- break;
- }
- }
- else
+ int prev_pos_byte = pos_byte;
+
+ DEC_POS (prev_pos_byte);
+ if (!fastmap[(c = FETCH_BYTE (prev_pos_byte))])
+ break;
+
+ /* See the comment in the previous similar code. */
+ if (BASE_LEADING_CODE_P (c)
+ && (c = FETCH_MULTIBYTE_CHAR (prev_pos_byte),
+ ! SINGLE_BYTE_CHAR_P (c)))
{
- pos++;
- pos_byte = savepos;
- break;
+ for (i = 0; i < n_char_ranges; i += 2)
+ if (c >= char_ranges[i] && c <= char_ranges[i + 1])
+ break;
+ if (!(negate ^ (i < n_char_ranges)))
+ break;
}
+ pos--;
+ pos_byte = prev_pos_byte;
}
else
while (pos > XINT (lim) && fastmap[FETCH_BYTE (pos - 1)])
code = SYNTAX (c);
comstart_first = SYNTAX_COMSTART_FIRST (c);
comnested = SYNTAX_COMMENT_NESTED (c);
+ comstyle = SYNTAX_COMMENT_STYLE (c);
INC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_FORWARD (from);
- comstyle = 0;
if (from < stop && comstart_first
&& (c1 = FETCH_CHAR (from_byte),
SYNTAX_COMSTART_SECOND (c1)))
code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
comstart_first = SYNTAX_COMSTART_FIRST (c);
comnested = SYNTAX_COMMENT_NESTED (c);
+ comstyle = SYNTAX_COMMENT_STYLE (c);
prefix = SYNTAX_PREFIX (c);
if (depth == min_depth)
last_good = from;
}
/* Quoting turns anything except a comment-ender
- into a word character. Note that this if cannot be true
+ into a word character. Note that this cannot be true
if we decremented FROM in the if-statement above. */
if (code != Sendcomment && char_quoted (from, from_byte))
code = Sword;
/* curlevel++->last ran into compiler bug on Apollo */
curlevel->last = XINT (Fcar (tem));
if (++curlevel == endlevel)
- error ("Nesting too deep for parser");
+ curlevel--; /* error ("Nesting too deep for parser"); */
curlevel->prev = -1;
curlevel->last = -1;
tem = Fcdr (tem);
if (code == Scomment)
{
+ state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
1 : -1);
state.comstr_start = prev_from;
curlevel->prev = curlevel->last;
break;
- startincomment:
- if (commentstop == 1)
- goto done;
- goto commentloop;
-
case Scomment:
- if (! state.incomment)
- abort ();
if (commentstop || boundary_stop) goto done;
- commentloop:
- /* The (from == BEGV) test is to enter the loop in the middle so
+ startincomment:
+ /* The (from == BEGV) test was to enter the loop in the middle so
that we find a 2-char comment ender even if we start in the
- middle of it. */
+ middle of it. We don't want to do that if we're just at the
+ beginning of the comment (think of (*) ... (*)). */
found = forw_comment (from, from_byte, end,
state.incomment, state.comstyle,
- (from == BEGV) ? 0 : prev_from_syntax,
+ (from == BEGV || from < state.comstr_start + 3)
+ ? 0 : prev_from_syntax,
&out_charpos, &out_bytepos, &state.incomment);
from = out_charpos; from_byte = out_bytepos;
/* Beware! prev_from and friends are invalid now.
Luckily, the `done' doesn't use them and the INC_FROM
sets them to a sane value without looking at them. */
if (!found) goto done;
- INC_FROM;
+ INC_FROM;
state.incomment = 0;
state.comstyle = 0; /* reset the comment style */
if (boundary_stop) goto done;
/* curlevel++->last ran into compiler bug on Apollo */
curlevel->last = prev_from;
if (++curlevel == endlevel)
- error ("Nesting too deep for parser");
+ curlevel--; /* error ("Nesting too deep for parser"); */
curlevel->prev = -1;
curlevel->last = -1;
if (targetdepth == depth) goto done;
else an integer (the current comment nesting).\n\
5. t if following a quote character.\n\
6. the minimum paren-depth encountered during this scan.\n\
- 7. t if in a comment of style b; `syntax-table' if the comment\n\
+ 7. t if in a comment of style b; symbol `syntax-table' if the comment\n\
should be terminated by a generic comment delimiter.\n\
8. character address of start of comment or string; nil if not in one.\n\
9. Intermediate data for continuation of parsing (subject to change).\n\
It is used to initialize the state of the parse. Elements number 1, 2, 6\n\
and 8 are ignored; you can leave off element 8 (the last) entirely.\n\
Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.\n\
- If it is `syntax-table', stop after the start of a comment or a string,\n\
- or after end of a comment or a string.")
+ If it is symbol `syntax-table', stop after the start of a comment or a\n\
+ string, or after end of a comment or a string.")
(from, to, targetdepth, stopbefore, state, commentstop)
*/