]> code.delx.au - gnu-emacs/blob - src/syntax.c
(skip_chars): Change the way of handling a range from unibyte char to
[gnu-emacs] / src / syntax.c
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 87, 93, 94, 95, 97, 1998, 1999 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 #include <config.h>
23 #include <ctype.h>
24 #include "lisp.h"
25 #include "commands.h"
26 #include "buffer.h"
27 #include "charset.h"
28
29 /* Make syntax table lookup grant data in gl_state. */
30 #define SYNTAX_ENTRY_VIA_PROPERTY
31
32 #include "syntax.h"
33 #include "intervals.h"
34
35 /* We use these constants in place for comment-style and
36 string-ender-char to distinguish comments/strings started by
37 comment_fence and string_fence codes. */
38
39 #define ST_COMMENT_STYLE (256 + 1)
40 #define ST_STRING_STYLE (256 + 2)
41 #include "category.h"
42
43 Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
44
45 int words_include_escapes;
46 int parse_sexp_lookup_properties;
47
48 /* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
49 int multibyte_syntax_as_symbol;
50
51 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
52 if not compiled with GCC. No need to mark it, since it is used
53 only very temporarily. */
54 Lisp_Object syntax_temp;
55
56 /* Non-zero means an open parenthesis in column 0 is always considered
57 to be the start of a defun. Zero means an open parenthesis in
58 column 0 has no special meaning. */
59
60 int open_paren_in_column_0_is_defun_start;
61
62 /* This is the internal form of the parse state used in parse-partial-sexp. */
63
64 struct lisp_parse_state
65 {
66 int depth; /* Depth at end of parsing. */
67 int instring; /* -1 if not within string, else desired terminator. */
68 int incomment; /* -1 if in unnestable comment else comment nesting */
69 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
70 int quoted; /* Nonzero if just after an escape char at end of parsing */
71 int thislevelstart; /* Char number of most recent start-of-expression at current level */
72 int prevlevelstart; /* Char number of start of containing expression */
73 int location; /* Char number at which parsing stopped. */
74 int mindepth; /* Minimum depth seen while scanning. */
75 int comstr_start; /* Position just after last comment/string starter. */
76 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
77 of levels (starting from outermost). */
78 };
79 \f
80 /* These variables are a cache for finding the start of a defun.
81 find_start_pos is the place for which the defun start was found.
82 find_start_value is the defun start position found for it.
83 find_start_value_byte is the corresponding byte position.
84 find_start_buffer is the buffer it was found in.
85 find_start_begv is the BEGV value when it was found.
86 find_start_modiff is the value of MODIFF when it was found. */
87
88 static int find_start_pos;
89 static int find_start_value;
90 static int find_start_value_byte;
91 static struct buffer *find_start_buffer;
92 static int find_start_begv;
93 static int find_start_modiff;
94
95
96 static int find_defun_start P_ ((int, int));
97 static int back_comment P_ ((int, int, int, int, int, int *, int *));
98 static int char_quoted P_ ((int, int));
99 static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object));
100 static Lisp_Object scan_lists P_ ((int, int, int, int));
101 static void scan_sexps_forward P_ ((struct lisp_parse_state *,
102 int, int, int, int,
103 int, Lisp_Object, int));
104 \f
105
106 struct gl_state_s gl_state; /* Global state of syntax parser. */
107
108 INTERVAL interval_of ();
109 #define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
110 to scan to property-change. */
111
112 /* Update gl_state to an appropriate interval which contains CHARPOS. The
113 sign of COUNT give the relative position of CHARPOS wrt the previously
114 valid interval. If INIT, only [be]_property fields of gl_state are
115 valid at start, the rest is filled basing on OBJECT.
116
117 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
118 direction than the intervals - or in an interval. We update the
119 current syntax-table basing on the property of this interval, and
120 update the interval to start further than CHARPOS - or be
121 NULL_INTERVAL. We also update lim_property to be the next value of
122 charpos to call this subroutine again - or be before/after the
123 start/end of OBJECT. */
124
125 void
126 update_syntax_table (charpos, count, init, object)
127 int charpos, count, init;
128 Lisp_Object object;
129 {
130 Lisp_Object tmp_table;
131 int cnt = 0, invalidate = 1;
132 INTERVAL i, oldi;
133
134 if (init)
135 {
136 gl_state.start = gl_state.b_property;
137 gl_state.stop = gl_state.e_property;
138 gl_state.forward_i = interval_of (charpos, object);
139 i = gl_state.backward_i = gl_state.forward_i;
140 gl_state.left_ok = gl_state.right_ok = 1;
141 invalidate = 0;
142 if (NULL_INTERVAL_P (i))
143 return;
144 /* interval_of updates only ->position of the return value, so
145 update the parents manually to speed up update_interval. */
146 while (!NULL_PARENT (i))
147 {
148 if (AM_RIGHT_CHILD (i))
149 INTERVAL_PARENT (i)->position = i->position
150 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
151 - TOTAL_LENGTH (INTERVAL_PARENT (i))
152 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
153 else
154 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
155 + TOTAL_LENGTH (i);
156 i = INTERVAL_PARENT (i);
157 }
158 i = gl_state.forward_i;
159 gl_state.b_property = i->position - 1 - gl_state.offset;
160 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
161 goto update;
162 }
163 oldi = i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
164
165 /* We are guarantied to be called with CHARPOS either in i,
166 or further off. */
167 if (NULL_INTERVAL_P (i))
168 error ("Error in syntax_table logic for to-the-end intervals");
169 else if (charpos < i->position) /* Move left. */
170 {
171 if (count > 0)
172 error ("Error in syntax_table logic for intervals <-");
173 /* Update the interval. */
174 i = update_interval (i, charpos);
175 if (oldi->position != INTERVAL_LAST_POS (i))
176 {
177 invalidate = 0;
178 gl_state.right_ok = 1; /* Invalidate the other end. */
179 gl_state.forward_i = i;
180 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
181 }
182 }
183 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
184 {
185 if (count < 0)
186 error ("Error in syntax_table logic for intervals ->");
187 /* Update the interval. */
188 i = update_interval (i, charpos);
189 if (i->position != INTERVAL_LAST_POS (oldi))
190 {
191 invalidate = 0;
192 gl_state.left_ok = 1; /* Invalidate the other end. */
193 gl_state.backward_i = i;
194 gl_state.b_property = i->position - 1 - gl_state.offset;
195 }
196 }
197 else if (count > 0 ? gl_state.right_ok : gl_state.left_ok)
198 {
199 /* We do not need to recalculate tmp_table. */
200 tmp_table = gl_state.old_prop;
201 }
202
203 update:
204 tmp_table = textget (i->plist, Qsyntax_table);
205
206 if (invalidate)
207 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
208
209 if (invalidate) /* Did not get to adjacent interval. */
210 { /* with the same table => */
211 /* invalidate the old range. */
212 if (count > 0)
213 {
214 gl_state.backward_i = i;
215 gl_state.left_ok = 1; /* Invalidate the other end. */
216 gl_state.b_property = i->position - 1 - gl_state.offset;
217 }
218 else
219 {
220 gl_state.forward_i = i;
221 gl_state.right_ok = 1; /* Invalidate the other end. */
222 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
223 }
224 }
225
226 gl_state.current_syntax_table = tmp_table;
227 gl_state.old_prop = tmp_table;
228 if (EQ (Fsyntax_table_p (tmp_table), Qt))
229 {
230 gl_state.use_global = 0;
231 }
232 else if (CONSP (tmp_table))
233 {
234 gl_state.use_global = 1;
235 gl_state.global_code = tmp_table;
236 }
237 else
238 {
239 gl_state.use_global = 0;
240 gl_state.current_syntax_table = current_buffer->syntax_table;
241 }
242
243 while (!NULL_INTERVAL_P (i))
244 {
245 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
246 {
247 if (count > 0)
248 gl_state.right_ok = 0;
249 else
250 gl_state.left_ok = 0;
251 break;
252 }
253 else if (cnt == INTERVALS_AT_ONCE)
254 {
255 if (count > 0)
256 gl_state.right_ok = 1;
257 else
258 gl_state.left_ok = 1;
259 break;
260 }
261 cnt++;
262 i = count > 0 ? next_interval (i) : previous_interval (i);
263 }
264 if (NULL_INTERVAL_P (i))
265 { /* This property goes to the end. */
266 if (count > 0)
267 gl_state.e_property = gl_state.stop;
268 else
269 gl_state.b_property = gl_state.start;
270 }
271 else
272 {
273 if (count > 0)
274 {
275 gl_state.e_property = i->position - gl_state.offset;
276 gl_state.forward_i = i;
277 }
278 else
279 {
280 gl_state.b_property = i->position + LENGTH (i) - 1 - gl_state.offset;
281 gl_state.backward_i = i;
282 }
283 }
284 }
285 \f
286 /* Returns TRUE if char at CHARPOS is quoted.
287 Global syntax-table data should be set up already to be good at CHARPOS
288 or after. On return global syntax data is good for lookup at CHARPOS. */
289
290 static int
291 char_quoted (charpos, bytepos)
292 register int charpos, bytepos;
293 {
294 register enum syntaxcode code;
295 register int beg = BEGV;
296 register int quoted = 0;
297 int orig = charpos;
298
299 DEC_BOTH (charpos, bytepos);
300
301 while (bytepos >= beg)
302 {
303 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
304 code = SYNTAX (FETCH_CHAR (bytepos));
305 if (! (code == Scharquote || code == Sescape))
306 break;
307
308 DEC_BOTH (charpos, bytepos);
309 quoted = !quoted;
310 }
311
312 UPDATE_SYNTAX_TABLE (orig);
313 return quoted;
314 }
315
316 /* Return the bytepos one character after BYTEPOS.
317 We assume that BYTEPOS is not at the end of the buffer. */
318
319 INLINE int
320 inc_bytepos (bytepos)
321 int bytepos;
322 {
323 if (NILP (current_buffer->enable_multibyte_characters))
324 return bytepos + 1;
325
326 INC_POS (bytepos);
327 return bytepos;
328 }
329
330 /* Return the bytepos one character before BYTEPOS.
331 We assume that BYTEPOS is not at the start of the buffer. */
332
333 INLINE int
334 dec_bytepos (bytepos)
335 int bytepos;
336 {
337 if (NILP (current_buffer->enable_multibyte_characters))
338 return bytepos - 1;
339
340 DEC_POS (bytepos);
341 return bytepos;
342 }
343 \f
344 /* Find a defun-start that is the last one before POS (or nearly the last).
345 We record what we find, so that another call in the same area
346 can return the same value right away.
347
348 There is no promise at which position the global syntax data is
349 valid on return from the subroutine, so the caller should explicitly
350 update the global data. */
351
352 static int
353 find_defun_start (pos, pos_byte)
354 int pos, pos_byte;
355 {
356 int opoint = PT, opoint_byte = PT_BYTE;
357
358 /* Use previous finding, if it's valid and applies to this inquiry. */
359 if (current_buffer == find_start_buffer
360 /* Reuse the defun-start even if POS is a little farther on.
361 POS might be in the next defun, but that's ok.
362 Our value may not be the best possible, but will still be usable. */
363 && pos <= find_start_pos + 1000
364 && pos >= find_start_value
365 && BEGV == find_start_begv
366 && MODIFF == find_start_modiff)
367 return find_start_value;
368
369 /* Back up to start of line. */
370 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
371
372 /* We optimize syntax-table lookup for rare updates. Thus we accept
373 only those `^\s(' which are good in global _and_ text-property
374 syntax-tables. */
375 gl_state.current_syntax_table = current_buffer->syntax_table;
376 gl_state.use_global = 0;
377 if (open_paren_in_column_0_is_defun_start)
378 {
379 while (PT > BEGV)
380 {
381 /* Open-paren at start of line means we may have found our
382 defun-start. */
383 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
384 {
385 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
386 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
387 break;
388 /* Now fallback to the default value. */
389 gl_state.current_syntax_table = current_buffer->syntax_table;
390 gl_state.use_global = 0;
391 }
392 /* Move to beg of previous line. */
393 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
394 }
395 }
396
397 /* Record what we found, for the next try. */
398 find_start_value = PT;
399 find_start_value_byte = PT_BYTE;
400 find_start_buffer = current_buffer;
401 find_start_modiff = MODIFF;
402 find_start_begv = BEGV;
403 find_start_pos = pos;
404
405 TEMP_SET_PT_BOTH (opoint, opoint_byte);
406
407 return find_start_value;
408 }
409 \f
410 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
411
412 static int
413 prev_char_comend_first (pos, pos_byte)
414 int pos, pos_byte;
415 {
416 int c, val;
417
418 DEC_BOTH (pos, pos_byte);
419 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
420 c = FETCH_CHAR (pos_byte);
421 val = SYNTAX_COMEND_FIRST (c);
422 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
423 return val;
424 }
425
426 /* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
427
428 /* static int
429 * prev_char_comstart_first (pos, pos_byte)
430 * int pos, pos_byte;
431 * {
432 * int c, val;
433 *
434 * DEC_BOTH (pos, pos_byte);
435 * UPDATE_SYNTAX_TABLE_BACKWARD (pos);
436 * c = FETCH_CHAR (pos_byte);
437 * val = SYNTAX_COMSTART_FIRST (c);
438 * UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
439 * return val;
440 * } */
441
442 /* Checks whether charpos FROM is at the end of a comment.
443 FROM_BYTE is the bytepos corresponding to FROM.
444 Do not move back before STOP.
445
446 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
447 return -1 otherwise.
448
449 If successful, store the charpos of the comment's beginning
450 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
451
452 Global syntax data remains valid for backward search starting at
453 the returned value (or at FROM, if the search was not successful). */
454
455 static int
456 back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
457 int from, from_byte, stop;
458 int comnested, comstyle;
459 int *charpos_ptr, *bytepos_ptr;
460 {
461 /* Look back, counting the parity of string-quotes,
462 and recording the comment-starters seen.
463 When we reach a safe place, assume that's not in a string;
464 then step the main scan to the earliest comment-starter seen
465 an even number of string quotes away from the safe place.
466
467 OFROM[I] is position of the earliest comment-starter seen
468 which is I+2X quotes from the comment-end.
469 PARITY is current parity of quotes from the comment end. */
470 int string_style = -1; /* Presumed outside of any string. */
471 int string_lossage = 0;
472 /* Not a real lossage: indicates that we have passed a matching comment
473 starter plus an non-matching comment-ender, meaning that any matching
474 comment-starter we might see later could be a false positive (hidden
475 inside another comment).
476 Test case: { a (* b } c (* d *) */
477 int comment_lossage = 0;
478 int comment_end = from;
479 int comment_end_byte = from_byte;
480 int comstart_pos = 0;
481 int comstart_byte;
482 /* Place where the containing defun starts,
483 or 0 if we didn't come across it yet. */
484 int defun_start = 0;
485 int defun_start_byte = 0;
486 register enum syntaxcode code;
487 int nesting = 1; /* current comment nesting */
488 int c;
489 int syntax = 0;
490
491 /* FIXME: A }} comment-ender style leads to incorrect behavior
492 in the case of {{ c }}} because we ignore the last two chars which are
493 assumed to be comment-enders although they aren't. */
494
495 /* At beginning of range to scan, we're outside of strings;
496 that determines quote parity to the comment-end. */
497 while (from != stop)
498 {
499 int temp_byte, prev_syntax;
500 int com2start, com2end;
501
502 /* Move back and examine a character. */
503 DEC_BOTH (from, from_byte);
504 UPDATE_SYNTAX_TABLE_BACKWARD (from);
505
506 prev_syntax = syntax;
507 c = FETCH_CHAR (from_byte);
508 syntax = SYNTAX_WITH_FLAGS (c);
509 code = SYNTAX (c);
510
511 /* Check for 2-char comment markers. */
512 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
513 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
514 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax)
515 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
516 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
517 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
518 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
519
520 /* Nasty cases with overlapping 2-char comment markers:
521 - snmp-mode: -- c -- foo -- c --
522 --- c --
523 ------ c --
524 - c-mode: *||*
525 |* *|* *|
526 |*| |* |*|
527 /// */
528
529 /* If a 2-char comment sequence partly overlaps with another,
530 we don't try to be clever. */
531 if (from > stop && (com2end || com2start))
532 {
533 int next = from, next_byte = from_byte, next_c, next_syntax;
534 DEC_BOTH (next, next_byte);
535 UPDATE_SYNTAX_TABLE_BACKWARD (next);
536 next_c = FETCH_CHAR (next_byte);
537 next_syntax = SYNTAX_WITH_FLAGS (next_c);
538 if (((com2start || comnested)
539 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
540 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
541 || ((com2end || comnested)
542 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
543 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (syntax)
544 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
545 goto lossage;
546 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
547 }
548
549 if (com2start && comstart_pos == 0)
550 /* We're looking at a comment starter. But it might be a comment
551 ender as well (see snmp-mode). The first time we see one, we
552 need to consider it as a comment starter,
553 and the subsequent times as a comment ender. */
554 com2end = 0;
555
556 /* Turn a 2-char comment sequences into the appropriate syntax. */
557 if (com2end)
558 code = Sendcomment;
559 else if (com2start)
560 code = Scomment;
561 /* Ignore comment starters of a different style. */
562 else if (code == Scomment
563 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax)
564 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
565 continue;
566
567 /* Ignore escaped characters, except comment-enders. */
568 if (code != Sendcomment && char_quoted (from, from_byte))
569 continue;
570
571 switch (code)
572 {
573 case Sstring_fence:
574 case Scomment_fence:
575 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
576 case Sstring:
577 /* Track parity of quotes. */
578 if (string_style == -1)
579 /* Entering a string. */
580 string_style = c;
581 else if (string_style == c)
582 /* Leaving the string. */
583 string_style = -1;
584 else
585 /* If we have two kinds of string delimiters.
586 There's no way to grok this scanning backwards. */
587 string_lossage = 1;
588 break;
589
590 case Scomment:
591 /* We've already checked that it is the relevant comstyle. */
592 if (string_style != -1 || comment_lossage || string_lossage)
593 /* There are odd string quotes involved, so let's be careful.
594 Test case in Pascal: " { " a { " } */
595 goto lossage;
596
597 if (!comnested)
598 {
599 /* Record best comment-starter so far. */
600 comstart_pos = from;
601 comstart_byte = from_byte;
602 }
603 else if (--nesting <= 0)
604 /* nested comments have to be balanced, so we don't need to
605 keep looking for earlier ones. We use here the same (slightly
606 incorrect) reasoning as below: since it is followed by uniform
607 paired string quotes, this comment-start has to be outside of
608 strings, else the comment-end itself would be inside a string. */
609 goto done;
610 break;
611
612 case Sendcomment:
613 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax) == comstyle
614 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
615 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
616 /* This is the same style of comment ender as ours. */
617 {
618 if (comnested)
619 nesting++;
620 else
621 /* Anything before that can't count because it would match
622 this comment-ender rather than ours. */
623 from = stop; /* Break out of the loop. */
624 }
625 else if (comstart_pos != 0 || c != '\n')
626 /* We're mixing comment styles here, so we'd better be careful.
627 The (comstart_pos != 0 || c != '\n') check is not quite correct
628 (we should just always set comment_lossage), but removing it
629 would imply that any multiline comment in C would go through
630 lossage, which seems overkill.
631 The failure should only happen in the rare cases such as
632 { (* } *) */
633 comment_lossage = 1;
634 break;
635
636 case Sopen:
637 /* Assume a defun-start point is outside of strings. */
638 if (open_paren_in_column_0_is_defun_start
639 && (from == stop
640 || (temp_byte = dec_bytepos (from_byte),
641 FETCH_CHAR (temp_byte) == '\n')))
642 {
643 defun_start = from;
644 defun_start_byte = from_byte;
645 from = stop; /* Break out of the loop. */
646 }
647 break;
648
649 default:
650 break;
651 }
652 }
653
654 if (comstart_pos == 0)
655 {
656 from = comment_end;
657 from_byte = comment_end_byte;
658 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
659 }
660 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
661 or `done'), then we've found the beginning of the non-nested comment. */
662 else if (1) /* !comnested */
663 {
664 from = comstart_pos;
665 from_byte = comstart_byte;
666 /* Globals are correct now. */
667 }
668 else
669 {
670 struct lisp_parse_state state;
671 lossage:
672 /* We had two kinds of string delimiters mixed up
673 together. Decode this going forwards.
674 Scan fwd from a known safe place (beginning-of-defun)
675 to the one in question; this records where we
676 last passed a comment starter. */
677 /* If we did not already find the defun start, find it now. */
678 if (defun_start == 0)
679 {
680 defun_start = find_defun_start (comment_end, comment_end_byte);
681 defun_start_byte = find_start_value_byte;
682 }
683 do
684 {
685 scan_sexps_forward (&state,
686 defun_start, defun_start_byte,
687 comment_end, -10000, 0, Qnil, 0);
688 defun_start = comment_end;
689 if (state.incomment == (comnested ? 1 : -1)
690 && state.comstyle == comstyle)
691 from = state.comstr_start;
692 else
693 {
694 from = comment_end;
695 if (state.incomment)
696 /* If comment_end is inside some other comment, maybe ours
697 is nested, so we need to try again from within the
698 surrounding comment. Example: { a (* " *) */
699 {
700 /* FIXME: We should advance by one or two chars. */
701 defun_start = state.comstr_start + 2;
702 defun_start_byte = CHAR_TO_BYTE (defun_start);
703 }
704 }
705 } while (defun_start < comment_end);
706
707 from_byte = CHAR_TO_BYTE (from);
708 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
709 }
710
711 done:
712 *charpos_ptr = from;
713 *bytepos_ptr = from_byte;
714
715 return (from == comment_end) ? -1 : from;
716 }
717 \f
718 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
719 "Return t if OBJECT is a syntax table.\n\
720 Currently, any char-table counts as a syntax table.")
721 (object)
722 Lisp_Object object;
723 {
724 if (CHAR_TABLE_P (object)
725 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
726 return Qt;
727 return Qnil;
728 }
729
730 static void
731 check_syntax_table (obj)
732 Lisp_Object obj;
733 {
734 if (!(CHAR_TABLE_P (obj)
735 && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table)))
736 wrong_type_argument (Qsyntax_table_p, obj);
737 }
738
739 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
740 "Return the current syntax table.\n\
741 This is the one specified by the current buffer.")
742 ()
743 {
744 return current_buffer->syntax_table;
745 }
746
747 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
748 Sstandard_syntax_table, 0, 0, 0,
749 "Return the standard syntax table.\n\
750 This is the one used for new buffers.")
751 ()
752 {
753 return Vstandard_syntax_table;
754 }
755
756 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
757 "Construct a new syntax table and return it.\n\
758 It is a copy of the TABLE, which defaults to the standard syntax table.")
759 (table)
760 Lisp_Object table;
761 {
762 Lisp_Object copy;
763
764 if (!NILP (table))
765 check_syntax_table (table);
766 else
767 table = Vstandard_syntax_table;
768
769 copy = Fcopy_sequence (table);
770
771 /* Only the standard syntax table should have a default element.
772 Other syntax tables should inherit from parents instead. */
773 XCHAR_TABLE (copy)->defalt = Qnil;
774
775 /* Copied syntax tables should all have parents.
776 If we copied one with no parent, such as the standard syntax table,
777 use the standard syntax table as the copy's parent. */
778 if (NILP (XCHAR_TABLE (copy)->parent))
779 Fset_char_table_parent (copy, Vstandard_syntax_table);
780 return copy;
781 }
782
783 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
784 "Select a new syntax table for the current buffer.\n\
785 One argument, a syntax table.")
786 (table)
787 Lisp_Object table;
788 {
789 int idx;
790 check_syntax_table (table);
791 current_buffer->syntax_table = table;
792 /* Indicate that this buffer now has a specified syntax table. */
793 idx = PER_BUFFER_VAR_IDX (syntax_table);
794 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
795 return table;
796 }
797 \f
798 /* Convert a letter which signifies a syntax code
799 into the code it signifies.
800 This is used by modify-syntax-entry, and other things. */
801
802 unsigned char syntax_spec_code[0400] =
803 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
804 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
805 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
806 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
807 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
808 (char) Smath, 0377, 0377, (char) Squote,
809 (char) Sopen, (char) Sclose, 0377, 0377,
810 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
811 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
812 0377, 0377, 0377, 0377,
813 (char) Scomment, 0377, (char) Sendcomment, 0377,
814 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
815 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
816 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
817 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
818 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
819 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
820 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
821 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
822 };
823
824 /* Indexed by syntax code, give the letter that describes it. */
825
826 char syntax_code_spec[16] =
827 {
828 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
829 '!', '|'
830 };
831
832 /* Indexed by syntax code, give the object (cons of syntax code and
833 nil) to be stored in syntax table. Since these objects can be
834 shared among syntax tables, we generate them in advance. By
835 sharing objects, the function `describe-syntax' can give a more
836 compact listing. */
837 static Lisp_Object Vsyntax_code_object;
838
839 \f
840 /* Look up the value for CHARACTER in syntax table TABLE's parent
841 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
842 for CHARACTER. It's actually used only when not compiled with GCC. */
843
844 Lisp_Object
845 syntax_parent_lookup (table, character)
846 Lisp_Object table;
847 int character;
848 {
849 Lisp_Object value;
850
851 while (1)
852 {
853 table = XCHAR_TABLE (table)->parent;
854 if (NILP (table))
855 return Qnil;
856
857 value = XCHAR_TABLE (table)->contents[character];
858 if (!NILP (value))
859 return value;
860 }
861 }
862
863 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
864 "Return the syntax code of CHARACTER, described by a character.\n\
865 For example, if CHARACTER is a word constituent,\n\
866 the character `w' is returned.\n\
867 The characters that correspond to various syntax codes\n\
868 are listed in the documentation of `modify-syntax-entry'.")
869 (character)
870 Lisp_Object character;
871 {
872 int char_int;
873 gl_state.current_syntax_table = current_buffer->syntax_table;
874
875 gl_state.use_global = 0;
876 CHECK_NUMBER (character, 0);
877 char_int = XINT (character);
878 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
879 }
880
881 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
882 "Return the matching parenthesis of CHARACTER, or nil if none.")
883 (character)
884 Lisp_Object character;
885 {
886 int char_int, code;
887 gl_state.current_syntax_table = current_buffer->syntax_table;
888 gl_state.use_global = 0;
889 CHECK_NUMBER (character, 0);
890 char_int = XINT (character);
891 code = SYNTAX (char_int);
892 if (code == Sopen || code == Sclose)
893 return SYNTAX_MATCH (char_int);
894 return Qnil;
895 }
896
897 /* This comment supplies the doc string for modify-syntax-entry,
898 for make-docfile to see. We cannot put this in the real DEFUN
899 due to limits in the Unix cpp.
900
901 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
902 "Set syntax for character CHAR according to string S.\n\
903 The syntax is changed only for table TABLE, which defaults to\n\
904 the current buffer's syntax table.\n\
905 The first character of S should be one of the following:\n\
906 Space or - whitespace syntax. w word constituent.\n\
907 _ symbol constituent. . punctuation.\n\
908 ( open-parenthesis. ) close-parenthesis.\n\
909 \" string quote. \\ escape.\n\
910 $ paired delimiter. ' expression quote or prefix operator.\n\
911 < comment starter. > comment ender.\n\
912 / character-quote. @ inherit from `standard-syntax-table'.\n\
913 \n\
914 Only single-character comment start and end sequences are represented thus.\n\
915 Two-character sequences are represented as described below.\n\
916 The second character of S is the matching parenthesis,\n\
917 used only if the first character is `(' or `)'.\n\
918 Any additional characters are flags.\n\
919 Defined flags are the characters 1, 2, 3, 4, b, p, and n.\n\
920 1 means CHAR is the start of a two-char comment start sequence.\n\
921 2 means CHAR is the second character of such a sequence.\n\
922 3 means CHAR is the start of a two-char comment end sequence.\n\
923 4 means CHAR is the second character of such a sequence.\n\
924 \n\
925 There can be up to two orthogonal comment sequences. This is to support\n\
926 language modes such as C++. By default, all comment sequences are of style\n\
927 a, but you can set the comment sequence style to b (on the second character\n\
928 of a comment-start, or the first character of a comment-end sequence) using\n\
929 this flag:\n\
930 b means CHAR is part of comment sequence b.\n\
931 n means CHAR is part of a nestable comment sequence.\n\
932 \n\
933 p means CHAR is a prefix character for `backward-prefix-chars';\n\
934 such characters are treated as whitespace when they occur\n\
935 between expressions.")
936 (char, s, table)
937 */
938
939 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
940 /* I really don't know why this is interactive
941 help-form should at least be made useful whilst reading the second arg
942 */
943 "cSet syntax for character: \nsSet syntax for %s to: ",
944 0 /* See immediately above */)
945 (c, newentry, syntax_table)
946 Lisp_Object c, newentry, syntax_table;
947 {
948 register unsigned char *p;
949 register enum syntaxcode code;
950 int val;
951 Lisp_Object match;
952
953 CHECK_NUMBER (c, 0);
954 CHECK_STRING (newentry, 1);
955
956 if (NILP (syntax_table))
957 syntax_table = current_buffer->syntax_table;
958 else
959 check_syntax_table (syntax_table);
960
961 p = XSTRING (newentry)->data;
962 code = (enum syntaxcode) syntax_spec_code[*p++];
963 if (((int) code & 0377) == 0377)
964 error ("invalid syntax description letter: %c", p[-1]);
965
966 if (code == Sinherit)
967 {
968 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Qnil);
969 return Qnil;
970 }
971
972 if (*p)
973 {
974 int len;
975 int character = (STRING_CHAR_AND_LENGTH
976 (p, STRING_BYTES (XSTRING (newentry)) - 1, len));
977 XSETINT (match, character);
978 if (XFASTINT (match) == ' ')
979 match = Qnil;
980 p += len;
981 }
982 else
983 match = Qnil;
984
985 val = (int) code;
986 while (*p)
987 switch (*p++)
988 {
989 case '1':
990 val |= 1 << 16;
991 break;
992
993 case '2':
994 val |= 1 << 17;
995 break;
996
997 case '3':
998 val |= 1 << 18;
999 break;
1000
1001 case '4':
1002 val |= 1 << 19;
1003 break;
1004
1005 case 'p':
1006 val |= 1 << 20;
1007 break;
1008
1009 case 'b':
1010 val |= 1 << 21;
1011 break;
1012
1013 case 'n':
1014 val |= 1 << 22;
1015 break;
1016 }
1017
1018 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
1019 newentry = XVECTOR (Vsyntax_code_object)->contents[val];
1020 else
1021 /* Since we can't use a shared object, let's make a new one. */
1022 newentry = Fcons (make_number (val), match);
1023
1024 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1025
1026 return Qnil;
1027 }
1028 \f
1029 /* Dump syntax table to buffer in human-readable format */
1030
1031 static void
1032 describe_syntax (value)
1033 Lisp_Object value;
1034 {
1035 register enum syntaxcode code;
1036 char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
1037 char str[2];
1038 Lisp_Object first, match_lisp;
1039
1040 Findent_to (make_number (16), make_number (1));
1041
1042 if (NILP (value))
1043 {
1044 insert_string ("default\n");
1045 return;
1046 }
1047
1048 if (CHAR_TABLE_P (value))
1049 {
1050 insert_string ("deeper char-table ...\n");
1051 return;
1052 }
1053
1054 if (!CONSP (value))
1055 {
1056 insert_string ("invalid\n");
1057 return;
1058 }
1059
1060 first = XCAR (value);
1061 match_lisp = XCDR (value);
1062
1063 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
1064 {
1065 insert_string ("invalid\n");
1066 return;
1067 }
1068
1069 code = (enum syntaxcode) (XINT (first) & 0377);
1070 start1 = (XINT (first) >> 16) & 1;
1071 start2 = (XINT (first) >> 17) & 1;
1072 end1 = (XINT (first) >> 18) & 1;
1073 end2 = (XINT (first) >> 19) & 1;
1074 prefix = (XINT (first) >> 20) & 1;
1075 comstyle = (XINT (first) >> 21) & 1;
1076 comnested = (XINT (first) >> 22) & 1;
1077
1078 if ((int) code < 0 || (int) code >= (int) Smax)
1079 {
1080 insert_string ("invalid");
1081 return;
1082 }
1083 desc = syntax_code_spec[(int) code];
1084
1085 str[0] = desc, str[1] = 0;
1086 insert (str, 1);
1087
1088 if (NILP (match_lisp))
1089 insert (" ", 1);
1090 else
1091 insert_char (XINT (match_lisp));
1092
1093 if (start1)
1094 insert ("1", 1);
1095 if (start2)
1096 insert ("2", 1);
1097
1098 if (end1)
1099 insert ("3", 1);
1100 if (end2)
1101 insert ("4", 1);
1102
1103 if (prefix)
1104 insert ("p", 1);
1105 if (comstyle)
1106 insert ("b", 1);
1107 if (comnested)
1108 insert ("n", 1);
1109
1110 insert_string ("\twhich means: ");
1111
1112 switch (SWITCH_ENUM_CAST (code))
1113 {
1114 case Swhitespace:
1115 insert_string ("whitespace"); break;
1116 case Spunct:
1117 insert_string ("punctuation"); break;
1118 case Sword:
1119 insert_string ("word"); break;
1120 case Ssymbol:
1121 insert_string ("symbol"); break;
1122 case Sopen:
1123 insert_string ("open"); break;
1124 case Sclose:
1125 insert_string ("close"); break;
1126 case Squote:
1127 insert_string ("quote"); break;
1128 case Sstring:
1129 insert_string ("string"); break;
1130 case Smath:
1131 insert_string ("math"); break;
1132 case Sescape:
1133 insert_string ("escape"); break;
1134 case Scharquote:
1135 insert_string ("charquote"); break;
1136 case Scomment:
1137 insert_string ("comment"); break;
1138 case Sendcomment:
1139 insert_string ("endcomment"); break;
1140 default:
1141 insert_string ("invalid");
1142 return;
1143 }
1144
1145 if (!NILP (match_lisp))
1146 {
1147 insert_string (", matches ");
1148 insert_char (XINT (match_lisp));
1149 }
1150
1151 if (start1)
1152 insert_string (",\n\t is the first character of a comment-start sequence");
1153 if (start2)
1154 insert_string (",\n\t is the second character of a comment-start sequence");
1155
1156 if (end1)
1157 insert_string (",\n\t is the first character of a comment-end sequence");
1158 if (end2)
1159 insert_string (",\n\t is the second character of a comment-end sequence");
1160 if (comstyle)
1161 insert_string (" (comment style b)");
1162 if (comnested)
1163 insert_string (" (nestable)");
1164
1165 if (prefix)
1166 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1167
1168 insert_string ("\n");
1169 }
1170
1171 static Lisp_Object
1172 describe_syntax_1 (vector)
1173 Lisp_Object vector;
1174 {
1175 struct buffer *old = current_buffer;
1176 set_buffer_internal (XBUFFER (Vstandard_output));
1177 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil, (int *) 0, 0);
1178 while (! NILP (XCHAR_TABLE (vector)->parent))
1179 {
1180 vector = XCHAR_TABLE (vector)->parent;
1181 insert_string ("\nThe parent syntax table is:");
1182 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil,
1183 (int *) 0, 0);
1184 }
1185
1186 call0 (intern ("help-mode"));
1187 set_buffer_internal (old);
1188 return Qnil;
1189 }
1190
1191 DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
1192 "Describe the syntax specifications in the syntax table.\n\
1193 The descriptions are inserted in a buffer, which is then displayed.")
1194 ()
1195 {
1196 internal_with_output_to_temp_buffer
1197 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
1198
1199 return Qnil;
1200 }
1201 \f
1202 int parse_sexp_ignore_comments;
1203
1204 /* Return the position across COUNT words from FROM.
1205 If that many words cannot be found before the end of the buffer, return 0.
1206 COUNT negative means scan backward and stop at word beginning. */
1207
1208 int
1209 scan_words (from, count)
1210 register int from, count;
1211 {
1212 register int beg = BEGV;
1213 register int end = ZV;
1214 register int from_byte = CHAR_TO_BYTE (from);
1215 register enum syntaxcode code;
1216 int ch0, ch1;
1217
1218 immediate_quit = 1;
1219 QUIT;
1220
1221 SETUP_SYNTAX_TABLE (from, count);
1222
1223 while (count > 0)
1224 {
1225 while (1)
1226 {
1227 if (from == end)
1228 {
1229 immediate_quit = 0;
1230 return 0;
1231 }
1232 UPDATE_SYNTAX_TABLE_FORWARD (from);
1233 ch0 = FETCH_CHAR (from_byte);
1234 code = SYNTAX (ch0);
1235 INC_BOTH (from, from_byte);
1236 if (words_include_escapes
1237 && (code == Sescape || code == Scharquote))
1238 break;
1239 if (code == Sword)
1240 break;
1241 }
1242 /* Now CH0 is a character which begins a word and FROM is the
1243 position of the next character. */
1244 while (1)
1245 {
1246 if (from == end) break;
1247 UPDATE_SYNTAX_TABLE_FORWARD (from);
1248 ch1 = FETCH_CHAR (from_byte);
1249 code = SYNTAX (ch1);
1250 if (!(words_include_escapes
1251 && (code == Sescape || code == Scharquote)))
1252 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1253 break;
1254 INC_BOTH (from, from_byte);
1255 ch0 = ch1;
1256 }
1257 count--;
1258 }
1259 while (count < 0)
1260 {
1261 while (1)
1262 {
1263 if (from == beg)
1264 {
1265 immediate_quit = 0;
1266 return 0;
1267 }
1268 DEC_BOTH (from, from_byte);
1269 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1270 ch1 = FETCH_CHAR (from_byte);
1271 code = SYNTAX (ch1);
1272 if (words_include_escapes
1273 && (code == Sescape || code == Scharquote))
1274 break;
1275 if (code == Sword)
1276 break;
1277 }
1278 /* Now CH1 is a character which ends a word and FROM is the
1279 position of it. */
1280 while (1)
1281 {
1282 int temp_byte;
1283
1284 if (from == beg)
1285 break;
1286 temp_byte = dec_bytepos (from_byte);
1287 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1288 ch0 = FETCH_CHAR (temp_byte);
1289 code = SYNTAX (ch0);
1290 if (!(words_include_escapes
1291 && (code == Sescape || code == Scharquote)))
1292 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1293 break;
1294 DEC_BOTH (from, from_byte);
1295 ch1 = ch0;
1296 }
1297 count++;
1298 }
1299
1300 immediate_quit = 0;
1301
1302 return from;
1303 }
1304
1305 DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
1306 "Move point forward ARG words (backward if ARG is negative).\n\
1307 Normally returns t.\n\
1308 If an edge of the buffer or a field boundary is reached, point is left there\n\
1309 and the function returns nil. Field boundaries are not noticed if\n\
1310 `inhibit-field-text-motion' is non-nil.")
1311 (count)
1312 Lisp_Object count;
1313 {
1314 int orig_val, val;
1315 CHECK_NUMBER (count, 0);
1316
1317 val = orig_val = scan_words (PT, XINT (count));
1318 if (! orig_val)
1319 val = XINT (count) > 0 ? ZV : BEGV;
1320
1321 /* Avoid jumping out of an input field. */
1322 val = XFASTINT (Fconstrain_to_field (make_number (val), make_number (PT),
1323 Qt, Qnil, Qnil));
1324
1325 SET_PT (val);
1326 return val == orig_val ? Qt : Qnil;
1327 }
1328 \f
1329 Lisp_Object skip_chars ();
1330
1331 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1332 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
1333 STRING is like the inside of a `[...]' in a regular expression\n\
1334 except that `]' is never special and `\\' quotes `^', `-' or `\\'\n\
1335 (but not as the end of a range; quoting is never needed there).\n\
1336 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
1337 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
1338 Returns the distance traveled, either zero or positive.")
1339 (string, lim)
1340 Lisp_Object string, lim;
1341 {
1342 return skip_chars (1, 0, string, lim);
1343 }
1344
1345 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1346 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
1347 See `skip-chars-forward' for details.\n\
1348 Returns the distance traveled, either zero or negative.")
1349 (string, lim)
1350 Lisp_Object string, lim;
1351 {
1352 return skip_chars (0, 0, string, lim);
1353 }
1354
1355 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1356 "Move point forward across chars in specified syntax classes.\n\
1357 SYNTAX is a string of syntax code characters.\n\
1358 Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
1359 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1360 This function returns the distance traveled, either zero or positive.")
1361 (syntax, lim)
1362 Lisp_Object syntax, lim;
1363 {
1364 return skip_chars (1, 1, syntax, lim);
1365 }
1366
1367 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1368 "Move point backward across chars in specified syntax classes.\n\
1369 SYNTAX is a string of syntax code characters.\n\
1370 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
1371 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1372 This function returns the distance traveled, either zero or negative.")
1373 (syntax, lim)
1374 Lisp_Object syntax, lim;
1375 {
1376 return skip_chars (0, 1, syntax, lim);
1377 }
1378
1379 static Lisp_Object
1380 skip_chars (forwardp, syntaxp, string, lim)
1381 int forwardp, syntaxp;
1382 Lisp_Object string, lim;
1383 {
1384 register unsigned int c;
1385 register int ch;
1386 unsigned char fastmap[0400];
1387 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
1388 of which codes don't fit in FASTMAP. In that case, set the
1389 ranges of characters in CHAR_RANGES. */
1390 int *char_ranges;
1391 int n_char_ranges = 0;
1392 int negate = 0;
1393 register int i, i_byte;
1394 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1395 int string_multibyte;
1396 int size_byte;
1397 unsigned char *str;
1398 int len;
1399
1400 CHECK_STRING (string, 0);
1401 char_ranges = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2);
1402 string_multibyte = STRING_MULTIBYTE (string);
1403 str = XSTRING (string)->data;
1404 size_byte = STRING_BYTES (XSTRING (string));
1405
1406 /* Adjust the multibyteness of the string to that of the buffer. */
1407 if (multibyte != string_multibyte)
1408 {
1409 int nbytes;
1410
1411 if (multibyte)
1412 nbytes = count_size_as_multibyte (XSTRING (string)->data,
1413 XSTRING (string)->size);
1414 else
1415 nbytes = XSTRING (string)->size;
1416 if (nbytes != size_byte)
1417 {
1418 str = (unsigned char *) alloca (nbytes);
1419 copy_text (XSTRING (string)->data, str, size_byte,
1420 string_multibyte, multibyte);
1421 size_byte = nbytes;
1422 }
1423 }
1424
1425 if (NILP (lim))
1426 XSETINT (lim, forwardp ? ZV : BEGV);
1427 else
1428 CHECK_NUMBER_COERCE_MARKER (lim, 0);
1429
1430 /* In any case, don't allow scan outside bounds of buffer. */
1431 if (XINT (lim) > ZV)
1432 XSETFASTINT (lim, ZV);
1433 if (XINT (lim) < BEGV)
1434 XSETFASTINT (lim, BEGV);
1435
1436 bzero (fastmap, sizeof fastmap);
1437
1438 i_byte = 0;
1439
1440 if (i_byte < size_byte
1441 && XSTRING (string)->data[0] == '^')
1442 {
1443 negate = 1; i_byte++;
1444 }
1445
1446 /* Find the characters specified and set their elements of fastmap.
1447 If syntaxp, each character counts as itself.
1448 Otherwise, handle backslashes and ranges specially. */
1449
1450 while (i_byte < size_byte)
1451 {
1452 c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte - i_byte, len);
1453 i_byte += len;
1454
1455 if (syntaxp)
1456 fastmap[syntax_spec_code[c & 0377]] = 1;
1457 else
1458 {
1459 if (c == '\\')
1460 {
1461 if (i_byte == size_byte)
1462 break;
1463
1464 c = STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
1465 i_byte += len;
1466 }
1467 if (i_byte < size_byte
1468 && str[i_byte] == '-')
1469 {
1470 unsigned int c2;
1471
1472 /* Skip over the dash. */
1473 i_byte++;
1474
1475 if (i_byte == size_byte)
1476 break;
1477
1478 /* Get the end of the range. */
1479 c2 =STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
1480 i_byte += len;
1481
1482 if (SINGLE_BYTE_CHAR_P (c))
1483 {
1484 if (! SINGLE_BYTE_CHAR_P (c2))
1485 {
1486 /* Handle a range starting by a unibyte char and
1487 ending by a multibyte char. Split
1488 that into two ranges, the low one ending at
1489 0377, and the high one starting at the
1490 smallest character in the charset of C2 and
1491 ending at C2. */
1492 int charset = CHAR_CHARSET (c2);
1493 int c1 = MAKE_CHAR (charset, 0, 0);
1494
1495 char_ranges[n_char_ranges++] = c1;
1496 char_ranges[n_char_ranges++] = c2;
1497 c2 = 0377;
1498 }
1499 while (c <= c2)
1500 {
1501 fastmap[c] = 1;
1502 c++;
1503 }
1504 }
1505 else if (c <= c2) /* Both C and C2 are multibyte char. */
1506 {
1507 char_ranges[n_char_ranges++] = c;
1508 char_ranges[n_char_ranges++] = c2;
1509 }
1510 }
1511 else
1512 {
1513 if (SINGLE_BYTE_CHAR_P (c))
1514 fastmap[c] = 1;
1515 else
1516 {
1517 char_ranges[n_char_ranges++] = c;
1518 char_ranges[n_char_ranges++] = c;
1519 }
1520 }
1521 }
1522 }
1523
1524 /* If ^ was the first character, complement the fastmap. */
1525 if (negate)
1526 for (i = 0; i < sizeof fastmap; i++)
1527 fastmap[i] ^= 1;
1528
1529 {
1530 int start_point = PT;
1531 int pos = PT;
1532 int pos_byte = PT_BYTE;
1533
1534 immediate_quit = 1;
1535 if (syntaxp)
1536 {
1537 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1538 if (forwardp)
1539 {
1540 if (multibyte)
1541 {
1542 if (pos < XINT (lim))
1543 while (fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
1544 {
1545 /* Since we already checked for multibyteness,
1546 avoid using INC_BOTH which checks again. */
1547 INC_POS (pos_byte);
1548 pos++;
1549 if (pos >= XINT (lim))
1550 break;
1551 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1552 }
1553 }
1554 else
1555 {
1556 while (pos < XINT (lim)
1557 && fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1558 {
1559 pos++;
1560 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1561 }
1562 }
1563 }
1564 else
1565 {
1566 if (multibyte)
1567 {
1568 while (pos > XINT (lim))
1569 {
1570 int savepos = pos_byte;
1571 /* Since we already checked for multibyteness,
1572 avoid using DEC_BOTH which checks again. */
1573 pos--;
1574 DEC_POS (pos_byte);
1575 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1576 if (!fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
1577 {
1578 pos++;
1579 pos_byte = savepos;
1580 break;
1581 }
1582 }
1583 }
1584 else
1585 {
1586 if (pos > XINT (lim))
1587 while (fastmap[(int) SYNTAX (FETCH_BYTE (pos - 1))])
1588 {
1589 pos--;
1590 if (pos <= XINT (lim))
1591 break;
1592 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
1593 }
1594 }
1595 }
1596 }
1597 else
1598 {
1599 if (forwardp)
1600 {
1601 if (multibyte)
1602 while (pos < XINT (lim))
1603 {
1604 c = FETCH_MULTIBYTE_CHAR (pos_byte);
1605 if (SINGLE_BYTE_CHAR_P (c))
1606 {
1607 if (!fastmap[c])
1608 break;
1609 }
1610 else
1611 {
1612 /* If we are looking at a multibyte character,
1613 we must look up the character in the table
1614 CHAR_RANGES. If there's no data in the
1615 table, that character is not what we want to
1616 skip. */
1617
1618 /* The following code do the right thing even if
1619 n_char_ranges is zero (i.e. no data in
1620 CHAR_RANGES). */
1621 for (i = 0; i < n_char_ranges; i += 2)
1622 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1623 break;
1624 if (!(negate ^ (i < n_char_ranges)))
1625 break;
1626 }
1627 INC_BOTH (pos, pos_byte);
1628 }
1629 else
1630 while (pos < XINT (lim) && fastmap[FETCH_BYTE (pos)])
1631 pos++;
1632 }
1633 else
1634 {
1635 if (multibyte)
1636 while (pos > XINT (lim))
1637 {
1638 int prev_pos_byte = pos_byte;
1639
1640 DEC_POS (prev_pos_byte);
1641 c = FETCH_MULTIBYTE_CHAR (prev_pos_byte);
1642 if (SINGLE_BYTE_CHAR_P (c))
1643 {
1644 if (!fastmap[c])
1645 break;
1646 }
1647 else
1648 {
1649 /* See the comment in the previous similar code. */
1650 for (i = 0; i < n_char_ranges; i += 2)
1651 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1652 break;
1653 if (!(negate ^ (i < n_char_ranges)))
1654 break;
1655 }
1656 pos--;
1657 pos_byte = prev_pos_byte;
1658 }
1659 else
1660 while (pos > XINT (lim) && fastmap[FETCH_BYTE (pos - 1)])
1661 pos--;
1662 }
1663 }
1664
1665 #if 0 /* Not needed now that a position in mid-character
1666 cannot be specified in Lisp. */
1667 if (multibyte
1668 /* INC_POS or DEC_POS might have moved POS over LIM. */
1669 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1670 pos = XINT (lim);
1671 #endif
1672
1673 if (! multibyte)
1674 pos_byte = pos;
1675
1676 SET_PT_BOTH (pos, pos_byte);
1677 immediate_quit = 0;
1678
1679 return make_number (PT - start_point);
1680 }
1681 }
1682 \f
1683 /* Jump over a comment, assuming we are at the beginning of one.
1684 FROM is the current position.
1685 FROM_BYTE is the bytepos corresponding to FROM.
1686 Do not move past STOP (a charpos).
1687 The comment over which we have to jump is of style STYLE
1688 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
1689 NESTING should be positive to indicate the nesting at the beginning
1690 for nested comments and should be zero or negative else.
1691 ST_COMMENT_STYLE cannot be nested.
1692 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
1693 (or 0 If the search cannot start in the middle of a two-character).
1694
1695 If successful, return 1 and store the charpos of the comment's end
1696 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
1697 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
1698 corresponding bytepos into *BYTEPOS_PTR and the current nesting
1699 (as defined for state.incomment) in *INCOMMENT_PTR.
1700
1701 The comment end is the last character of the comment rather than the
1702 character just after the comment.
1703
1704 Global syntax data is assumed to initially be valid for FROM and
1705 remains valid for forward search starting at the returned position. */
1706
1707 static int
1708 forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
1709 charpos_ptr, bytepos_ptr, incomment_ptr)
1710 int from, from_byte, stop;
1711 int nesting, style, prev_syntax;
1712 int *charpos_ptr, *bytepos_ptr, *incomment_ptr;
1713 {
1714 register int c, c1;
1715 register enum syntaxcode code;
1716 register int syntax;
1717
1718 if (nesting <= 0) nesting = -1;
1719
1720 /* Enter the loop in the middle so that we find
1721 a 2-char comment ender if we start in the middle of it. */
1722 syntax = prev_syntax;
1723 if (syntax != 0) goto forw_incomment;
1724
1725 while (1)
1726 {
1727 if (from == stop)
1728 {
1729 *incomment_ptr = nesting;
1730 *charpos_ptr = from;
1731 *bytepos_ptr = from_byte;
1732 return 0;
1733 }
1734 c = FETCH_CHAR (from_byte);
1735 syntax = SYNTAX_WITH_FLAGS (c);
1736 code = syntax & 0xff;
1737 if (code == Sendcomment
1738 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1739 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
1740 (nesting > 0 && --nesting == 0) : nesting < 0))
1741 /* we have encountered a comment end of the same style
1742 as the comment sequence which began this comment
1743 section */
1744 break;
1745 if (code == Scomment_fence
1746 && style == ST_COMMENT_STYLE)
1747 /* we have encountered a comment end of the same style
1748 as the comment sequence which began this comment
1749 section. */
1750 break;
1751 if (nesting > 0
1752 && code == Scomment
1753 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
1754 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
1755 /* we have encountered a nested comment of the same style
1756 as the comment sequence which began this comment section */
1757 nesting++;
1758 INC_BOTH (from, from_byte);
1759 UPDATE_SYNTAX_TABLE_FORWARD (from);
1760
1761 forw_incomment:
1762 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
1763 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1764 && (c1 = FETCH_CHAR (from_byte),
1765 SYNTAX_COMEND_SECOND (c1))
1766 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1767 SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
1768 {
1769 if (--nesting <= 0)
1770 /* we have encountered a comment end of the same style
1771 as the comment sequence which began this comment
1772 section */
1773 break;
1774 else
1775 {
1776 INC_BOTH (from, from_byte);
1777 UPDATE_SYNTAX_TABLE_FORWARD (from);
1778 }
1779 }
1780 if (nesting > 0
1781 && from < stop
1782 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
1783 && (c1 = FETCH_CHAR (from_byte),
1784 SYNTAX_COMMENT_STYLE (c1) == style
1785 && SYNTAX_COMSTART_SECOND (c1))
1786 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1787 SYNTAX_COMMENT_NESTED (c1)))
1788 /* we have encountered a nested comment of the same style
1789 as the comment sequence which began this comment
1790 section */
1791 {
1792 INC_BOTH (from, from_byte);
1793 UPDATE_SYNTAX_TABLE_FORWARD (from);
1794 nesting++;
1795 }
1796 }
1797 *charpos_ptr = from;
1798 *bytepos_ptr = from_byte;
1799 return 1;
1800 }
1801
1802 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
1803 "Move forward across up to N comments. If N is negative, move backward.\n\
1804 Stop scanning if we find something other than a comment or whitespace.\n\
1805 Set point to where scanning stops.\n\
1806 If N comments are found as expected, with nothing except whitespace\n\
1807 between them, return t; otherwise return nil.")
1808 (count)
1809 Lisp_Object count;
1810 {
1811 register int from;
1812 int from_byte;
1813 register int stop;
1814 register int c, c1;
1815 register enum syntaxcode code;
1816 int comstyle = 0; /* style of comment encountered */
1817 int comnested = 0; /* whether the comment is nestable or not */
1818 int found;
1819 int count1;
1820 int out_charpos, out_bytepos;
1821 int dummy;
1822
1823 CHECK_NUMBER (count, 0);
1824 count1 = XINT (count);
1825 stop = count1 > 0 ? ZV : BEGV;
1826
1827 immediate_quit = 1;
1828 QUIT;
1829
1830 from = PT;
1831 from_byte = PT_BYTE;
1832
1833 SETUP_SYNTAX_TABLE (from, count1);
1834 while (count1 > 0)
1835 {
1836 do
1837 {
1838 int comstart_first;
1839
1840 if (from == stop)
1841 {
1842 SET_PT_BOTH (from, from_byte);
1843 immediate_quit = 0;
1844 return Qnil;
1845 }
1846 c = FETCH_CHAR (from_byte);
1847 code = SYNTAX (c);
1848 comstart_first = SYNTAX_COMSTART_FIRST (c);
1849 comnested = SYNTAX_COMMENT_NESTED (c);
1850 comstyle = SYNTAX_COMMENT_STYLE (c);
1851 INC_BOTH (from, from_byte);
1852 UPDATE_SYNTAX_TABLE_FORWARD (from);
1853 if (from < stop && comstart_first
1854 && (c1 = FETCH_CHAR (from_byte),
1855 SYNTAX_COMSTART_SECOND (c1)))
1856 {
1857 /* We have encountered a comment start sequence and we
1858 are ignoring all text inside comments. We must record
1859 the comment style this sequence begins so that later,
1860 only a comment end of the same style actually ends
1861 the comment section. */
1862 code = Scomment;
1863 comstyle = SYNTAX_COMMENT_STYLE (c1);
1864 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
1865 INC_BOTH (from, from_byte);
1866 UPDATE_SYNTAX_TABLE_FORWARD (from);
1867 }
1868 }
1869 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
1870
1871 if (code == Scomment_fence)
1872 comstyle = ST_COMMENT_STYLE;
1873 else if (code != Scomment)
1874 {
1875 immediate_quit = 0;
1876 DEC_BOTH (from, from_byte);
1877 SET_PT_BOTH (from, from_byte);
1878 return Qnil;
1879 }
1880 /* We're at the start of a comment. */
1881 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
1882 &out_charpos, &out_bytepos, &dummy);
1883 from = out_charpos; from_byte = out_bytepos;
1884 if (!found)
1885 {
1886 immediate_quit = 0;
1887 SET_PT_BOTH (from, from_byte);
1888 return Qnil;
1889 }
1890 INC_BOTH (from, from_byte);
1891 UPDATE_SYNTAX_TABLE_FORWARD (from);
1892 /* We have skipped one comment. */
1893 count1--;
1894 }
1895
1896 while (count1 < 0)
1897 {
1898 while (1)
1899 {
1900 int quoted, comstart_second;
1901
1902 if (from <= stop)
1903 {
1904 SET_PT_BOTH (BEGV, BEGV_BYTE);
1905 immediate_quit = 0;
1906 return Qnil;
1907 }
1908
1909 DEC_BOTH (from, from_byte);
1910 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
1911 quoted = char_quoted (from, from_byte);
1912 if (quoted)
1913 {
1914 DEC_BOTH (from, from_byte);
1915 goto leave;
1916 }
1917 c = FETCH_CHAR (from_byte);
1918 code = SYNTAX (c);
1919 comstyle = 0;
1920 comnested = SYNTAX_COMMENT_NESTED (c);
1921 if (code == Sendcomment)
1922 comstyle = SYNTAX_COMMENT_STYLE (c);
1923 if (from > stop && SYNTAX_COMEND_SECOND (c)
1924 && prev_char_comend_first (from, from_byte)
1925 && !char_quoted (from - 1, dec_bytepos (from_byte)))
1926 {
1927 /* We must record the comment style encountered so that
1928 later, we can match only the proper comment begin
1929 sequence of the same style. */
1930 DEC_BOTH (from, from_byte);
1931 code = Sendcomment;
1932 /* Calling char_quoted, above, set up global syntax position
1933 at the new value of FROM. */
1934 c1 = FETCH_CHAR (from_byte);
1935 comstyle = SYNTAX_COMMENT_STYLE (c1);
1936 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
1937 }
1938
1939 if (code == Scomment_fence)
1940 {
1941 /* Skip until first preceding unquoted comment_fence. */
1942 int found = 0, ini = from, ini_byte = from_byte;
1943
1944 while (1)
1945 {
1946 DEC_BOTH (from, from_byte);
1947 if (from == stop)
1948 break;
1949 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1950 c = FETCH_CHAR (from_byte);
1951 if (SYNTAX (c) == Scomment_fence
1952 && !char_quoted (from, from_byte))
1953 {
1954 found = 1;
1955 break;
1956 }
1957 }
1958 if (found == 0)
1959 {
1960 from = ini; /* Set point to ini + 1. */
1961 from_byte = ini_byte;
1962 goto leave;
1963 }
1964 }
1965 else if (code == Sendcomment)
1966 {
1967 found = back_comment (from, from_byte, stop, comnested, comstyle,
1968 &out_charpos, &out_bytepos);
1969 if (found == -1)
1970 {
1971 if (c == '\n')
1972 /* This end-of-line is not an end-of-comment.
1973 Treat it like a whitespace.
1974 CC-mode (and maybe others) relies on this behavior. */
1975 ;
1976 else
1977 {
1978 /* Failure: we should go back to the end of this
1979 not-quite-endcomment. */
1980 if (SYNTAX(c) != code)
1981 /* It was a two-char Sendcomment. */
1982 INC_BOTH (from, from_byte);
1983 goto leave;
1984 }
1985 }
1986 else
1987 {
1988 /* We have skipped one comment. */
1989 from = out_charpos, from_byte = out_bytepos;
1990 break;
1991 }
1992 }
1993 else if (code != Swhitespace)
1994 {
1995 leave:
1996 immediate_quit = 0;
1997 INC_BOTH (from, from_byte);
1998 SET_PT_BOTH (from, from_byte);
1999 return Qnil;
2000 }
2001 }
2002
2003 count1++;
2004 }
2005
2006 SET_PT_BOTH (from, from_byte);
2007 immediate_quit = 0;
2008 return Qt;
2009 }
2010 \f
2011 /* Return syntax code of character C if C is a single byte character
2012 or `multibyte_symbol_p' is zero. Otherwise, retrun Ssymbol. */
2013
2014 #define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2015 ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p) \
2016 ? SYNTAX (c) : Ssymbol)
2017
2018 static Lisp_Object
2019 scan_lists (from, count, depth, sexpflag)
2020 register int from;
2021 int count, depth, sexpflag;
2022 {
2023 Lisp_Object val;
2024 register int stop = count > 0 ? ZV : BEGV;
2025 register int c, c1;
2026 int stringterm;
2027 int quoted;
2028 int mathexit = 0;
2029 register enum syntaxcode code, temp_code;
2030 int min_depth = depth; /* Err out if depth gets less than this. */
2031 int comstyle = 0; /* style of comment encountered */
2032 int comnested = 0; /* whether the comment is nestable or not */
2033 int temp_pos;
2034 int last_good = from;
2035 int found;
2036 int from_byte;
2037 int out_bytepos, out_charpos;
2038 int temp, dummy;
2039 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2040
2041 if (depth > 0) min_depth = 0;
2042
2043 if (from > ZV) from = ZV;
2044 if (from < BEGV) from = BEGV;
2045
2046 from_byte = CHAR_TO_BYTE (from);
2047
2048 immediate_quit = 1;
2049 QUIT;
2050
2051 SETUP_SYNTAX_TABLE (from, count);
2052 while (count > 0)
2053 {
2054 while (from < stop)
2055 {
2056 int comstart_first, prefix;
2057 UPDATE_SYNTAX_TABLE_FORWARD (from);
2058 c = FETCH_CHAR (from_byte);
2059 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2060 comstart_first = SYNTAX_COMSTART_FIRST (c);
2061 comnested = SYNTAX_COMMENT_NESTED (c);
2062 comstyle = SYNTAX_COMMENT_STYLE (c);
2063 prefix = SYNTAX_PREFIX (c);
2064 if (depth == min_depth)
2065 last_good = from;
2066 INC_BOTH (from, from_byte);
2067 UPDATE_SYNTAX_TABLE_FORWARD (from);
2068 if (from < stop && comstart_first
2069 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte))
2070 && parse_sexp_ignore_comments)
2071 {
2072 /* we have encountered a comment start sequence and we
2073 are ignoring all text inside comments. We must record
2074 the comment style this sequence begins so that later,
2075 only a comment end of the same style actually ends
2076 the comment section */
2077 code = Scomment;
2078 c1 = FETCH_CHAR (from_byte);
2079 comstyle = SYNTAX_COMMENT_STYLE (c1);
2080 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2081 INC_BOTH (from, from_byte);
2082 UPDATE_SYNTAX_TABLE_FORWARD (from);
2083 }
2084
2085 if (prefix)
2086 continue;
2087
2088 switch (SWITCH_ENUM_CAST (code))
2089 {
2090 case Sescape:
2091 case Scharquote:
2092 if (from == stop) goto lose;
2093 INC_BOTH (from, from_byte);
2094 /* treat following character as a word constituent */
2095 case Sword:
2096 case Ssymbol:
2097 if (depth || !sexpflag) break;
2098 /* This word counts as a sexp; return at end of it. */
2099 while (from < stop)
2100 {
2101 UPDATE_SYNTAX_TABLE_FORWARD (from);
2102
2103 /* Some compilers can't handle this inside the switch. */
2104 c = FETCH_CHAR (from_byte);
2105 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2106 switch (temp)
2107 {
2108 case Scharquote:
2109 case Sescape:
2110 INC_BOTH (from, from_byte);
2111 if (from == stop) goto lose;
2112 break;
2113 case Sword:
2114 case Ssymbol:
2115 case Squote:
2116 break;
2117 default:
2118 goto done;
2119 }
2120 INC_BOTH (from, from_byte);
2121 }
2122 goto done;
2123
2124 case Scomment_fence:
2125 comstyle = ST_COMMENT_STYLE;
2126 /* FALLTHROUGH */
2127 case Scomment:
2128 if (!parse_sexp_ignore_comments) break;
2129 UPDATE_SYNTAX_TABLE_FORWARD (from);
2130 found = forw_comment (from, from_byte, stop,
2131 comnested, comstyle, 0,
2132 &out_charpos, &out_bytepos, &dummy);
2133 from = out_charpos, from_byte = out_bytepos;
2134 if (!found)
2135 {
2136 if (depth == 0)
2137 goto done;
2138 goto lose;
2139 }
2140 INC_BOTH (from, from_byte);
2141 UPDATE_SYNTAX_TABLE_FORWARD (from);
2142 break;
2143
2144 case Smath:
2145 if (!sexpflag)
2146 break;
2147 if (from != stop && c == FETCH_CHAR (from_byte))
2148 {
2149 INC_BOTH (from, from_byte);
2150 }
2151 if (mathexit)
2152 {
2153 mathexit = 0;
2154 goto close1;
2155 }
2156 mathexit = 1;
2157
2158 case Sopen:
2159 if (!++depth) goto done;
2160 break;
2161
2162 case Sclose:
2163 close1:
2164 if (!--depth) goto done;
2165 if (depth < min_depth)
2166 Fsignal (Qscan_error,
2167 Fcons (build_string ("Containing expression ends prematurely"),
2168 Fcons (make_number (last_good),
2169 Fcons (make_number (from), Qnil))));
2170 break;
2171
2172 case Sstring:
2173 case Sstring_fence:
2174 temp_pos = dec_bytepos (from_byte);
2175 stringterm = FETCH_CHAR (temp_pos);
2176 while (1)
2177 {
2178 if (from >= stop) goto lose;
2179 UPDATE_SYNTAX_TABLE_FORWARD (from);
2180 c = FETCH_CHAR (from_byte);
2181 if (code == Sstring
2182 ? c == stringterm
2183 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
2184 break;
2185
2186 /* Some compilers can't handle this inside the switch. */
2187 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2188 switch (temp)
2189 {
2190 case Scharquote:
2191 case Sescape:
2192 INC_BOTH (from, from_byte);
2193 }
2194 INC_BOTH (from, from_byte);
2195 }
2196 INC_BOTH (from, from_byte);
2197 if (!depth && sexpflag) goto done;
2198 break;
2199 }
2200 }
2201
2202 /* Reached end of buffer. Error if within object, return nil if between */
2203 if (depth) goto lose;
2204
2205 immediate_quit = 0;
2206 return Qnil;
2207
2208 /* End of object reached */
2209 done:
2210 count--;
2211 }
2212
2213 while (count < 0)
2214 {
2215 while (from > stop)
2216 {
2217 DEC_BOTH (from, from_byte);
2218 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2219 c = FETCH_CHAR (from_byte);
2220 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2221 if (depth == min_depth)
2222 last_good = from;
2223 comstyle = 0;
2224 comnested = SYNTAX_COMMENT_NESTED (c);
2225 if (code == Sendcomment)
2226 comstyle = SYNTAX_COMMENT_STYLE (c);
2227 if (from > stop && SYNTAX_COMEND_SECOND (c)
2228 && prev_char_comend_first (from, from_byte)
2229 && parse_sexp_ignore_comments)
2230 {
2231 /* We must record the comment style encountered so that
2232 later, we can match only the proper comment begin
2233 sequence of the same style. */
2234 DEC_BOTH (from, from_byte);
2235 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2236 code = Sendcomment;
2237 c1 = FETCH_CHAR (from_byte);
2238 comstyle = SYNTAX_COMMENT_STYLE (c1);
2239 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2240 }
2241
2242 /* Quoting turns anything except a comment-ender
2243 into a word character. Note that this cannot be true
2244 if we decremented FROM in the if-statement above. */
2245 if (code != Sendcomment && char_quoted (from, from_byte))
2246 code = Sword;
2247 else if (SYNTAX_PREFIX (c))
2248 continue;
2249
2250 switch (SWITCH_ENUM_CAST (code))
2251 {
2252 case Sword:
2253 case Ssymbol:
2254 case Sescape:
2255 case Scharquote:
2256 if (depth || !sexpflag) break;
2257 /* This word counts as a sexp; count object finished
2258 after passing it. */
2259 while (from > stop)
2260 {
2261 temp_pos = from_byte;
2262 if (! NILP (current_buffer->enable_multibyte_characters))
2263 DEC_POS (temp_pos);
2264 else
2265 temp_pos--;
2266 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2267 c1 = FETCH_CHAR (temp_pos);
2268 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2269 /* Don't allow comment-end to be quoted. */
2270 if (temp_code == Sendcomment)
2271 goto done2;
2272 quoted = char_quoted (from - 1, temp_pos);
2273 if (quoted)
2274 {
2275 DEC_BOTH (from, from_byte);
2276 temp_pos = dec_bytepos (temp_pos);
2277 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2278 }
2279 c1 = FETCH_CHAR (temp_pos);
2280 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2281 if (! (quoted || temp_code == Sword
2282 || temp_code == Ssymbol
2283 || temp_code == Squote))
2284 goto done2;
2285 DEC_BOTH (from, from_byte);
2286 }
2287 goto done2;
2288
2289 case Smath:
2290 if (!sexpflag)
2291 break;
2292 temp_pos = dec_bytepos (from_byte);
2293 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2294 if (from != stop && c == FETCH_CHAR (temp_pos))
2295 DEC_BOTH (from, from_byte);
2296 if (mathexit)
2297 {
2298 mathexit = 0;
2299 goto open2;
2300 }
2301 mathexit = 1;
2302
2303 case Sclose:
2304 if (!++depth) goto done2;
2305 break;
2306
2307 case Sopen:
2308 open2:
2309 if (!--depth) goto done2;
2310 if (depth < min_depth)
2311 Fsignal (Qscan_error,
2312 Fcons (build_string ("Containing expression ends prematurely"),
2313 Fcons (make_number (last_good),
2314 Fcons (make_number (from), Qnil))));
2315 break;
2316
2317 case Sendcomment:
2318 if (!parse_sexp_ignore_comments)
2319 break;
2320 found = back_comment (from, from_byte, stop, comnested, comstyle,
2321 &out_charpos, &out_bytepos);
2322 /* FIXME: if found == -1, then it really wasn't a comment-end.
2323 For single-char Sendcomment, we can't do much about it apart
2324 from skipping the char.
2325 For 2-char endcomments, we could try again, taking both
2326 chars as separate entities, but it's a lot of trouble
2327 for very little gain, so we don't bother either. -sm */
2328 if (found != -1)
2329 from = out_charpos, from_byte = out_bytepos;
2330 break;
2331
2332 case Scomment_fence:
2333 case Sstring_fence:
2334 while (1)
2335 {
2336 DEC_BOTH (from, from_byte);
2337 if (from == stop) goto lose;
2338 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2339 if (!char_quoted (from, from_byte)
2340 && (c = FETCH_CHAR (from_byte),
2341 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
2342 break;
2343 }
2344 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2345 break;
2346
2347 case Sstring:
2348 stringterm = FETCH_CHAR (from_byte);
2349 while (1)
2350 {
2351 if (from == stop) goto lose;
2352 temp_pos = from_byte;
2353 if (! NILP (current_buffer->enable_multibyte_characters))
2354 DEC_POS (temp_pos);
2355 else
2356 temp_pos--;
2357 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2358 if (!char_quoted (from - 1, temp_pos)
2359 && stringterm == FETCH_CHAR (temp_pos))
2360 break;
2361 DEC_BOTH (from, from_byte);
2362 }
2363 DEC_BOTH (from, from_byte);
2364 if (!depth && sexpflag) goto done2;
2365 break;
2366 }
2367 }
2368
2369 /* Reached start of buffer. Error if within object, return nil if between */
2370 if (depth) goto lose;
2371
2372 immediate_quit = 0;
2373 return Qnil;
2374
2375 done2:
2376 count++;
2377 }
2378
2379
2380 immediate_quit = 0;
2381 XSETFASTINT (val, from);
2382 return val;
2383
2384 lose:
2385 Fsignal (Qscan_error,
2386 Fcons (build_string ("Unbalanced parentheses"),
2387 Fcons (make_number (last_good),
2388 Fcons (make_number (from), Qnil))));
2389
2390 /* NOTREACHED */
2391 }
2392
2393 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2394 "Scan from character number FROM by COUNT lists.\n\
2395 Returns the character number of the position thus found.\n\
2396 \n\
2397 If DEPTH is nonzero, paren depth begins counting from that value,\n\
2398 only places where the depth in parentheses becomes zero\n\
2399 are candidates for stopping; COUNT such places are counted.\n\
2400 Thus, a positive value for DEPTH means go out levels.\n\
2401 \n\
2402 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2403 \n\
2404 If the beginning or end of (the accessible part of) the buffer is reached\n\
2405 and the depth is wrong, an error is signaled.\n\
2406 If the depth is right but the count is not used up, nil is returned.")
2407 (from, count, depth)
2408 Lisp_Object from, count, depth;
2409 {
2410 CHECK_NUMBER (from, 0);
2411 CHECK_NUMBER (count, 1);
2412 CHECK_NUMBER (depth, 2);
2413
2414 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2415 }
2416
2417 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2418 "Scan from character number FROM by COUNT balanced expressions.\n\
2419 If COUNT is negative, scan backwards.\n\
2420 Returns the character number of the position thus found.\n\
2421 \n\
2422 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2423 \n\
2424 If the beginning or end of (the accessible part of) the buffer is reached\n\
2425 in the middle of a parenthetical grouping, an error is signaled.\n\
2426 If the beginning or end is reached between groupings\n\
2427 but before count is used up, nil is returned.")
2428 (from, count)
2429 Lisp_Object from, count;
2430 {
2431 CHECK_NUMBER (from, 0);
2432 CHECK_NUMBER (count, 1);
2433
2434 return scan_lists (XINT (from), XINT (count), 0, 1);
2435 }
2436
2437 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2438 0, 0, 0,
2439 "Move point backward over any number of chars with prefix syntax.\n\
2440 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
2441 ()
2442 {
2443 int beg = BEGV;
2444 int opoint = PT;
2445 int opoint_byte = PT_BYTE;
2446 int pos = PT;
2447 int pos_byte = PT_BYTE;
2448 int c;
2449
2450 if (pos <= beg)
2451 {
2452 SET_PT_BOTH (opoint, opoint_byte);
2453
2454 return Qnil;
2455 }
2456
2457 SETUP_SYNTAX_TABLE (pos, -1);
2458
2459 DEC_BOTH (pos, pos_byte);
2460
2461 while (!char_quoted (pos, pos_byte)
2462 /* Previous statement updates syntax table. */
2463 && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote)
2464 || SYNTAX_PREFIX (c)))
2465 {
2466 opoint = pos;
2467 opoint_byte = pos_byte;
2468
2469 if (pos + 1 > beg)
2470 DEC_BOTH (pos, pos_byte);
2471 }
2472
2473 SET_PT_BOTH (opoint, opoint_byte);
2474
2475 return Qnil;
2476 }
2477 \f
2478 /* Parse forward from FROM / FROM_BYTE to END,
2479 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2480 and return a description of the state of the parse at END.
2481 If STOPBEFORE is nonzero, stop at the start of an atom.
2482 If COMMENTSTOP is 1, stop at the start of a comment.
2483 If COMMENTSTOP is -1, stop at the start or end of a comment,
2484 after the beginning of a string, or after the end of a string. */
2485
2486 static void
2487 scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
2488 stopbefore, oldstate, commentstop)
2489 struct lisp_parse_state *stateptr;
2490 register int from;
2491 int end, targetdepth, stopbefore;
2492 Lisp_Object oldstate;
2493 int commentstop;
2494 {
2495 struct lisp_parse_state state;
2496
2497 register enum syntaxcode code;
2498 int c1;
2499 int comnested;
2500 struct level { int last, prev; };
2501 struct level levelstart[100];
2502 register struct level *curlevel = levelstart;
2503 struct level *endlevel = levelstart + 100;
2504 register int depth; /* Paren depth of current scanning location.
2505 level - levelstart equals this except
2506 when the depth becomes negative. */
2507 int mindepth; /* Lowest DEPTH value seen. */
2508 int start_quoted = 0; /* Nonzero means starting after a char quote */
2509 Lisp_Object tem;
2510 int prev_from; /* Keep one character before FROM. */
2511 int prev_from_byte;
2512 int prev_from_syntax;
2513 int boundary_stop = commentstop == -1;
2514 int nofence;
2515 int found;
2516 int out_bytepos, out_charpos;
2517 int temp;
2518
2519 prev_from = from;
2520 prev_from_byte = from_byte;
2521 if (from != BEGV)
2522 DEC_BOTH (prev_from, prev_from_byte);
2523
2524 /* Use this macro instead of `from++'. */
2525 #define INC_FROM \
2526 do { prev_from = from; \
2527 prev_from_byte = from_byte; \
2528 prev_from_syntax \
2529 = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte)); \
2530 INC_BOTH (from, from_byte); \
2531 UPDATE_SYNTAX_TABLE_FORWARD (from); \
2532 } while (0)
2533
2534 immediate_quit = 1;
2535 QUIT;
2536
2537 if (NILP (oldstate))
2538 {
2539 depth = 0;
2540 state.instring = -1;
2541 state.incomment = 0;
2542 state.comstyle = 0; /* comment style a by default. */
2543 state.comstr_start = -1; /* no comment/string seen. */
2544 }
2545 else
2546 {
2547 tem = Fcar (oldstate);
2548 if (!NILP (tem))
2549 depth = XINT (tem);
2550 else
2551 depth = 0;
2552
2553 oldstate = Fcdr (oldstate);
2554 oldstate = Fcdr (oldstate);
2555 oldstate = Fcdr (oldstate);
2556 tem = Fcar (oldstate);
2557 /* Check whether we are inside string_fence-style string: */
2558 state.instring = (!NILP (tem)
2559 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2560 : -1);
2561
2562 oldstate = Fcdr (oldstate);
2563 tem = Fcar (oldstate);
2564 state.incomment = (!NILP (tem)
2565 ? (INTEGERP (tem) ? XINT (tem) : -1)
2566 : 0);
2567
2568 oldstate = Fcdr (oldstate);
2569 tem = Fcar (oldstate);
2570 start_quoted = !NILP (tem);
2571
2572 /* if the eighth element of the list is nil, we are in comment
2573 style a. If it is non-nil, we are in comment style b */
2574 oldstate = Fcdr (oldstate);
2575 oldstate = Fcdr (oldstate);
2576 tem = Fcar (oldstate);
2577 state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
2578 ? ST_COMMENT_STYLE : 1);
2579
2580 oldstate = Fcdr (oldstate);
2581 tem = Fcar (oldstate);
2582 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
2583 oldstate = Fcdr (oldstate);
2584 tem = Fcar (oldstate);
2585 while (!NILP (tem)) /* >= second enclosing sexps. */
2586 {
2587 /* curlevel++->last ran into compiler bug on Apollo */
2588 curlevel->last = XINT (Fcar (tem));
2589 if (++curlevel == endlevel)
2590 curlevel--; /* error ("Nesting too deep for parser"); */
2591 curlevel->prev = -1;
2592 curlevel->last = -1;
2593 tem = Fcdr (tem);
2594 }
2595 }
2596 state.quoted = 0;
2597 mindepth = depth;
2598
2599 curlevel->prev = -1;
2600 curlevel->last = -1;
2601
2602 SETUP_SYNTAX_TABLE (prev_from, 1);
2603 prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
2604 UPDATE_SYNTAX_TABLE_FORWARD (from);
2605
2606 /* Enter the loop at a place appropriate for initial state. */
2607
2608 if (state.incomment)
2609 goto startincomment;
2610 if (state.instring >= 0)
2611 {
2612 nofence = state.instring != ST_STRING_STYLE;
2613 if (start_quoted)
2614 goto startquotedinstring;
2615 goto startinstring;
2616 }
2617 else if (start_quoted)
2618 goto startquoted;
2619
2620 #if 0 /* This seems to be redundant with the identical code above. */
2621 SETUP_SYNTAX_TABLE (prev_from, 1);
2622 prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
2623 UPDATE_SYNTAX_TABLE_FORWARD (from);
2624 #endif
2625
2626 while (from < end)
2627 {
2628 INC_FROM;
2629 code = prev_from_syntax & 0xff;
2630
2631 if (code == Scomment)
2632 {
2633 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
2634 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
2635 1 : -1);
2636 state.comstr_start = prev_from;
2637 }
2638 else if (code == Scomment_fence)
2639 {
2640 /* Record the comment style we have entered so that only
2641 the comment-end sequence of the same style actually
2642 terminates the comment section. */
2643 state.comstyle = ST_COMMENT_STYLE;
2644 state.incomment = -1;
2645 state.comstr_start = prev_from;
2646 code = Scomment;
2647 }
2648 else if (from < end)
2649 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax))
2650 if (c1 = FETCH_CHAR (from_byte),
2651 SYNTAX_COMSTART_SECOND (c1))
2652 /* Duplicate code to avoid a complex if-expression
2653 which causes trouble for the SGI compiler. */
2654 {
2655 /* Record the comment style we have entered so that only
2656 the comment-end sequence of the same style actually
2657 terminates the comment section. */
2658 state.comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte));
2659 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
2660 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2661 state.incomment = comnested ? 1 : -1;
2662 state.comstr_start = prev_from;
2663 INC_FROM;
2664 code = Scomment;
2665 }
2666
2667 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
2668 continue;
2669 switch (SWITCH_ENUM_CAST (code))
2670 {
2671 case Sescape:
2672 case Scharquote:
2673 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2674 curlevel->last = prev_from;
2675 startquoted:
2676 if (from == end) goto endquoted;
2677 INC_FROM;
2678 goto symstarted;
2679 /* treat following character as a word constituent */
2680 case Sword:
2681 case Ssymbol:
2682 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2683 curlevel->last = prev_from;
2684 symstarted:
2685 while (from < end)
2686 {
2687 /* Some compilers can't handle this inside the switch. */
2688 temp = SYNTAX (FETCH_CHAR (from_byte));
2689 switch (temp)
2690 {
2691 case Scharquote:
2692 case Sescape:
2693 INC_FROM;
2694 if (from == end) goto endquoted;
2695 break;
2696 case Sword:
2697 case Ssymbol:
2698 case Squote:
2699 break;
2700 default:
2701 goto symdone;
2702 }
2703 INC_FROM;
2704 }
2705 symdone:
2706 curlevel->prev = curlevel->last;
2707 break;
2708
2709 case Scomment:
2710 if (commentstop || boundary_stop) goto done;
2711 startincomment:
2712 /* The (from == BEGV) test was to enter the loop in the middle so
2713 that we find a 2-char comment ender even if we start in the
2714 middle of it. We don't want to do that if we're just at the
2715 beginning of the comment (think of (*) ... (*)). */
2716 found = forw_comment (from, from_byte, end,
2717 state.incomment, state.comstyle,
2718 (from == BEGV || from < state.comstr_start + 3)
2719 ? 0 : prev_from_syntax,
2720 &out_charpos, &out_bytepos, &state.incomment);
2721 from = out_charpos; from_byte = out_bytepos;
2722 /* Beware! prev_from and friends are invalid now.
2723 Luckily, the `done' doesn't use them and the INC_FROM
2724 sets them to a sane value without looking at them. */
2725 if (!found) goto done;
2726 INC_FROM;
2727 state.incomment = 0;
2728 state.comstyle = 0; /* reset the comment style */
2729 if (boundary_stop) goto done;
2730 break;
2731
2732 case Sopen:
2733 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2734 depth++;
2735 /* curlevel++->last ran into compiler bug on Apollo */
2736 curlevel->last = prev_from;
2737 if (++curlevel == endlevel)
2738 curlevel--; /* error ("Nesting too deep for parser"); */
2739 curlevel->prev = -1;
2740 curlevel->last = -1;
2741 if (targetdepth == depth) goto done;
2742 break;
2743
2744 case Sclose:
2745 depth--;
2746 if (depth < mindepth)
2747 mindepth = depth;
2748 if (curlevel != levelstart)
2749 curlevel--;
2750 curlevel->prev = curlevel->last;
2751 if (targetdepth == depth) goto done;
2752 break;
2753
2754 case Sstring:
2755 case Sstring_fence:
2756 state.comstr_start = from - 1;
2757 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2758 curlevel->last = prev_from;
2759 state.instring = (code == Sstring
2760 ? (FETCH_CHAR (prev_from_byte))
2761 : ST_STRING_STYLE);
2762 if (boundary_stop) goto done;
2763 startinstring:
2764 {
2765 nofence = state.instring != ST_STRING_STYLE;
2766
2767 while (1)
2768 {
2769 int c;
2770
2771 if (from >= end) goto done;
2772 c = FETCH_CHAR (from_byte);
2773 /* Some compilers can't handle this inside the switch. */
2774 temp = SYNTAX (c);
2775
2776 /* Check TEMP here so that if the char has
2777 a syntax-table property which says it is NOT
2778 a string character, it does not end the string. */
2779 if (nofence && c == state.instring && temp == Sstring)
2780 break;
2781
2782 switch (temp)
2783 {
2784 case Sstring_fence:
2785 if (!nofence) goto string_end;
2786 break;
2787 case Scharquote:
2788 case Sescape:
2789 INC_FROM;
2790 startquotedinstring:
2791 if (from >= end) goto endquoted;
2792 }
2793 INC_FROM;
2794 }
2795 }
2796 string_end:
2797 state.instring = -1;
2798 curlevel->prev = curlevel->last;
2799 INC_FROM;
2800 if (boundary_stop) goto done;
2801 break;
2802
2803 case Smath:
2804 break;
2805 }
2806 }
2807 goto done;
2808
2809 stop: /* Here if stopping before start of sexp. */
2810 from = prev_from; /* We have just fetched the char that starts it; */
2811 goto done; /* but return the position before it. */
2812
2813 endquoted:
2814 state.quoted = 1;
2815 done:
2816 state.depth = depth;
2817 state.mindepth = mindepth;
2818 state.thislevelstart = curlevel->prev;
2819 state.prevlevelstart
2820 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2821 state.location = from;
2822 state.levelstarts = Qnil;
2823 while (--curlevel >= levelstart)
2824 state.levelstarts = Fcons (make_number (curlevel->last),
2825 state.levelstarts);
2826 immediate_quit = 0;
2827
2828 *stateptr = state;
2829 }
2830
2831 /* This comment supplies the doc string for parse-partial-sexp,
2832 for make-docfile to see. We cannot put this in the real DEFUN
2833 due to limits in the Unix cpp.
2834
2835 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
2836 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
2837 Parsing stops at TO or when certain criteria are met;\n\
2838 point is set to where parsing stops.\n\
2839 If fifth arg STATE is omitted or nil,\n\
2840 parsing assumes that FROM is the beginning of a function.\n\
2841 Value is a list of ten elements describing final state of parsing:\n\
2842 0. depth in parens.\n\
2843 1. character address of start of innermost containing list; nil if none.\n\
2844 2. character address of start of last complete sexp terminated.\n\
2845 3. non-nil if inside a string.\n\
2846 (it is the character that will terminate the string,\n\
2847 or t if the string should be terminated by a generic string delimiter.)\n\
2848 4. nil if outside a comment, t if inside a non-nestable comment, \n\
2849 else an integer (the current comment nesting).\n\
2850 5. t if following a quote character.\n\
2851 6. the minimum paren-depth encountered during this scan.\n\
2852 7. t if in a comment of style b; symbol `syntax-table' if the comment\n\
2853 should be terminated by a generic comment delimiter.\n\
2854 8. character address of start of comment or string; nil if not in one.\n\
2855 9. Intermediate data for continuation of parsing (subject to change).\n\
2856 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
2857 in parentheses becomes equal to TARGETDEPTH.\n\
2858 Fourth arg STOPBEFORE non-nil means stop when come to\n\
2859 any character that starts a sexp.\n\
2860 Fifth arg STATE is a nine-element list like what this function returns.\n\
2861 It is used to initialize the state of the parse. Elements number 1, 2, 6\n\
2862 and 8 are ignored; you can leave off element 8 (the last) entirely.\n\
2863 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.\n\
2864 If it is symbol `syntax-table', stop after the start of a comment or a\n\
2865 string, or after end of a comment or a string.")
2866 (from, to, targetdepth, stopbefore, state, commentstop)
2867 */
2868
2869 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
2870 0 /* See immediately above */)
2871 (from, to, targetdepth, stopbefore, oldstate, commentstop)
2872 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
2873 {
2874 struct lisp_parse_state state;
2875 int target;
2876
2877 if (!NILP (targetdepth))
2878 {
2879 CHECK_NUMBER (targetdepth, 3);
2880 target = XINT (targetdepth);
2881 }
2882 else
2883 target = -100000; /* We won't reach this depth */
2884
2885 validate_region (&from, &to);
2886 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
2887 XINT (to),
2888 target, !NILP (stopbefore), oldstate,
2889 (NILP (commentstop)
2890 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
2891
2892 SET_PT (state.location);
2893
2894 return Fcons (make_number (state.depth),
2895 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
2896 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
2897 Fcons (state.instring >= 0
2898 ? (state.instring == ST_STRING_STYLE
2899 ? Qt : make_number (state.instring)) : Qnil,
2900 Fcons (state.incomment < 0 ? Qt :
2901 (state.incomment == 0 ? Qnil :
2902 make_number (state.incomment)),
2903 Fcons (state.quoted ? Qt : Qnil,
2904 Fcons (make_number (state.mindepth),
2905 Fcons ((state.comstyle
2906 ? (state.comstyle == ST_COMMENT_STYLE
2907 ? Qsyntax_table : Qt) :
2908 Qnil),
2909 Fcons (((state.incomment
2910 || (state.instring >= 0))
2911 ? make_number (state.comstr_start)
2912 : Qnil),
2913 Fcons (state.levelstarts, Qnil))))))))));
2914 }
2915 \f
2916 void
2917 init_syntax_once ()
2918 {
2919 register int i, c;
2920 Lisp_Object temp;
2921
2922 /* This has to be done here, before we call Fmake_char_table. */
2923 Qsyntax_table = intern ("syntax-table");
2924 staticpro (&Qsyntax_table);
2925
2926 /* Intern this now in case it isn't already done.
2927 Setting this variable twice is harmless.
2928 But don't staticpro it here--that is done in alloc.c. */
2929 Qchar_table_extra_slots = intern ("char-table-extra-slots");
2930
2931 /* Create objects which can be shared among syntax tables. */
2932 Vsyntax_code_object = Fmake_vector (make_number (13), Qnil);
2933 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
2934 XVECTOR (Vsyntax_code_object)->contents[i]
2935 = Fcons (make_number (i), Qnil);
2936
2937 /* Now we are ready to set up this property, so we can
2938 create syntax tables. */
2939 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
2940
2941 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
2942
2943 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
2944
2945 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
2946 for (i = 'a'; i <= 'z'; i++)
2947 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2948 for (i = 'A'; i <= 'Z'; i++)
2949 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2950 for (i = '0'; i <= '9'; i++)
2951 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2952
2953 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
2954 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
2955
2956 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
2957 Fcons (make_number (Sopen), make_number (')')));
2958 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
2959 Fcons (make_number (Sclose), make_number ('(')));
2960 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
2961 Fcons (make_number (Sopen), make_number (']')));
2962 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
2963 Fcons (make_number (Sclose), make_number ('[')));
2964 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
2965 Fcons (make_number (Sopen), make_number ('}')));
2966 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
2967 Fcons (make_number (Sclose), make_number ('{')));
2968 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
2969 Fcons (make_number ((int) Sstring), Qnil));
2970 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
2971 Fcons (make_number ((int) Sescape), Qnil));
2972
2973 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
2974 for (i = 0; i < 10; i++)
2975 {
2976 c = "_-+*/&|<>="[i];
2977 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2978 }
2979
2980 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
2981 for (i = 0; i < 12; i++)
2982 {
2983 c = ".,;:?!#@~^'`"[i];
2984 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2985 }
2986
2987 /* All multibyte characters have syntax `word' by default. */
2988 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
2989 for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
2990 XCHAR_TABLE (Vstandard_syntax_table)->contents[i] = temp;
2991 }
2992
2993 void
2994 syms_of_syntax ()
2995 {
2996 Qsyntax_table_p = intern ("syntax-table-p");
2997 staticpro (&Qsyntax_table_p);
2998
2999 staticpro (&Vsyntax_code_object);
3000
3001 Qscan_error = intern ("scan-error");
3002 staticpro (&Qscan_error);
3003 Fput (Qscan_error, Qerror_conditions,
3004 Fcons (Qscan_error, Fcons (Qerror, Qnil)));
3005 Fput (Qscan_error, Qerror_message,
3006 build_string ("Scan error"));
3007
3008 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
3009 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
3010
3011 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
3012 "Non-nil means `forward-sexp', etc., grant `syntax-table' property.\n\
3013 The value of this property should be either a syntax table, or a cons\n\
3014 of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric\n\
3015 syntax code, MATCHCHAR being nil or the character to match (which is\n\
3016 relevant only for open/close type.");
3017
3018 words_include_escapes = 0;
3019 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
3020 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
3021
3022 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
3023 "Non-nil means `scan-sexps' treats all multibyte characters as symbol.");
3024 multibyte_syntax_as_symbol = 0;
3025
3026 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3027 &open_paren_in_column_0_is_defun_start,
3028 "Non-nil means an open paren in column 0 denotes the start of a defun.");
3029 open_paren_in_column_0_is_defun_start = 1;
3030
3031 defsubr (&Ssyntax_table_p);
3032 defsubr (&Ssyntax_table);
3033 defsubr (&Sstandard_syntax_table);
3034 defsubr (&Scopy_syntax_table);
3035 defsubr (&Sset_syntax_table);
3036 defsubr (&Schar_syntax);
3037 defsubr (&Smatching_paren);
3038 defsubr (&Smodify_syntax_entry);
3039 defsubr (&Sdescribe_syntax);
3040
3041 defsubr (&Sforward_word);
3042
3043 defsubr (&Sskip_chars_forward);
3044 defsubr (&Sskip_chars_backward);
3045 defsubr (&Sskip_syntax_forward);
3046 defsubr (&Sskip_syntax_backward);
3047
3048 defsubr (&Sforward_comment);
3049 defsubr (&Sscan_lists);
3050 defsubr (&Sscan_sexps);
3051 defsubr (&Sbackward_prefix_chars);
3052 defsubr (&Sparse_partial_sexp);
3053 }