]> code.delx.au - gnu-emacs/blob - src/syntax.c
* syntax.c (skip_chars): Fix previous change. Make the handling of
[gnu-emacs] / src / syntax.c
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 87, 93, 94, 95, 97, 1998, 1999 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 #include <config.h>
23 #include <ctype.h>
24 #include "lisp.h"
25 #include "commands.h"
26 #include "buffer.h"
27 #include "charset.h"
28
29 /* Make syntax table lookup grant data in gl_state. */
30 #define SYNTAX_ENTRY_VIA_PROPERTY
31
32 #include "syntax.h"
33 #include "intervals.h"
34
35 /* We use these constants in place for comment-style and
36 string-ender-char to distinguish comments/strings started by
37 comment_fence and string_fence codes. */
38
39 #define ST_COMMENT_STYLE (256 + 1)
40 #define ST_STRING_STYLE (256 + 2)
41 #include "category.h"
42
43 Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
44
45 int words_include_escapes;
46 int parse_sexp_lookup_properties;
47
48 /* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
49 int multibyte_syntax_as_symbol;
50
51 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
52 if not compiled with GCC. No need to mark it, since it is used
53 only very temporarily. */
54 Lisp_Object syntax_temp;
55
56 /* Non-zero means an open parenthesis in column 0 is always considered
57 to be the start of a defun. Zero means an open parenthesis in
58 column 0 has no special meaning. */
59
60 int open_paren_in_column_0_is_defun_start;
61
62 /* This is the internal form of the parse state used in parse-partial-sexp. */
63
64 struct lisp_parse_state
65 {
66 int depth; /* Depth at end of parsing. */
67 int instring; /* -1 if not within string, else desired terminator. */
68 int incomment; /* -1 if in unnestable comment else comment nesting */
69 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
70 int quoted; /* Nonzero if just after an escape char at end of parsing */
71 int thislevelstart; /* Char number of most recent start-of-expression at current level */
72 int prevlevelstart; /* Char number of start of containing expression */
73 int location; /* Char number at which parsing stopped. */
74 int mindepth; /* Minimum depth seen while scanning. */
75 int comstr_start; /* Position just after last comment/string starter. */
76 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
77 of levels (starting from outermost). */
78 };
79 \f
80 /* These variables are a cache for finding the start of a defun.
81 find_start_pos is the place for which the defun start was found.
82 find_start_value is the defun start position found for it.
83 find_start_value_byte is the corresponding byte position.
84 find_start_buffer is the buffer it was found in.
85 find_start_begv is the BEGV value when it was found.
86 find_start_modiff is the value of MODIFF when it was found. */
87
88 static int find_start_pos;
89 static int find_start_value;
90 static int find_start_value_byte;
91 static struct buffer *find_start_buffer;
92 static int find_start_begv;
93 static int find_start_modiff;
94
95
96 static int find_defun_start P_ ((int, int));
97 static int back_comment P_ ((int, int, int, int, int, int *, int *));
98 static int char_quoted P_ ((int, int));
99 static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object));
100 static Lisp_Object scan_lists P_ ((int, int, int, int));
101 static void scan_sexps_forward P_ ((struct lisp_parse_state *,
102 int, int, int, int,
103 int, Lisp_Object, int));
104 \f
105
106 struct gl_state_s gl_state; /* Global state of syntax parser. */
107
108 INTERVAL interval_of ();
109 #define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
110 to scan to property-change. */
111
112 /* Update gl_state to an appropriate interval which contains CHARPOS. The
113 sign of COUNT give the relative position of CHARPOS wrt the previously
114 valid interval. If INIT, only [be]_property fields of gl_state are
115 valid at start, the rest is filled basing on OBJECT.
116
117 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
118 direction than the intervals - or in an interval. We update the
119 current syntax-table basing on the property of this interval, and
120 update the interval to start further than CHARPOS - or be
121 NULL_INTERVAL. We also update lim_property to be the next value of
122 charpos to call this subroutine again - or be before/after the
123 start/end of OBJECT. */
124
125 void
126 update_syntax_table (charpos, count, init, object)
127 int charpos, count, init;
128 Lisp_Object object;
129 {
130 Lisp_Object tmp_table;
131 int cnt = 0, invalidate = 1;
132 INTERVAL i, oldi;
133
134 if (init)
135 {
136 gl_state.start = gl_state.b_property;
137 gl_state.stop = gl_state.e_property;
138 gl_state.forward_i = interval_of (charpos, object);
139 i = gl_state.backward_i = gl_state.forward_i;
140 gl_state.left_ok = gl_state.right_ok = 1;
141 invalidate = 0;
142 if (NULL_INTERVAL_P (i))
143 return;
144 /* interval_of updates only ->position of the return value, so
145 update the parents manually to speed up update_interval. */
146 while (!NULL_PARENT (i))
147 {
148 if (AM_RIGHT_CHILD (i))
149 INTERVAL_PARENT (i)->position = i->position
150 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
151 - TOTAL_LENGTH (INTERVAL_PARENT (i))
152 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
153 else
154 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
155 + TOTAL_LENGTH (i);
156 i = INTERVAL_PARENT (i);
157 }
158 i = gl_state.forward_i;
159 gl_state.b_property = i->position - 1 - gl_state.offset;
160 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
161 goto update;
162 }
163 oldi = i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
164
165 /* We are guarantied to be called with CHARPOS either in i,
166 or further off. */
167 if (NULL_INTERVAL_P (i))
168 error ("Error in syntax_table logic for to-the-end intervals");
169 else if (charpos < i->position) /* Move left. */
170 {
171 if (count > 0)
172 error ("Error in syntax_table logic for intervals <-");
173 /* Update the interval. */
174 i = update_interval (i, charpos);
175 if (oldi->position != INTERVAL_LAST_POS (i))
176 {
177 invalidate = 0;
178 gl_state.right_ok = 1; /* Invalidate the other end. */
179 gl_state.forward_i = i;
180 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
181 }
182 }
183 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
184 {
185 if (count < 0)
186 error ("Error in syntax_table logic for intervals ->");
187 /* Update the interval. */
188 i = update_interval (i, charpos);
189 if (i->position != INTERVAL_LAST_POS (oldi))
190 {
191 invalidate = 0;
192 gl_state.left_ok = 1; /* Invalidate the other end. */
193 gl_state.backward_i = i;
194 gl_state.b_property = i->position - 1 - gl_state.offset;
195 }
196 }
197 else if (count > 0 ? gl_state.right_ok : gl_state.left_ok)
198 {
199 /* We do not need to recalculate tmp_table. */
200 tmp_table = gl_state.old_prop;
201 }
202
203 update:
204 tmp_table = textget (i->plist, Qsyntax_table);
205
206 if (invalidate)
207 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
208
209 if (invalidate) /* Did not get to adjacent interval. */
210 { /* with the same table => */
211 /* invalidate the old range. */
212 if (count > 0)
213 {
214 gl_state.backward_i = i;
215 gl_state.left_ok = 1; /* Invalidate the other end. */
216 gl_state.b_property = i->position - 1 - gl_state.offset;
217 }
218 else
219 {
220 gl_state.forward_i = i;
221 gl_state.right_ok = 1; /* Invalidate the other end. */
222 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
223 }
224 }
225
226 gl_state.current_syntax_table = tmp_table;
227 gl_state.old_prop = tmp_table;
228 if (EQ (Fsyntax_table_p (tmp_table), Qt))
229 {
230 gl_state.use_global = 0;
231 }
232 else if (CONSP (tmp_table))
233 {
234 gl_state.use_global = 1;
235 gl_state.global_code = tmp_table;
236 }
237 else
238 {
239 gl_state.use_global = 0;
240 gl_state.current_syntax_table = current_buffer->syntax_table;
241 }
242
243 while (!NULL_INTERVAL_P (i))
244 {
245 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
246 {
247 if (count > 0)
248 gl_state.right_ok = 0;
249 else
250 gl_state.left_ok = 0;
251 break;
252 }
253 else if (cnt == INTERVALS_AT_ONCE)
254 {
255 if (count > 0)
256 gl_state.right_ok = 1;
257 else
258 gl_state.left_ok = 1;
259 break;
260 }
261 cnt++;
262 i = count > 0 ? next_interval (i) : previous_interval (i);
263 }
264 if (NULL_INTERVAL_P (i))
265 { /* This property goes to the end. */
266 if (count > 0)
267 gl_state.e_property = gl_state.stop;
268 else
269 gl_state.b_property = gl_state.start;
270 }
271 else
272 {
273 if (count > 0)
274 {
275 gl_state.e_property = i->position - gl_state.offset;
276 gl_state.forward_i = i;
277 }
278 else
279 {
280 gl_state.b_property = i->position + LENGTH (i) - 1 - gl_state.offset;
281 gl_state.backward_i = i;
282 }
283 }
284 }
285 \f
286 /* Returns TRUE if char at CHARPOS is quoted.
287 Global syntax-table data should be set up already to be good at CHARPOS
288 or after. On return global syntax data is good for lookup at CHARPOS. */
289
290 static int
291 char_quoted (charpos, bytepos)
292 register int charpos, bytepos;
293 {
294 register enum syntaxcode code;
295 register int beg = BEGV;
296 register int quoted = 0;
297 int orig = charpos;
298
299 DEC_BOTH (charpos, bytepos);
300
301 while (bytepos >= beg)
302 {
303 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
304 code = SYNTAX (FETCH_CHAR (bytepos));
305 if (! (code == Scharquote || code == Sescape))
306 break;
307
308 DEC_BOTH (charpos, bytepos);
309 quoted = !quoted;
310 }
311
312 UPDATE_SYNTAX_TABLE (orig);
313 return quoted;
314 }
315
316 /* Return the bytepos one character after BYTEPOS.
317 We assume that BYTEPOS is not at the end of the buffer. */
318
319 INLINE int
320 inc_bytepos (bytepos)
321 int bytepos;
322 {
323 if (NILP (current_buffer->enable_multibyte_characters))
324 return bytepos + 1;
325
326 INC_POS (bytepos);
327 return bytepos;
328 }
329
330 /* Return the bytepos one character before BYTEPOS.
331 We assume that BYTEPOS is not at the start of the buffer. */
332
333 INLINE int
334 dec_bytepos (bytepos)
335 int bytepos;
336 {
337 if (NILP (current_buffer->enable_multibyte_characters))
338 return bytepos - 1;
339
340 DEC_POS (bytepos);
341 return bytepos;
342 }
343 \f
344 /* Find a defun-start that is the last one before POS (or nearly the last).
345 We record what we find, so that another call in the same area
346 can return the same value right away.
347
348 There is no promise at which position the global syntax data is
349 valid on return from the subroutine, so the caller should explicitly
350 update the global data. */
351
352 static int
353 find_defun_start (pos, pos_byte)
354 int pos, pos_byte;
355 {
356 int opoint = PT, opoint_byte = PT_BYTE;
357
358 /* Use previous finding, if it's valid and applies to this inquiry. */
359 if (current_buffer == find_start_buffer
360 /* Reuse the defun-start even if POS is a little farther on.
361 POS might be in the next defun, but that's ok.
362 Our value may not be the best possible, but will still be usable. */
363 && pos <= find_start_pos + 1000
364 && pos >= find_start_value
365 && BEGV == find_start_begv
366 && MODIFF == find_start_modiff)
367 return find_start_value;
368
369 /* Back up to start of line. */
370 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
371
372 /* We optimize syntax-table lookup for rare updates. Thus we accept
373 only those `^\s(' which are good in global _and_ text-property
374 syntax-tables. */
375 gl_state.current_syntax_table = current_buffer->syntax_table;
376 gl_state.use_global = 0;
377 if (open_paren_in_column_0_is_defun_start)
378 {
379 while (PT > BEGV)
380 {
381 /* Open-paren at start of line means we may have found our
382 defun-start. */
383 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
384 {
385 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
386 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
387 break;
388 /* Now fallback to the default value. */
389 gl_state.current_syntax_table = current_buffer->syntax_table;
390 gl_state.use_global = 0;
391 }
392 /* Move to beg of previous line. */
393 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
394 }
395 }
396
397 /* Record what we found, for the next try. */
398 find_start_value = PT;
399 find_start_value_byte = PT_BYTE;
400 find_start_buffer = current_buffer;
401 find_start_modiff = MODIFF;
402 find_start_begv = BEGV;
403 find_start_pos = pos;
404
405 TEMP_SET_PT_BOTH (opoint, opoint_byte);
406
407 return find_start_value;
408 }
409 \f
410 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
411
412 static int
413 prev_char_comend_first (pos, pos_byte)
414 int pos, pos_byte;
415 {
416 int c, val;
417
418 DEC_BOTH (pos, pos_byte);
419 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
420 c = FETCH_CHAR (pos_byte);
421 val = SYNTAX_COMEND_FIRST (c);
422 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
423 return val;
424 }
425
426 /* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
427
428 static int
429 prev_char_comstart_first (pos, pos_byte)
430 int pos, pos_byte;
431 {
432 int c, val;
433
434 DEC_BOTH (pos, pos_byte);
435 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
436 c = FETCH_CHAR (pos_byte);
437 val = SYNTAX_COMSTART_FIRST (c);
438 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
439 return val;
440 }
441
442 /* Checks whether charpos FROM is at the end of a comment.
443 FROM_BYTE is the bytepos corresponding to FROM.
444 Do not move back before STOP.
445
446 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
447 return -1 otherwise.
448
449 If successful, store the charpos of the comment's beginning
450 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
451
452 Global syntax data remains valid for backward search starting at
453 the returned value (or at FROM, if the search was not successful). */
454
455 static int
456 back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
457 int from, from_byte, stop;
458 int comnested, comstyle;
459 int *charpos_ptr, *bytepos_ptr;
460 {
461 /* Look back, counting the parity of string-quotes,
462 and recording the comment-starters seen.
463 When we reach a safe place, assume that's not in a string;
464 then step the main scan to the earliest comment-starter seen
465 an even number of string quotes away from the safe place.
466
467 OFROM[I] is position of the earliest comment-starter seen
468 which is I+2X quotes from the comment-end.
469 PARITY is current parity of quotes from the comment end. */
470 int string_style = -1; /* Presumed outside of any string. */
471 int string_lossage = 0;
472 int comment_end = from;
473 int comment_end_byte = from_byte;
474 int comstart_pos = 0;
475 int comstart_byte;
476 /* Value that PARITY had, when we reached the position
477 in COMSTART_POS. */
478 int scanstart = from - 1;
479 /* Place where the containing defun starts,
480 or 0 if we didn't come across it yet. */
481 int defun_start = 0;
482 int defun_start_byte = 0;
483 register enum syntaxcode code;
484 int nesting = 1; /* current comment nesting */
485 int c;
486
487 /* At beginning of range to scan, we're outside of strings;
488 that determines quote parity to the comment-end. */
489 while (from != stop)
490 {
491 int temp_byte;
492
493 /* Move back and examine a character. */
494 DEC_BOTH (from, from_byte);
495 UPDATE_SYNTAX_TABLE_BACKWARD (from);
496
497 c = FETCH_CHAR (from_byte);
498 code = SYNTAX (c);
499
500 /* If this char is the second of a 2-char comment end sequence,
501 back up and give the pair the appropriate syntax. */
502 if (from > stop && SYNTAX_COMEND_SECOND (c)
503 && prev_char_comend_first (from, from_byte))
504 {
505 code = Sendcomment;
506 DEC_BOTH (from, from_byte);
507 UPDATE_SYNTAX_TABLE_BACKWARD (from);
508 c = FETCH_CHAR (from_byte);
509 }
510
511 /* If this char starts a 2-char comment start sequence,
512 treat it like a 1-char comment starter. */
513 if (from < scanstart && SYNTAX_COMSTART_FIRST (c))
514 {
515 temp_byte = inc_bytepos (from_byte);
516 UPDATE_SYNTAX_TABLE_FORWARD (from + 1);
517 if (SYNTAX_COMSTART_SECOND (FETCH_CHAR (temp_byte))
518 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (temp_byte)))
519 code = Scomment;
520 UPDATE_SYNTAX_TABLE_BACKWARD (from);
521 }
522 else if (code == Scomment && comstyle != SYNTAX_COMMENT_STYLE (c))
523 /* Ignore comment starters of a different style. */
524 continue;
525
526 /* Ignore escaped characters, except comment-enders. */
527 if (code != Sendcomment && char_quoted (from, from_byte))
528 continue;
529
530 switch (code)
531 {
532 case Sstring_fence:
533 case Scomment_fence:
534 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
535 case Sstring:
536 /* Track parity of quotes. */
537 if (string_style == -1)
538 /* Entering a string. */
539 string_style = c;
540 else if (string_style == c)
541 /* Leaving the string. */
542 string_style = -1;
543 else
544 /* If we have two kinds of string delimiters.
545 There's no way to grok this scanning backwards. */
546 string_lossage = 1;
547 break;
548
549 case Scomment:
550 /* We've already checked that it is the relevant comstyle. */
551 if (string_style != -1 || string_lossage)
552 /* There are odd string quotes involved, so let's be careful.
553 Test case in Pascal: " { " a { " } */
554 goto lossage;
555
556 if (comnested && --nesting <= 0)
557 /* nested comments have to be balanced, so we don't need to
558 keep looking for earlier ones. We use here the same (slightly
559 incorrect) reasoning as below: since it is followed by uniform
560 paired string quotes, this comment-start has to be outside of
561 strings, else the comment-end itself would be inside a string. */
562 goto done;
563
564 /* Record comment-starters according to that
565 quote-parity to the comment-end. */
566 comstart_pos = from;
567 comstart_byte = from_byte;
568 break;
569
570 case Sendcomment:
571 if (SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)) == comstyle)
572 /* This is the same style of comment ender as ours. */
573 {
574 if (comnested)
575 nesting++;
576 else
577 /* Anything before that can't count because it would match
578 this comment-ender rather than ours. */
579 from = stop; /* Break out of the loop. */
580 }
581 break;
582
583 case Sopen:
584 /* Assume a defun-start point is outside of strings. */
585 if (open_paren_in_column_0_is_defun_start
586 && (from == stop
587 || (temp_byte = dec_bytepos (from_byte),
588 FETCH_CHAR (temp_byte) == '\n')))
589 {
590 defun_start = from;
591 defun_start_byte = from_byte;
592 from = stop; /* Break out of the loop. */
593 }
594 break;
595
596 default:
597 continue;
598 }
599 }
600
601 if (comstart_pos == 0)
602 {
603 from = comment_end;
604 from_byte = comment_end_byte;
605 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
606 }
607 /* If the earliest comment starter
608 is followed by uniform paired string quotes or none,
609 we know it can't be inside a string
610 since if it were then the comment ender would be inside one.
611 So it does start a comment. Skip back to it. */
612 else if (!comnested)
613 {
614 from = comstart_pos;
615 from_byte = comstart_byte;
616 /* Globals are correct now. */
617 }
618 else
619 {
620 struct lisp_parse_state state;
621 lossage:
622 /* We had two kinds of string delimiters mixed up
623 together. Decode this going forwards.
624 Scan fwd from a known safe place (beginning-of-defun)
625 to the one in question; this records where we
626 last passed a comment starter. */
627 /* If we did not already find the defun start, find it now. */
628 if (defun_start == 0)
629 {
630 defun_start = find_defun_start (comment_end, comment_end_byte);
631 defun_start_byte = find_start_value_byte;
632 }
633 do
634 {
635 scan_sexps_forward (&state,
636 defun_start, defun_start_byte,
637 comment_end, -10000, 0, Qnil, 0);
638 defun_start = comment_end;
639 if (state.incomment == (comnested ? 1 : -1)
640 && state.comstyle == comstyle)
641 from = state.comstr_start;
642 else
643 {
644 from = comment_end;
645 if (state.incomment)
646 /* If comment_end is inside some other comment, maybe ours
647 is nested, so we need to try again from within the
648 surrounding comment. Example: { a (* " *) */
649 {
650 /* FIXME: We should advance by one or two chars. */
651 defun_start = state.comstr_start + 2;
652 defun_start_byte = CHAR_TO_BYTE (defun_start);
653 }
654 }
655 } while (defun_start < comment_end);
656
657 from_byte = CHAR_TO_BYTE (from);
658 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
659 }
660
661 done:
662 *charpos_ptr = from;
663 *bytepos_ptr = from_byte;
664
665 return (from == comment_end) ? -1 : from;
666 }
667 \f
668 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
669 "Return t if OBJECT is a syntax table.\n\
670 Currently, any char-table counts as a syntax table.")
671 (object)
672 Lisp_Object object;
673 {
674 if (CHAR_TABLE_P (object)
675 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
676 return Qt;
677 return Qnil;
678 }
679
680 static void
681 check_syntax_table (obj)
682 Lisp_Object obj;
683 {
684 if (!(CHAR_TABLE_P (obj)
685 && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table)))
686 wrong_type_argument (Qsyntax_table_p, obj);
687 }
688
689 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
690 "Return the current syntax table.\n\
691 This is the one specified by the current buffer.")
692 ()
693 {
694 return current_buffer->syntax_table;
695 }
696
697 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
698 Sstandard_syntax_table, 0, 0, 0,
699 "Return the standard syntax table.\n\
700 This is the one used for new buffers.")
701 ()
702 {
703 return Vstandard_syntax_table;
704 }
705
706 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
707 "Construct a new syntax table and return it.\n\
708 It is a copy of the TABLE, which defaults to the standard syntax table.")
709 (table)
710 Lisp_Object table;
711 {
712 Lisp_Object copy;
713
714 if (!NILP (table))
715 check_syntax_table (table);
716 else
717 table = Vstandard_syntax_table;
718
719 copy = Fcopy_sequence (table);
720
721 /* Only the standard syntax table should have a default element.
722 Other syntax tables should inherit from parents instead. */
723 XCHAR_TABLE (copy)->defalt = Qnil;
724
725 /* Copied syntax tables should all have parents.
726 If we copied one with no parent, such as the standard syntax table,
727 use the standard syntax table as the copy's parent. */
728 if (NILP (XCHAR_TABLE (copy)->parent))
729 Fset_char_table_parent (copy, Vstandard_syntax_table);
730 return copy;
731 }
732
733 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
734 "Select a new syntax table for the current buffer.\n\
735 One argument, a syntax table.")
736 (table)
737 Lisp_Object table;
738 {
739 int idx;
740 check_syntax_table (table);
741 current_buffer->syntax_table = table;
742 /* Indicate that this buffer now has a specified syntax table. */
743 idx = PER_BUFFER_VAR_IDX (syntax_table);
744 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
745 return table;
746 }
747 \f
748 /* Convert a letter which signifies a syntax code
749 into the code it signifies.
750 This is used by modify-syntax-entry, and other things. */
751
752 unsigned char syntax_spec_code[0400] =
753 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
754 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
755 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
756 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
757 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
758 (char) Smath, 0377, 0377, (char) Squote,
759 (char) Sopen, (char) Sclose, 0377, 0377,
760 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
761 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
762 0377, 0377, 0377, 0377,
763 (char) Scomment, 0377, (char) Sendcomment, 0377,
764 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
765 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
766 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
767 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
768 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
769 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
770 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
771 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
772 };
773
774 /* Indexed by syntax code, give the letter that describes it. */
775
776 char syntax_code_spec[16] =
777 {
778 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
779 '!', '|'
780 };
781
782 /* Indexed by syntax code, give the object (cons of syntax code and
783 nil) to be stored in syntax table. Since these objects can be
784 shared among syntax tables, we generate them in advance. By
785 sharing objects, the function `describe-syntax' can give a more
786 compact listing. */
787 static Lisp_Object Vsyntax_code_object;
788
789 \f
790 /* Look up the value for CHARACTER in syntax table TABLE's parent
791 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
792 for CHARACTER. It's actually used only when not compiled with GCC. */
793
794 Lisp_Object
795 syntax_parent_lookup (table, character)
796 Lisp_Object table;
797 int character;
798 {
799 Lisp_Object value;
800
801 while (1)
802 {
803 table = XCHAR_TABLE (table)->parent;
804 if (NILP (table))
805 return Qnil;
806
807 value = XCHAR_TABLE (table)->contents[character];
808 if (!NILP (value))
809 return value;
810 }
811 }
812
813 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
814 "Return the syntax code of CHARACTER, described by a character.\n\
815 For example, if CHARACTER is a word constituent,\n\
816 the character `w' is returned.\n\
817 The characters that correspond to various syntax codes\n\
818 are listed in the documentation of `modify-syntax-entry'.")
819 (character)
820 Lisp_Object character;
821 {
822 int char_int;
823 gl_state.current_syntax_table = current_buffer->syntax_table;
824
825 gl_state.use_global = 0;
826 CHECK_NUMBER (character, 0);
827 char_int = XINT (character);
828 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
829 }
830
831 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
832 "Return the matching parenthesis of CHARACTER, or nil if none.")
833 (character)
834 Lisp_Object character;
835 {
836 int char_int, code;
837 gl_state.current_syntax_table = current_buffer->syntax_table;
838 gl_state.use_global = 0;
839 CHECK_NUMBER (character, 0);
840 char_int = XINT (character);
841 code = SYNTAX (char_int);
842 if (code == Sopen || code == Sclose)
843 return SYNTAX_MATCH (char_int);
844 return Qnil;
845 }
846
847 /* This comment supplies the doc string for modify-syntax-entry,
848 for make-docfile to see. We cannot put this in the real DEFUN
849 due to limits in the Unix cpp.
850
851 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
852 "Set syntax for character CHAR according to string S.\n\
853 The syntax is changed only for table TABLE, which defaults to\n\
854 the current buffer's syntax table.\n\
855 The first character of S should be one of the following:\n\
856 Space or - whitespace syntax. w word constituent.\n\
857 _ symbol constituent. . punctuation.\n\
858 ( open-parenthesis. ) close-parenthesis.\n\
859 \" string quote. \\ escape.\n\
860 $ paired delimiter. ' expression quote or prefix operator.\n\
861 < comment starter. > comment ender.\n\
862 / character-quote. @ inherit from `standard-syntax-table'.\n\
863 \n\
864 Only single-character comment start and end sequences are represented thus.\n\
865 Two-character sequences are represented as described below.\n\
866 The second character of S is the matching parenthesis,\n\
867 used only if the first character is `(' or `)'.\n\
868 Any additional characters are flags.\n\
869 Defined flags are the characters 1, 2, 3, 4, b, p, and n.\n\
870 1 means CHAR is the start of a two-char comment start sequence.\n\
871 2 means CHAR is the second character of such a sequence.\n\
872 3 means CHAR is the start of a two-char comment end sequence.\n\
873 4 means CHAR is the second character of such a sequence.\n\
874 \n\
875 There can be up to two orthogonal comment sequences. This is to support\n\
876 language modes such as C++. By default, all comment sequences are of style\n\
877 a, but you can set the comment sequence style to b (on the second character\n\
878 of a comment-start, or the first character of a comment-end sequence) using\n\
879 this flag:\n\
880 b means CHAR is part of comment sequence b.\n\
881 n means CHAR is part of a nestable comment sequence.\n\
882 \n\
883 p means CHAR is a prefix character for `backward-prefix-chars';\n\
884 such characters are treated as whitespace when they occur\n\
885 between expressions.")
886 (char, s, table)
887 */
888
889 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
890 /* I really don't know why this is interactive
891 help-form should at least be made useful whilst reading the second arg
892 */
893 "cSet syntax for character: \nsSet syntax for %s to: ",
894 0 /* See immediately above */)
895 (c, newentry, syntax_table)
896 Lisp_Object c, newentry, syntax_table;
897 {
898 register unsigned char *p;
899 register enum syntaxcode code;
900 int val;
901 Lisp_Object match;
902
903 CHECK_NUMBER (c, 0);
904 CHECK_STRING (newentry, 1);
905
906 if (NILP (syntax_table))
907 syntax_table = current_buffer->syntax_table;
908 else
909 check_syntax_table (syntax_table);
910
911 p = XSTRING (newentry)->data;
912 code = (enum syntaxcode) syntax_spec_code[*p++];
913 if (((int) code & 0377) == 0377)
914 error ("invalid syntax description letter: %c", p[-1]);
915
916 if (code == Sinherit)
917 {
918 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Qnil);
919 return Qnil;
920 }
921
922 if (*p)
923 {
924 int len;
925 int character = (STRING_CHAR_AND_LENGTH
926 (p, STRING_BYTES (XSTRING (newentry)) - 1, len));
927 XSETINT (match, character);
928 if (XFASTINT (match) == ' ')
929 match = Qnil;
930 p += len;
931 }
932 else
933 match = Qnil;
934
935 val = (int) code;
936 while (*p)
937 switch (*p++)
938 {
939 case '1':
940 val |= 1 << 16;
941 break;
942
943 case '2':
944 val |= 1 << 17;
945 break;
946
947 case '3':
948 val |= 1 << 18;
949 break;
950
951 case '4':
952 val |= 1 << 19;
953 break;
954
955 case 'p':
956 val |= 1 << 20;
957 break;
958
959 case 'b':
960 val |= 1 << 21;
961 break;
962
963 case 'n':
964 val |= 1 << 22;
965 break;
966 }
967
968 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
969 newentry = XVECTOR (Vsyntax_code_object)->contents[val];
970 else
971 /* Since we can't use a shared object, let's make a new one. */
972 newentry = Fcons (make_number (val), match);
973
974 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
975
976 return Qnil;
977 }
978 \f
979 /* Dump syntax table to buffer in human-readable format */
980
981 static void
982 describe_syntax (value)
983 Lisp_Object value;
984 {
985 register enum syntaxcode code;
986 char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
987 char str[2];
988 Lisp_Object first, match_lisp;
989
990 Findent_to (make_number (16), make_number (1));
991
992 if (NILP (value))
993 {
994 insert_string ("default\n");
995 return;
996 }
997
998 if (CHAR_TABLE_P (value))
999 {
1000 insert_string ("deeper char-table ...\n");
1001 return;
1002 }
1003
1004 if (!CONSP (value))
1005 {
1006 insert_string ("invalid\n");
1007 return;
1008 }
1009
1010 first = XCAR (value);
1011 match_lisp = XCDR (value);
1012
1013 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
1014 {
1015 insert_string ("invalid\n");
1016 return;
1017 }
1018
1019 code = (enum syntaxcode) (XINT (first) & 0377);
1020 start1 = (XINT (first) >> 16) & 1;
1021 start2 = (XINT (first) >> 17) & 1;
1022 end1 = (XINT (first) >> 18) & 1;
1023 end2 = (XINT (first) >> 19) & 1;
1024 prefix = (XINT (first) >> 20) & 1;
1025 comstyle = (XINT (first) >> 21) & 1;
1026 comnested = (XINT (first) >> 22) & 1;
1027
1028 if ((int) code < 0 || (int) code >= (int) Smax)
1029 {
1030 insert_string ("invalid");
1031 return;
1032 }
1033 desc = syntax_code_spec[(int) code];
1034
1035 str[0] = desc, str[1] = 0;
1036 insert (str, 1);
1037
1038 if (NILP (match_lisp))
1039 insert (" ", 1);
1040 else
1041 insert_char (XINT (match_lisp));
1042
1043 if (start1)
1044 insert ("1", 1);
1045 if (start2)
1046 insert ("2", 1);
1047
1048 if (end1)
1049 insert ("3", 1);
1050 if (end2)
1051 insert ("4", 1);
1052
1053 if (prefix)
1054 insert ("p", 1);
1055 if (comstyle)
1056 insert ("b", 1);
1057 if (comnested)
1058 insert ("n", 1);
1059
1060 insert_string ("\twhich means: ");
1061
1062 switch (SWITCH_ENUM_CAST (code))
1063 {
1064 case Swhitespace:
1065 insert_string ("whitespace"); break;
1066 case Spunct:
1067 insert_string ("punctuation"); break;
1068 case Sword:
1069 insert_string ("word"); break;
1070 case Ssymbol:
1071 insert_string ("symbol"); break;
1072 case Sopen:
1073 insert_string ("open"); break;
1074 case Sclose:
1075 insert_string ("close"); break;
1076 case Squote:
1077 insert_string ("quote"); break;
1078 case Sstring:
1079 insert_string ("string"); break;
1080 case Smath:
1081 insert_string ("math"); break;
1082 case Sescape:
1083 insert_string ("escape"); break;
1084 case Scharquote:
1085 insert_string ("charquote"); break;
1086 case Scomment:
1087 insert_string ("comment"); break;
1088 case Sendcomment:
1089 insert_string ("endcomment"); break;
1090 default:
1091 insert_string ("invalid");
1092 return;
1093 }
1094
1095 if (!NILP (match_lisp))
1096 {
1097 insert_string (", matches ");
1098 insert_char (XINT (match_lisp));
1099 }
1100
1101 if (start1)
1102 insert_string (",\n\t is the first character of a comment-start sequence");
1103 if (start2)
1104 insert_string (",\n\t is the second character of a comment-start sequence");
1105
1106 if (end1)
1107 insert_string (",\n\t is the first character of a comment-end sequence");
1108 if (end2)
1109 insert_string (",\n\t is the second character of a comment-end sequence");
1110 if (comstyle)
1111 insert_string (" (comment style b)");
1112 if (comnested)
1113 insert_string (" (nestable)");
1114
1115 if (prefix)
1116 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1117
1118 insert_string ("\n");
1119 }
1120
1121 static Lisp_Object
1122 describe_syntax_1 (vector)
1123 Lisp_Object vector;
1124 {
1125 struct buffer *old = current_buffer;
1126 set_buffer_internal (XBUFFER (Vstandard_output));
1127 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil, (int *) 0, 0);
1128 while (! NILP (XCHAR_TABLE (vector)->parent))
1129 {
1130 vector = XCHAR_TABLE (vector)->parent;
1131 insert_string ("\nThe parent syntax table is:");
1132 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil,
1133 (int *) 0, 0);
1134 }
1135
1136 call0 (intern ("help-mode"));
1137 set_buffer_internal (old);
1138 return Qnil;
1139 }
1140
1141 DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
1142 "Describe the syntax specifications in the syntax table.\n\
1143 The descriptions are inserted in a buffer, which is then displayed.")
1144 ()
1145 {
1146 internal_with_output_to_temp_buffer
1147 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
1148
1149 return Qnil;
1150 }
1151 \f
1152 int parse_sexp_ignore_comments;
1153
1154 /* Return the position across COUNT words from FROM.
1155 If that many words cannot be found before the end of the buffer, return 0.
1156 COUNT negative means scan backward and stop at word beginning. */
1157
1158 int
1159 scan_words (from, count)
1160 register int from, count;
1161 {
1162 register int beg = BEGV;
1163 register int end = ZV;
1164 register int from_byte = CHAR_TO_BYTE (from);
1165 register enum syntaxcode code;
1166 int ch0, ch1;
1167
1168 immediate_quit = 1;
1169 QUIT;
1170
1171 SETUP_SYNTAX_TABLE (from, count);
1172
1173 while (count > 0)
1174 {
1175 while (1)
1176 {
1177 if (from == end)
1178 {
1179 immediate_quit = 0;
1180 return 0;
1181 }
1182 UPDATE_SYNTAX_TABLE_FORWARD (from);
1183 ch0 = FETCH_CHAR (from_byte);
1184 code = SYNTAX (ch0);
1185 INC_BOTH (from, from_byte);
1186 if (words_include_escapes
1187 && (code == Sescape || code == Scharquote))
1188 break;
1189 if (code == Sword)
1190 break;
1191 }
1192 /* Now CH0 is a character which begins a word and FROM is the
1193 position of the next character. */
1194 while (1)
1195 {
1196 if (from == end) break;
1197 UPDATE_SYNTAX_TABLE_FORWARD (from);
1198 ch1 = FETCH_CHAR (from_byte);
1199 code = SYNTAX (ch1);
1200 if (!(words_include_escapes
1201 && (code == Sescape || code == Scharquote)))
1202 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1203 break;
1204 INC_BOTH (from, from_byte);
1205 ch0 = ch1;
1206 }
1207 count--;
1208 }
1209 while (count < 0)
1210 {
1211 while (1)
1212 {
1213 if (from == beg)
1214 {
1215 immediate_quit = 0;
1216 return 0;
1217 }
1218 DEC_BOTH (from, from_byte);
1219 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1220 ch1 = FETCH_CHAR (from_byte);
1221 code = SYNTAX (ch1);
1222 if (words_include_escapes
1223 && (code == Sescape || code == Scharquote))
1224 break;
1225 if (code == Sword)
1226 break;
1227 }
1228 /* Now CH1 is a character which ends a word and FROM is the
1229 position of it. */
1230 while (1)
1231 {
1232 int temp_byte;
1233
1234 if (from == beg)
1235 break;
1236 temp_byte = dec_bytepos (from_byte);
1237 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1238 ch0 = FETCH_CHAR (temp_byte);
1239 code = SYNTAX (ch0);
1240 if (!(words_include_escapes
1241 && (code == Sescape || code == Scharquote)))
1242 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1243 break;
1244 DEC_BOTH (from, from_byte);
1245 ch1 = ch0;
1246 }
1247 count++;
1248 }
1249
1250 immediate_quit = 0;
1251
1252 return from;
1253 }
1254
1255 DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
1256 "Move point forward ARG words (backward if ARG is negative).\n\
1257 Normally returns t.\n\
1258 If an edge of the buffer or a field boundary is reached, point is left there\n\
1259 and the function returns nil. Field boundaries are not noticed if\n\
1260 `inhibit-field-text-motion' is non-nil.")
1261 (count)
1262 Lisp_Object count;
1263 {
1264 int orig_val, val;
1265 CHECK_NUMBER (count, 0);
1266
1267 val = orig_val = scan_words (PT, XINT (count));
1268 if (! orig_val)
1269 val = XINT (count) > 0 ? ZV : BEGV;
1270
1271 /* Avoid jumping out of an input field. */
1272 val = XFASTINT (Fconstrain_to_field (make_number (val), make_number (PT),
1273 Qt, Qnil, Qnil));
1274
1275 SET_PT (val);
1276 return val == orig_val ? Qt : Qnil;
1277 }
1278 \f
1279 Lisp_Object skip_chars ();
1280
1281 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1282 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
1283 STRING is like the inside of a `[...]' in a regular expression\n\
1284 except that `]' is never special and `\\' quotes `^', `-' or `\\'\n\
1285 (but not as the end of a range; quoting is never needed there).\n\
1286 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
1287 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
1288 Returns the distance traveled, either zero or positive.")
1289 (string, lim)
1290 Lisp_Object string, lim;
1291 {
1292 return skip_chars (1, 0, string, lim);
1293 }
1294
1295 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1296 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
1297 See `skip-chars-forward' for details.\n\
1298 Returns the distance traveled, either zero or negative.")
1299 (string, lim)
1300 Lisp_Object string, lim;
1301 {
1302 return skip_chars (0, 0, string, lim);
1303 }
1304
1305 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1306 "Move point forward across chars in specified syntax classes.\n\
1307 SYNTAX is a string of syntax code characters.\n\
1308 Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
1309 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1310 This function returns the distance traveled, either zero or positive.")
1311 (syntax, lim)
1312 Lisp_Object syntax, lim;
1313 {
1314 return skip_chars (1, 1, syntax, lim);
1315 }
1316
1317 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1318 "Move point backward across chars in specified syntax classes.\n\
1319 SYNTAX is a string of syntax code characters.\n\
1320 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
1321 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1322 This function returns the distance traveled, either zero or negative.")
1323 (syntax, lim)
1324 Lisp_Object syntax, lim;
1325 {
1326 return skip_chars (0, 1, syntax, lim);
1327 }
1328
1329 static Lisp_Object
1330 skip_chars (forwardp, syntaxp, string, lim)
1331 int forwardp, syntaxp;
1332 Lisp_Object string, lim;
1333 {
1334 register unsigned int c;
1335 register int ch;
1336 unsigned char fastmap[0400];
1337 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
1338 of which codes don't fit in FASTMAP. In that case, we set the
1339 first byte of multibyte form (i.e. base leading-code) in FASTMAP
1340 and set the actual ranges of characters in CHAR_RANGES. In the
1341 form "X-Y" of STRING, both X and Y must belong to the same
1342 character set because a range striding across character sets is
1343 meaningless. */
1344 int *char_ranges;
1345 int n_char_ranges = 0;
1346 int negate = 0;
1347 register int i, i_byte;
1348 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1349 int string_multibyte;
1350 int size_byte;
1351 unsigned char *str;
1352 int len;
1353
1354 CHECK_STRING (string, 0);
1355 char_ranges = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2);
1356 string_multibyte = STRING_MULTIBYTE (string);
1357 str = XSTRING (string)->data;
1358 size_byte = STRING_BYTES (XSTRING (string));
1359
1360 /* Adjust the multibyteness of the string to that of the buffer. */
1361 if (multibyte != string_multibyte)
1362 {
1363 int nbytes;
1364
1365 if (multibyte)
1366 nbytes = count_size_as_multibyte (XSTRING (string)->data,
1367 XSTRING (string)->size);
1368 else
1369 nbytes = XSTRING (string)->size;
1370 if (nbytes != size_byte)
1371 {
1372 str = (unsigned char *) alloca (nbytes);
1373 copy_text (XSTRING (string)->data, str, size_byte,
1374 string_multibyte, multibyte);
1375 size_byte = nbytes;
1376 }
1377 }
1378
1379 if (NILP (lim))
1380 XSETINT (lim, forwardp ? ZV : BEGV);
1381 else
1382 CHECK_NUMBER_COERCE_MARKER (lim, 0);
1383
1384 /* In any case, don't allow scan outside bounds of buffer. */
1385 if (XINT (lim) > ZV)
1386 XSETFASTINT (lim, ZV);
1387 if (XINT (lim) < BEGV)
1388 XSETFASTINT (lim, BEGV);
1389
1390 bzero (fastmap, sizeof fastmap);
1391
1392 i_byte = 0;
1393
1394 if (i_byte < size_byte
1395 && XSTRING (string)->data[0] == '^')
1396 {
1397 negate = 1; i_byte++;
1398 }
1399
1400 /* Find the characters specified and set their elements of fastmap.
1401 If syntaxp, each character counts as itself.
1402 Otherwise, handle backslashes and ranges specially. */
1403
1404 while (i_byte < size_byte)
1405 {
1406 int c_leading_code = str[i_byte];
1407
1408 c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte - i_byte, len);
1409 i_byte += len;
1410
1411 if (syntaxp)
1412 fastmap[syntax_spec_code[c & 0377]] = 1;
1413 else
1414 {
1415 if (c == '\\')
1416 {
1417 if (i_byte == size_byte)
1418 break;
1419
1420 c_leading_code = str[i_byte];
1421 c = STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
1422 i_byte += len;
1423 }
1424 if (i_byte < size_byte
1425 && str[i_byte] == '-')
1426 {
1427 unsigned int c2, c2_leading_code;
1428
1429 /* Skip over the dash. */
1430 i_byte++;
1431
1432 if (i_byte == size_byte)
1433 break;
1434
1435 /* Get the end of the range. */
1436 c2_leading_code = str[i_byte];
1437 c2 =STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
1438 i_byte += len;
1439
1440 if (SINGLE_BYTE_CHAR_P (c))
1441 {
1442 if (! SINGLE_BYTE_CHAR_P (c2))
1443 {
1444 /* Handle a range such as \177-\377 in multibyte
1445 mode. Split that into two ranges, the low
1446 one ending at 0237, and the high one starting
1447 at the smallest character in the charset of
1448 C2 and ending at C2. */
1449 int charset = CHAR_CHARSET (c2);
1450 int c1 = MAKE_CHAR (charset, 0, 0);
1451
1452 fastmap[c2_leading_code] = 1;
1453 char_ranges[n_char_ranges++] = c1;
1454 char_ranges[n_char_ranges++] = c2;
1455 c2 = 0237;
1456 }
1457 while (c <= c2)
1458 {
1459 fastmap[c] = 1;
1460 c++;
1461 }
1462 }
1463 else if (! SINGLE_BYTE_CHAR_P (c2))
1464 {
1465 if (c_leading_code != c2_leading_code)
1466 error ("Invalid character range: %s",
1467 XSTRING (string)->data);
1468 if (c <= c2)
1469 {
1470 fastmap[c_leading_code] = 1;
1471 char_ranges[n_char_ranges++] = c;
1472 char_ranges[n_char_ranges++] = c2;
1473 }
1474 }
1475 }
1476 else
1477 {
1478 if (SINGLE_BYTE_CHAR_P (c))
1479 fastmap[c] = 1;
1480 else
1481 {
1482 fastmap[c_leading_code] = 1;
1483 char_ranges[n_char_ranges++] = c;
1484 char_ranges[n_char_ranges++] = c;
1485 }
1486 }
1487 }
1488 }
1489
1490 /* If ^ was the first character, complement the fastmap. In
1491 addition, as all multibyte characters have possibility of
1492 matching, set all entries for base leading codes, which is
1493 harmless even if SYNTAXP is 1. */
1494
1495 if (negate)
1496 for (i = 0; i < sizeof fastmap; i++)
1497 {
1498 if (!multibyte || !BASE_LEADING_CODE_P (i))
1499 fastmap[i] ^= 1;
1500 else
1501 fastmap[i] = 1;
1502 }
1503
1504 {
1505 int start_point = PT;
1506 int pos = PT;
1507 int pos_byte = PT_BYTE;
1508
1509 immediate_quit = 1;
1510 if (syntaxp)
1511 {
1512 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1513 if (forwardp)
1514 {
1515 if (multibyte)
1516 {
1517 if (pos < XINT (lim))
1518 while (fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
1519 {
1520 /* Since we already checked for multibyteness,
1521 avoid using INC_BOTH which checks again. */
1522 INC_POS (pos_byte);
1523 pos++;
1524 if (pos >= XINT (lim))
1525 break;
1526 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1527 }
1528 }
1529 else
1530 {
1531 while (pos < XINT (lim)
1532 && fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1533 {
1534 pos++;
1535 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1536 }
1537 }
1538 }
1539 else
1540 {
1541 if (multibyte)
1542 {
1543 while (pos > XINT (lim))
1544 {
1545 int savepos = pos_byte;
1546 /* Since we already checked for multibyteness,
1547 avoid using DEC_BOTH which checks again. */
1548 pos--;
1549 DEC_POS (pos_byte);
1550 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1551 if (!fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
1552 {
1553 pos++;
1554 pos_byte = savepos;
1555 break;
1556 }
1557 }
1558 }
1559 else
1560 {
1561 if (pos > XINT (lim))
1562 while (fastmap[(int) SYNTAX (FETCH_BYTE (pos - 1))])
1563 {
1564 pos--;
1565 if (pos <= XINT (lim))
1566 break;
1567 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
1568 }
1569 }
1570 }
1571 }
1572 else
1573 {
1574 if (forwardp)
1575 {
1576 if (multibyte)
1577 while (pos < XINT (lim) && fastmap[(c = FETCH_BYTE (pos_byte))])
1578 {
1579 /* If we are looking at a multibyte character, we
1580 must look up the character in the table
1581 CHAR_RANGES. If there's no data in the table,
1582 that character is not what we want to skip. */
1583 if (BASE_LEADING_CODE_P (c)
1584 && (c = FETCH_MULTIBYTE_CHAR (pos_byte),
1585 ! SINGLE_BYTE_CHAR_P (c)))
1586 {
1587 /* The following code do the right thing even if
1588 n_char_ranges is zero (i.e. no data in
1589 CHAR_RANGES). */
1590 for (i = 0; i < n_char_ranges; i += 2)
1591 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1592 break;
1593 if (!(negate ^ (i < n_char_ranges)))
1594 break;
1595 }
1596 INC_BOTH (pos, pos_byte);
1597 }
1598 else
1599 while (pos < XINT (lim) && fastmap[FETCH_BYTE (pos)])
1600 pos++;
1601 }
1602 else
1603 {
1604 if (multibyte)
1605 while (pos > XINT (lim))
1606 {
1607 int prev_pos_byte = pos_byte;
1608
1609 DEC_POS (prev_pos_byte);
1610 if (!fastmap[(c = FETCH_BYTE (prev_pos_byte))])
1611 break;
1612
1613 /* See the comment in the previous similar code. */
1614 if (BASE_LEADING_CODE_P (c)
1615 && (c = FETCH_MULTIBYTE_CHAR (prev_pos_byte),
1616 ! SINGLE_BYTE_CHAR_P (c)))
1617 {
1618 for (i = 0; i < n_char_ranges; i += 2)
1619 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1620 break;
1621 if (!(negate ^ (i < n_char_ranges)))
1622 break;
1623 }
1624 pos--;
1625 pos_byte = prev_pos_byte;
1626 }
1627 else
1628 while (pos > XINT (lim) && fastmap[FETCH_BYTE (pos - 1)])
1629 pos--;
1630 }
1631 }
1632
1633 #if 0 /* Not needed now that a position in mid-character
1634 cannot be specified in Lisp. */
1635 if (multibyte
1636 /* INC_POS or DEC_POS might have moved POS over LIM. */
1637 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1638 pos = XINT (lim);
1639 #endif
1640
1641 if (! multibyte)
1642 pos_byte = pos;
1643
1644 SET_PT_BOTH (pos, pos_byte);
1645 immediate_quit = 0;
1646
1647 return make_number (PT - start_point);
1648 }
1649 }
1650 \f
1651 /* Jump over a comment, assuming we are at the beginning of one.
1652 FROM is the current position.
1653 FROM_BYTE is the bytepos corresponding to FROM.
1654 Do not move past STOP (a charpos).
1655 The comment over which we have to jump is of style STYLE
1656 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
1657 NESTING should be positive to indicate the nesting at the beginning
1658 for nested comments and should be zero or negative else.
1659 ST_COMMENT_STYLE cannot be nested.
1660 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
1661 (or 0 If the search cannot start in the middle of a two-character).
1662
1663 If successful, return 1 and store the charpos of the comment's end
1664 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
1665 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
1666 corresponding bytepos into *BYTEPOS_PTR and the current nesting
1667 (as defined for state.incomment) in *INCOMMENT_PTR.
1668
1669 The comment end is the last character of the comment rather than the
1670 character just after the comment.
1671
1672 Global syntax data is assumed to initially be valid for FROM and
1673 remains valid for forward search starting at the returned position. */
1674
1675 static int
1676 forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
1677 charpos_ptr, bytepos_ptr, incomment_ptr)
1678 int from, from_byte, stop;
1679 int nesting, style, prev_syntax;
1680 int *charpos_ptr, *bytepos_ptr, *incomment_ptr;
1681 {
1682 register int c, c1;
1683 register enum syntaxcode code;
1684 register int syntax;
1685
1686 if (nesting <= 0) nesting = -1;
1687
1688 /* Enter the loop in the middle so that we find
1689 a 2-char comment ender if we start in the middle of it. */
1690 syntax = prev_syntax;
1691 if (syntax != 0) goto forw_incomment;
1692
1693 while (1)
1694 {
1695 if (from == stop)
1696 {
1697 *incomment_ptr = nesting;
1698 *charpos_ptr = from;
1699 *bytepos_ptr = from_byte;
1700 return 0;
1701 }
1702 c = FETCH_CHAR (from_byte);
1703 syntax = SYNTAX_WITH_FLAGS (c);
1704 code = syntax & 0xff;
1705 if (code == Sendcomment
1706 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1707 && --nesting <= 0)
1708 /* we have encountered a comment end of the same style
1709 as the comment sequence which began this comment
1710 section */
1711 break;
1712 if (code == Scomment_fence
1713 && style == ST_COMMENT_STYLE)
1714 /* we have encountered a comment end of the same style
1715 as the comment sequence which began this comment
1716 section. */
1717 break;
1718 if (nesting > 0
1719 && code == Scomment
1720 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
1721 /* we have encountered a nested comment of the same style
1722 as the comment sequence which began this comment section */
1723 nesting++;
1724 INC_BOTH (from, from_byte);
1725 UPDATE_SYNTAX_TABLE_FORWARD (from);
1726
1727 forw_incomment:
1728 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
1729 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1730 && (c1 = FETCH_CHAR (from_byte),
1731 SYNTAX_COMEND_SECOND (c1)))
1732 {
1733 if (--nesting <= 0)
1734 /* we have encountered a comment end of the same style
1735 as the comment sequence which began this comment
1736 section */
1737 break;
1738 else
1739 {
1740 INC_BOTH (from, from_byte);
1741 UPDATE_SYNTAX_TABLE_FORWARD (from);
1742 }
1743 }
1744 if (nesting > 0
1745 && from < stop
1746 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
1747 && (c1 = FETCH_CHAR (from_byte),
1748 SYNTAX_COMMENT_STYLE (c1) == style
1749 && SYNTAX_COMSTART_SECOND (c1)))
1750 /* we have encountered a nested comment of the same style
1751 as the comment sequence which began this comment
1752 section */
1753 {
1754 INC_BOTH (from, from_byte);
1755 UPDATE_SYNTAX_TABLE_FORWARD (from);
1756 nesting++;
1757 }
1758 }
1759 *charpos_ptr = from;
1760 *bytepos_ptr = from_byte;
1761 return 1;
1762 }
1763
1764 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
1765 "Move forward across up to N comments. If N is negative, move backward.\n\
1766 Stop scanning if we find something other than a comment or whitespace.\n\
1767 Set point to where scanning stops.\n\
1768 If N comments are found as expected, with nothing except whitespace\n\
1769 between them, return t; otherwise return nil.")
1770 (count)
1771 Lisp_Object count;
1772 {
1773 register int from;
1774 int from_byte;
1775 register int stop;
1776 register int c, c1;
1777 register enum syntaxcode code;
1778 int comstyle = 0; /* style of comment encountered */
1779 int comnested = 0; /* whether the comment is nestable or not */
1780 int found;
1781 int count1;
1782 int out_charpos, out_bytepos;
1783 int dummy;
1784
1785 CHECK_NUMBER (count, 0);
1786 count1 = XINT (count);
1787 stop = count1 > 0 ? ZV : BEGV;
1788
1789 immediate_quit = 1;
1790 QUIT;
1791
1792 from = PT;
1793 from_byte = PT_BYTE;
1794
1795 SETUP_SYNTAX_TABLE (from, count1);
1796 while (count1 > 0)
1797 {
1798 do
1799 {
1800 int comstart_first;
1801
1802 if (from == stop)
1803 {
1804 SET_PT_BOTH (from, from_byte);
1805 immediate_quit = 0;
1806 return Qnil;
1807 }
1808 c = FETCH_CHAR (from_byte);
1809 code = SYNTAX (c);
1810 comstart_first = SYNTAX_COMSTART_FIRST (c);
1811 comnested = SYNTAX_COMMENT_NESTED (c);
1812 comstyle = SYNTAX_COMMENT_STYLE (c);
1813 INC_BOTH (from, from_byte);
1814 UPDATE_SYNTAX_TABLE_FORWARD (from);
1815 if (from < stop && comstart_first
1816 && (c1 = FETCH_CHAR (from_byte),
1817 SYNTAX_COMSTART_SECOND (c1)))
1818 {
1819 /* We have encountered a comment start sequence and we
1820 are ignoring all text inside comments. We must record
1821 the comment style this sequence begins so that later,
1822 only a comment end of the same style actually ends
1823 the comment section. */
1824 code = Scomment;
1825 comstyle = SYNTAX_COMMENT_STYLE (c1);
1826 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
1827 INC_BOTH (from, from_byte);
1828 UPDATE_SYNTAX_TABLE_FORWARD (from);
1829 }
1830 /* FIXME: here we ignore 2-char endcomments while we don't
1831 when going backwards. */
1832 }
1833 while (code == Swhitespace || code == Sendcomment);
1834
1835 if (code == Scomment_fence)
1836 comstyle = ST_COMMENT_STYLE;
1837 else if (code != Scomment)
1838 {
1839 immediate_quit = 0;
1840 DEC_BOTH (from, from_byte);
1841 SET_PT_BOTH (from, from_byte);
1842 return Qnil;
1843 }
1844 /* We're at the start of a comment. */
1845 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
1846 &out_charpos, &out_bytepos, &dummy);
1847 from = out_charpos; from_byte = out_bytepos;
1848 if (!found)
1849 {
1850 immediate_quit = 0;
1851 SET_PT_BOTH (from, from_byte);
1852 return Qnil;
1853 }
1854 INC_BOTH (from, from_byte);
1855 UPDATE_SYNTAX_TABLE_FORWARD (from);
1856 /* We have skipped one comment. */
1857 count1--;
1858 }
1859
1860 while (count1 < 0)
1861 {
1862 while (1)
1863 {
1864 int quoted, comstart_second;
1865
1866 if (from <= stop)
1867 {
1868 SET_PT_BOTH (BEGV, BEGV_BYTE);
1869 immediate_quit = 0;
1870 return Qnil;
1871 }
1872
1873 DEC_BOTH (from, from_byte);
1874 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
1875 quoted = char_quoted (from, from_byte);
1876 if (quoted)
1877 {
1878 DEC_BOTH (from, from_byte);
1879 goto leave;
1880 }
1881 c = FETCH_CHAR (from_byte);
1882 code = SYNTAX (c);
1883 comstyle = 0;
1884 comnested = SYNTAX_COMMENT_NESTED (c);
1885 if (code == Sendcomment)
1886 comstyle = SYNTAX_COMMENT_STYLE (c);
1887 comstart_second = SYNTAX_COMSTART_SECOND (c);
1888 if (from > stop && SYNTAX_COMEND_SECOND (c)
1889 && prev_char_comend_first (from, from_byte)
1890 && !char_quoted (from - 1, dec_bytepos (from_byte)))
1891 {
1892 /* We must record the comment style encountered so that
1893 later, we can match only the proper comment begin
1894 sequence of the same style. */
1895 DEC_BOTH (from, from_byte);
1896 code = Sendcomment;
1897 /* Calling char_quoted, above, set up global syntax position
1898 at the new value of FROM. */
1899 c1 = FETCH_CHAR (from_byte);
1900 comstyle = SYNTAX_COMMENT_STYLE (c1);
1901 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
1902 }
1903 if (from > stop && comstart_second
1904 && prev_char_comstart_first (from, from_byte)
1905 && !char_quoted (from - 1, dec_bytepos (from_byte)))
1906 {
1907 code = Scomment;
1908 DEC_BOTH (from, from_byte);
1909 }
1910
1911 if (code == Scomment_fence)
1912 {
1913 /* Skip until first preceding unquoted comment_fence. */
1914 int found = 0, ini = from, ini_byte = from_byte;
1915
1916 while (1)
1917 {
1918 DEC_BOTH (from, from_byte);
1919 if (from == stop)
1920 break;
1921 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1922 c = FETCH_CHAR (from_byte);
1923 if (SYNTAX (c) == Scomment_fence
1924 && !char_quoted (from, from_byte))
1925 {
1926 found = 1;
1927 break;
1928 }
1929 }
1930 if (found == 0)
1931 {
1932 from = ini; /* Set point to ini + 1. */
1933 from_byte = ini_byte;
1934 goto leave;
1935 }
1936 }
1937 else if (code == Sendcomment)
1938 {
1939 found = back_comment (from, from_byte, stop, comnested, comstyle,
1940 &out_charpos, &out_bytepos);
1941 if (found == -1)
1942 {
1943 #if 0 /* cc-mode (and maybe others) relies on the bogus behavior. */
1944 /* Failure: we should go back to the end of this
1945 not-quite-endcomment. */
1946 if (SYNTAX(c) != code)
1947 /* It was a two-char Sendcomment. */
1948 INC_BOTH (from, from_byte);
1949 goto leave;
1950 #endif
1951 }
1952 else
1953 /* We have skipped one comment. */
1954 from = out_charpos, from_byte = out_bytepos;
1955 break;
1956 }
1957 else if (code != Swhitespace && code != Scomment)
1958 {
1959 leave:
1960 immediate_quit = 0;
1961 INC_BOTH (from, from_byte);
1962 SET_PT_BOTH (from, from_byte);
1963 return Qnil;
1964 }
1965 }
1966
1967 count1++;
1968 }
1969
1970 SET_PT_BOTH (from, from_byte);
1971 immediate_quit = 0;
1972 return Qt;
1973 }
1974 \f
1975 /* Return syntax code of character C if C is a single byte character
1976 or `multibyte_symbol_p' is zero. Otherwise, retrun Ssymbol. */
1977
1978 #define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
1979 ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p) \
1980 ? SYNTAX (c) : Ssymbol)
1981
1982 static Lisp_Object
1983 scan_lists (from, count, depth, sexpflag)
1984 register int from;
1985 int count, depth, sexpflag;
1986 {
1987 Lisp_Object val;
1988 register int stop = count > 0 ? ZV : BEGV;
1989 register int c, c1;
1990 int stringterm;
1991 int quoted;
1992 int mathexit = 0;
1993 register enum syntaxcode code, temp_code;
1994 int min_depth = depth; /* Err out if depth gets less than this. */
1995 int comstyle = 0; /* style of comment encountered */
1996 int comnested = 0; /* whether the comment is nestable or not */
1997 int temp_pos;
1998 int last_good = from;
1999 int found;
2000 int from_byte;
2001 int out_bytepos, out_charpos;
2002 int temp, dummy;
2003 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2004
2005 if (depth > 0) min_depth = 0;
2006
2007 if (from > ZV) from = ZV;
2008 if (from < BEGV) from = BEGV;
2009
2010 from_byte = CHAR_TO_BYTE (from);
2011
2012 immediate_quit = 1;
2013 QUIT;
2014
2015 SETUP_SYNTAX_TABLE (from, count);
2016 while (count > 0)
2017 {
2018 while (from < stop)
2019 {
2020 int comstart_first, prefix;
2021 UPDATE_SYNTAX_TABLE_FORWARD (from);
2022 c = FETCH_CHAR (from_byte);
2023 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2024 comstart_first = SYNTAX_COMSTART_FIRST (c);
2025 comnested = SYNTAX_COMMENT_NESTED (c);
2026 comstyle = SYNTAX_COMMENT_STYLE (c);
2027 prefix = SYNTAX_PREFIX (c);
2028 if (depth == min_depth)
2029 last_good = from;
2030 INC_BOTH (from, from_byte);
2031 UPDATE_SYNTAX_TABLE_FORWARD (from);
2032 if (from < stop && comstart_first
2033 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte))
2034 && parse_sexp_ignore_comments)
2035 {
2036 /* we have encountered a comment start sequence and we
2037 are ignoring all text inside comments. We must record
2038 the comment style this sequence begins so that later,
2039 only a comment end of the same style actually ends
2040 the comment section */
2041 code = Scomment;
2042 c1 = FETCH_CHAR (from_byte);
2043 comstyle = SYNTAX_COMMENT_STYLE (c1);
2044 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2045 INC_BOTH (from, from_byte);
2046 UPDATE_SYNTAX_TABLE_FORWARD (from);
2047 }
2048
2049 if (prefix)
2050 continue;
2051
2052 switch (SWITCH_ENUM_CAST (code))
2053 {
2054 case Sescape:
2055 case Scharquote:
2056 if (from == stop) goto lose;
2057 INC_BOTH (from, from_byte);
2058 /* treat following character as a word constituent */
2059 case Sword:
2060 case Ssymbol:
2061 if (depth || !sexpflag) break;
2062 /* This word counts as a sexp; return at end of it. */
2063 while (from < stop)
2064 {
2065 UPDATE_SYNTAX_TABLE_FORWARD (from);
2066
2067 /* Some compilers can't handle this inside the switch. */
2068 c = FETCH_CHAR (from_byte);
2069 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2070 switch (temp)
2071 {
2072 case Scharquote:
2073 case Sescape:
2074 INC_BOTH (from, from_byte);
2075 if (from == stop) goto lose;
2076 break;
2077 case Sword:
2078 case Ssymbol:
2079 case Squote:
2080 break;
2081 default:
2082 goto done;
2083 }
2084 INC_BOTH (from, from_byte);
2085 }
2086 goto done;
2087
2088 case Scomment_fence:
2089 comstyle = ST_COMMENT_STYLE;
2090 /* FALLTHROUGH */
2091 case Scomment:
2092 if (!parse_sexp_ignore_comments) break;
2093 UPDATE_SYNTAX_TABLE_FORWARD (from);
2094 found = forw_comment (from, from_byte, stop,
2095 comnested, comstyle, 0,
2096 &out_charpos, &out_bytepos, &dummy);
2097 from = out_charpos, from_byte = out_bytepos;
2098 if (!found)
2099 {
2100 if (depth == 0)
2101 goto done;
2102 goto lose;
2103 }
2104 INC_BOTH (from, from_byte);
2105 UPDATE_SYNTAX_TABLE_FORWARD (from);
2106 break;
2107
2108 case Smath:
2109 if (!sexpflag)
2110 break;
2111 if (from != stop && c == FETCH_CHAR (from_byte))
2112 {
2113 INC_BOTH (from, from_byte);
2114 }
2115 if (mathexit)
2116 {
2117 mathexit = 0;
2118 goto close1;
2119 }
2120 mathexit = 1;
2121
2122 case Sopen:
2123 if (!++depth) goto done;
2124 break;
2125
2126 case Sclose:
2127 close1:
2128 if (!--depth) goto done;
2129 if (depth < min_depth)
2130 Fsignal (Qscan_error,
2131 Fcons (build_string ("Containing expression ends prematurely"),
2132 Fcons (make_number (last_good),
2133 Fcons (make_number (from), Qnil))));
2134 break;
2135
2136 case Sstring:
2137 case Sstring_fence:
2138 temp_pos = dec_bytepos (from_byte);
2139 stringterm = FETCH_CHAR (temp_pos);
2140 while (1)
2141 {
2142 if (from >= stop) goto lose;
2143 UPDATE_SYNTAX_TABLE_FORWARD (from);
2144 c = FETCH_CHAR (from_byte);
2145 if (code == Sstring
2146 ? c == stringterm
2147 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
2148 break;
2149
2150 /* Some compilers can't handle this inside the switch. */
2151 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2152 switch (temp)
2153 {
2154 case Scharquote:
2155 case Sescape:
2156 INC_BOTH (from, from_byte);
2157 }
2158 INC_BOTH (from, from_byte);
2159 }
2160 INC_BOTH (from, from_byte);
2161 if (!depth && sexpflag) goto done;
2162 break;
2163 }
2164 }
2165
2166 /* Reached end of buffer. Error if within object, return nil if between */
2167 if (depth) goto lose;
2168
2169 immediate_quit = 0;
2170 return Qnil;
2171
2172 /* End of object reached */
2173 done:
2174 count--;
2175 }
2176
2177 while (count < 0)
2178 {
2179 while (from > stop)
2180 {
2181 DEC_BOTH (from, from_byte);
2182 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2183 c = FETCH_CHAR (from_byte);
2184 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2185 if (depth == min_depth)
2186 last_good = from;
2187 comstyle = 0;
2188 comnested = SYNTAX_COMMENT_NESTED (c);
2189 if (code == Sendcomment)
2190 comstyle = SYNTAX_COMMENT_STYLE (c);
2191 if (from > stop && SYNTAX_COMEND_SECOND (c)
2192 && prev_char_comend_first (from, from_byte)
2193 && parse_sexp_ignore_comments)
2194 {
2195 /* We must record the comment style encountered so that
2196 later, we can match only the proper comment begin
2197 sequence of the same style. */
2198 DEC_BOTH (from, from_byte);
2199 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2200 code = Sendcomment;
2201 c1 = FETCH_CHAR (from_byte);
2202 comstyle = SYNTAX_COMMENT_STYLE (c1);
2203 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2204 }
2205
2206 /* Quoting turns anything except a comment-ender
2207 into a word character. Note that this cannot be true
2208 if we decremented FROM in the if-statement above. */
2209 if (code != Sendcomment && char_quoted (from, from_byte))
2210 code = Sword;
2211 else if (SYNTAX_PREFIX (c))
2212 continue;
2213
2214 switch (SWITCH_ENUM_CAST (code))
2215 {
2216 case Sword:
2217 case Ssymbol:
2218 case Sescape:
2219 case Scharquote:
2220 if (depth || !sexpflag) break;
2221 /* This word counts as a sexp; count object finished
2222 after passing it. */
2223 while (from > stop)
2224 {
2225 temp_pos = from_byte;
2226 if (! NILP (current_buffer->enable_multibyte_characters))
2227 DEC_POS (temp_pos);
2228 else
2229 temp_pos--;
2230 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2231 c1 = FETCH_CHAR (temp_pos);
2232 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2233 /* Don't allow comment-end to be quoted. */
2234 if (temp_code == Sendcomment)
2235 goto done2;
2236 quoted = char_quoted (from - 1, temp_pos);
2237 if (quoted)
2238 {
2239 DEC_BOTH (from, from_byte);
2240 temp_pos = dec_bytepos (temp_pos);
2241 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2242 }
2243 c1 = FETCH_CHAR (temp_pos);
2244 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2245 if (! (quoted || temp_code == Sword
2246 || temp_code == Ssymbol
2247 || temp_code == Squote))
2248 goto done2;
2249 DEC_BOTH (from, from_byte);
2250 }
2251 goto done2;
2252
2253 case Smath:
2254 if (!sexpflag)
2255 break;
2256 temp_pos = dec_bytepos (from_byte);
2257 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2258 if (from != stop && c == FETCH_CHAR (temp_pos))
2259 DEC_BOTH (from, from_byte);
2260 if (mathexit)
2261 {
2262 mathexit = 0;
2263 goto open2;
2264 }
2265 mathexit = 1;
2266
2267 case Sclose:
2268 if (!++depth) goto done2;
2269 break;
2270
2271 case Sopen:
2272 open2:
2273 if (!--depth) goto done2;
2274 if (depth < min_depth)
2275 Fsignal (Qscan_error,
2276 Fcons (build_string ("Containing expression ends prematurely"),
2277 Fcons (make_number (last_good),
2278 Fcons (make_number (from), Qnil))));
2279 break;
2280
2281 case Sendcomment:
2282 if (!parse_sexp_ignore_comments)
2283 break;
2284 found = back_comment (from, from_byte, stop, comnested, comstyle,
2285 &out_charpos, &out_bytepos);
2286 /* FIXME: if found == -1, then it really wasn't a comment-end.
2287 For single-char Sendcomment, we can't do much about it apart
2288 from skipping the char.
2289 For 2-char endcomments, we could try again, taking both
2290 chars as separate entities, but it's a lot of trouble
2291 for very little gain, so we don't bother either. -sm */
2292 if (found != -1)
2293 from = out_charpos, from_byte = out_bytepos;
2294 break;
2295
2296 case Scomment_fence:
2297 case Sstring_fence:
2298 while (1)
2299 {
2300 DEC_BOTH (from, from_byte);
2301 if (from == stop) goto lose;
2302 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2303 if (!char_quoted (from, from_byte)
2304 && (c = FETCH_CHAR (from_byte),
2305 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
2306 break;
2307 }
2308 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2309 break;
2310
2311 case Sstring:
2312 stringterm = FETCH_CHAR (from_byte);
2313 while (1)
2314 {
2315 if (from == stop) goto lose;
2316 temp_pos = from_byte;
2317 if (! NILP (current_buffer->enable_multibyte_characters))
2318 DEC_POS (temp_pos);
2319 else
2320 temp_pos--;
2321 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2322 if (!char_quoted (from - 1, temp_pos)
2323 && stringterm == FETCH_CHAR (temp_pos))
2324 break;
2325 DEC_BOTH (from, from_byte);
2326 }
2327 DEC_BOTH (from, from_byte);
2328 if (!depth && sexpflag) goto done2;
2329 break;
2330 }
2331 }
2332
2333 /* Reached start of buffer. Error if within object, return nil if between */
2334 if (depth) goto lose;
2335
2336 immediate_quit = 0;
2337 return Qnil;
2338
2339 done2:
2340 count++;
2341 }
2342
2343
2344 immediate_quit = 0;
2345 XSETFASTINT (val, from);
2346 return val;
2347
2348 lose:
2349 Fsignal (Qscan_error,
2350 Fcons (build_string ("Unbalanced parentheses"),
2351 Fcons (make_number (last_good),
2352 Fcons (make_number (from), Qnil))));
2353
2354 /* NOTREACHED */
2355 }
2356
2357 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2358 "Scan from character number FROM by COUNT lists.\n\
2359 Returns the character number of the position thus found.\n\
2360 \n\
2361 If DEPTH is nonzero, paren depth begins counting from that value,\n\
2362 only places where the depth in parentheses becomes zero\n\
2363 are candidates for stopping; COUNT such places are counted.\n\
2364 Thus, a positive value for DEPTH means go out levels.\n\
2365 \n\
2366 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2367 \n\
2368 If the beginning or end of (the accessible part of) the buffer is reached\n\
2369 and the depth is wrong, an error is signaled.\n\
2370 If the depth is right but the count is not used up, nil is returned.")
2371 (from, count, depth)
2372 Lisp_Object from, count, depth;
2373 {
2374 CHECK_NUMBER (from, 0);
2375 CHECK_NUMBER (count, 1);
2376 CHECK_NUMBER (depth, 2);
2377
2378 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2379 }
2380
2381 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2382 "Scan from character number FROM by COUNT balanced expressions.\n\
2383 If COUNT is negative, scan backwards.\n\
2384 Returns the character number of the position thus found.\n\
2385 \n\
2386 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2387 \n\
2388 If the beginning or end of (the accessible part of) the buffer is reached\n\
2389 in the middle of a parenthetical grouping, an error is signaled.\n\
2390 If the beginning or end is reached between groupings\n\
2391 but before count is used up, nil is returned.")
2392 (from, count)
2393 Lisp_Object from, count;
2394 {
2395 CHECK_NUMBER (from, 0);
2396 CHECK_NUMBER (count, 1);
2397
2398 return scan_lists (XINT (from), XINT (count), 0, 1);
2399 }
2400
2401 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2402 0, 0, 0,
2403 "Move point backward over any number of chars with prefix syntax.\n\
2404 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
2405 ()
2406 {
2407 int beg = BEGV;
2408 int opoint = PT;
2409 int opoint_byte = PT_BYTE;
2410 int pos = PT;
2411 int pos_byte = PT_BYTE;
2412 int c;
2413
2414 if (pos <= beg)
2415 {
2416 SET_PT_BOTH (opoint, opoint_byte);
2417
2418 return Qnil;
2419 }
2420
2421 SETUP_SYNTAX_TABLE (pos, -1);
2422
2423 DEC_BOTH (pos, pos_byte);
2424
2425 while (!char_quoted (pos, pos_byte)
2426 /* Previous statement updates syntax table. */
2427 && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote)
2428 || SYNTAX_PREFIX (c)))
2429 {
2430 opoint = pos;
2431 opoint_byte = pos_byte;
2432
2433 if (pos + 1 > beg)
2434 DEC_BOTH (pos, pos_byte);
2435 }
2436
2437 SET_PT_BOTH (opoint, opoint_byte);
2438
2439 return Qnil;
2440 }
2441 \f
2442 /* Parse forward from FROM / FROM_BYTE to END,
2443 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2444 and return a description of the state of the parse at END.
2445 If STOPBEFORE is nonzero, stop at the start of an atom.
2446 If COMMENTSTOP is 1, stop at the start of a comment.
2447 If COMMENTSTOP is -1, stop at the start or end of a comment,
2448 after the beginning of a string, or after the end of a string. */
2449
2450 static void
2451 scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
2452 stopbefore, oldstate, commentstop)
2453 struct lisp_parse_state *stateptr;
2454 register int from;
2455 int end, targetdepth, stopbefore;
2456 Lisp_Object oldstate;
2457 int commentstop;
2458 {
2459 struct lisp_parse_state state;
2460
2461 register enum syntaxcode code;
2462 int c1;
2463 int comnested;
2464 struct level { int last, prev; };
2465 struct level levelstart[100];
2466 register struct level *curlevel = levelstart;
2467 struct level *endlevel = levelstart + 100;
2468 register int depth; /* Paren depth of current scanning location.
2469 level - levelstart equals this except
2470 when the depth becomes negative. */
2471 int mindepth; /* Lowest DEPTH value seen. */
2472 int start_quoted = 0; /* Nonzero means starting after a char quote */
2473 Lisp_Object tem;
2474 int prev_from; /* Keep one character before FROM. */
2475 int prev_from_byte;
2476 int prev_from_syntax;
2477 int boundary_stop = commentstop == -1;
2478 int nofence;
2479 int found;
2480 int out_bytepos, out_charpos;
2481 int temp;
2482
2483 prev_from = from;
2484 prev_from_byte = from_byte;
2485 if (from != BEGV)
2486 DEC_BOTH (prev_from, prev_from_byte);
2487
2488 /* Use this macro instead of `from++'. */
2489 #define INC_FROM \
2490 do { prev_from = from; \
2491 prev_from_byte = from_byte; \
2492 prev_from_syntax \
2493 = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte)); \
2494 INC_BOTH (from, from_byte); \
2495 UPDATE_SYNTAX_TABLE_FORWARD (from); \
2496 } while (0)
2497
2498 immediate_quit = 1;
2499 QUIT;
2500
2501 if (NILP (oldstate))
2502 {
2503 depth = 0;
2504 state.instring = -1;
2505 state.incomment = 0;
2506 state.comstyle = 0; /* comment style a by default. */
2507 state.comstr_start = -1; /* no comment/string seen. */
2508 }
2509 else
2510 {
2511 tem = Fcar (oldstate);
2512 if (!NILP (tem))
2513 depth = XINT (tem);
2514 else
2515 depth = 0;
2516
2517 oldstate = Fcdr (oldstate);
2518 oldstate = Fcdr (oldstate);
2519 oldstate = Fcdr (oldstate);
2520 tem = Fcar (oldstate);
2521 /* Check whether we are inside string_fence-style string: */
2522 state.instring = (!NILP (tem)
2523 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2524 : -1);
2525
2526 oldstate = Fcdr (oldstate);
2527 tem = Fcar (oldstate);
2528 state.incomment = (!NILP (tem)
2529 ? (INTEGERP (tem) ? XINT (tem) : -1)
2530 : 0);
2531
2532 oldstate = Fcdr (oldstate);
2533 tem = Fcar (oldstate);
2534 start_quoted = !NILP (tem);
2535
2536 /* if the eighth element of the list is nil, we are in comment
2537 style a. If it is non-nil, we are in comment style b */
2538 oldstate = Fcdr (oldstate);
2539 oldstate = Fcdr (oldstate);
2540 tem = Fcar (oldstate);
2541 state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
2542 ? ST_COMMENT_STYLE : 1);
2543
2544 oldstate = Fcdr (oldstate);
2545 tem = Fcar (oldstate);
2546 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
2547 oldstate = Fcdr (oldstate);
2548 tem = Fcar (oldstate);
2549 while (!NILP (tem)) /* >= second enclosing sexps. */
2550 {
2551 /* curlevel++->last ran into compiler bug on Apollo */
2552 curlevel->last = XINT (Fcar (tem));
2553 if (++curlevel == endlevel)
2554 curlevel--; /* error ("Nesting too deep for parser"); */
2555 curlevel->prev = -1;
2556 curlevel->last = -1;
2557 tem = Fcdr (tem);
2558 }
2559 }
2560 state.quoted = 0;
2561 mindepth = depth;
2562
2563 curlevel->prev = -1;
2564 curlevel->last = -1;
2565
2566 SETUP_SYNTAX_TABLE (prev_from, 1);
2567 prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
2568 UPDATE_SYNTAX_TABLE_FORWARD (from);
2569
2570 /* Enter the loop at a place appropriate for initial state. */
2571
2572 if (state.incomment)
2573 goto startincomment;
2574 if (state.instring >= 0)
2575 {
2576 nofence = state.instring != ST_STRING_STYLE;
2577 if (start_quoted)
2578 goto startquotedinstring;
2579 goto startinstring;
2580 }
2581 else if (start_quoted)
2582 goto startquoted;
2583
2584 #if 0 /* This seems to be redundant with the identical code above. */
2585 SETUP_SYNTAX_TABLE (prev_from, 1);
2586 prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
2587 UPDATE_SYNTAX_TABLE_FORWARD (from);
2588 #endif
2589
2590 while (from < end)
2591 {
2592 INC_FROM;
2593 code = prev_from_syntax & 0xff;
2594
2595 if (code == Scomment)
2596 {
2597 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
2598 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
2599 1 : -1);
2600 state.comstr_start = prev_from;
2601 }
2602 else if (code == Scomment_fence)
2603 {
2604 /* Record the comment style we have entered so that only
2605 the comment-end sequence of the same style actually
2606 terminates the comment section. */
2607 state.comstyle = ST_COMMENT_STYLE;
2608 state.incomment = -1;
2609 state.comstr_start = prev_from;
2610 code = Scomment;
2611 }
2612 else if (from < end)
2613 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax))
2614 if (c1 = FETCH_CHAR (from_byte),
2615 SYNTAX_COMSTART_SECOND (c1))
2616 /* Duplicate code to avoid a complex if-expression
2617 which causes trouble for the SGI compiler. */
2618 {
2619 /* Record the comment style we have entered so that only
2620 the comment-end sequence of the same style actually
2621 terminates the comment section. */
2622 state.comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte));
2623 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
2624 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2625 state.incomment = comnested ? 1 : -1;
2626 state.comstr_start = prev_from;
2627 INC_FROM;
2628 code = Scomment;
2629 }
2630
2631 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
2632 continue;
2633 switch (SWITCH_ENUM_CAST (code))
2634 {
2635 case Sescape:
2636 case Scharquote:
2637 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2638 curlevel->last = prev_from;
2639 startquoted:
2640 if (from == end) goto endquoted;
2641 INC_FROM;
2642 goto symstarted;
2643 /* treat following character as a word constituent */
2644 case Sword:
2645 case Ssymbol:
2646 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2647 curlevel->last = prev_from;
2648 symstarted:
2649 while (from < end)
2650 {
2651 /* Some compilers can't handle this inside the switch. */
2652 temp = SYNTAX (FETCH_CHAR (from_byte));
2653 switch (temp)
2654 {
2655 case Scharquote:
2656 case Sescape:
2657 INC_FROM;
2658 if (from == end) goto endquoted;
2659 break;
2660 case Sword:
2661 case Ssymbol:
2662 case Squote:
2663 break;
2664 default:
2665 goto symdone;
2666 }
2667 INC_FROM;
2668 }
2669 symdone:
2670 curlevel->prev = curlevel->last;
2671 break;
2672
2673 case Scomment:
2674 if (commentstop || boundary_stop) goto done;
2675 startincomment:
2676 /* The (from == BEGV) test was to enter the loop in the middle so
2677 that we find a 2-char comment ender even if we start in the
2678 middle of it. We don't want to do that if we're just at the
2679 beginning of the comment (think of (*) ... (*)). */
2680 found = forw_comment (from, from_byte, end,
2681 state.incomment, state.comstyle,
2682 (from == BEGV || from < state.comstr_start + 3)
2683 ? 0 : prev_from_syntax,
2684 &out_charpos, &out_bytepos, &state.incomment);
2685 from = out_charpos; from_byte = out_bytepos;
2686 /* Beware! prev_from and friends are invalid now.
2687 Luckily, the `done' doesn't use them and the INC_FROM
2688 sets them to a sane value without looking at them. */
2689 if (!found) goto done;
2690 INC_FROM;
2691 state.incomment = 0;
2692 state.comstyle = 0; /* reset the comment style */
2693 if (boundary_stop) goto done;
2694 break;
2695
2696 case Sopen:
2697 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2698 depth++;
2699 /* curlevel++->last ran into compiler bug on Apollo */
2700 curlevel->last = prev_from;
2701 if (++curlevel == endlevel)
2702 curlevel--; /* error ("Nesting too deep for parser"); */
2703 curlevel->prev = -1;
2704 curlevel->last = -1;
2705 if (targetdepth == depth) goto done;
2706 break;
2707
2708 case Sclose:
2709 depth--;
2710 if (depth < mindepth)
2711 mindepth = depth;
2712 if (curlevel != levelstart)
2713 curlevel--;
2714 curlevel->prev = curlevel->last;
2715 if (targetdepth == depth) goto done;
2716 break;
2717
2718 case Sstring:
2719 case Sstring_fence:
2720 state.comstr_start = from - 1;
2721 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2722 curlevel->last = prev_from;
2723 state.instring = (code == Sstring
2724 ? (FETCH_CHAR (prev_from_byte))
2725 : ST_STRING_STYLE);
2726 if (boundary_stop) goto done;
2727 startinstring:
2728 {
2729 nofence = state.instring != ST_STRING_STYLE;
2730
2731 while (1)
2732 {
2733 int c;
2734
2735 if (from >= end) goto done;
2736 c = FETCH_CHAR (from_byte);
2737 /* Some compilers can't handle this inside the switch. */
2738 temp = SYNTAX (c);
2739
2740 /* Check TEMP here so that if the char has
2741 a syntax-table property which says it is NOT
2742 a string character, it does not end the string. */
2743 if (nofence && c == state.instring && temp == Sstring)
2744 break;
2745
2746 switch (temp)
2747 {
2748 case Sstring_fence:
2749 if (!nofence) goto string_end;
2750 break;
2751 case Scharquote:
2752 case Sescape:
2753 INC_FROM;
2754 startquotedinstring:
2755 if (from >= end) goto endquoted;
2756 }
2757 INC_FROM;
2758 }
2759 }
2760 string_end:
2761 state.instring = -1;
2762 curlevel->prev = curlevel->last;
2763 INC_FROM;
2764 if (boundary_stop) goto done;
2765 break;
2766
2767 case Smath:
2768 break;
2769 }
2770 }
2771 goto done;
2772
2773 stop: /* Here if stopping before start of sexp. */
2774 from = prev_from; /* We have just fetched the char that starts it; */
2775 goto done; /* but return the position before it. */
2776
2777 endquoted:
2778 state.quoted = 1;
2779 done:
2780 state.depth = depth;
2781 state.mindepth = mindepth;
2782 state.thislevelstart = curlevel->prev;
2783 state.prevlevelstart
2784 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2785 state.location = from;
2786 state.levelstarts = Qnil;
2787 while (--curlevel >= levelstart)
2788 state.levelstarts = Fcons (make_number (curlevel->last),
2789 state.levelstarts);
2790 immediate_quit = 0;
2791
2792 *stateptr = state;
2793 }
2794
2795 /* This comment supplies the doc string for parse-partial-sexp,
2796 for make-docfile to see. We cannot put this in the real DEFUN
2797 due to limits in the Unix cpp.
2798
2799 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
2800 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
2801 Parsing stops at TO or when certain criteria are met;\n\
2802 point is set to where parsing stops.\n\
2803 If fifth arg STATE is omitted or nil,\n\
2804 parsing assumes that FROM is the beginning of a function.\n\
2805 Value is a list of ten elements describing final state of parsing:\n\
2806 0. depth in parens.\n\
2807 1. character address of start of innermost containing list; nil if none.\n\
2808 2. character address of start of last complete sexp terminated.\n\
2809 3. non-nil if inside a string.\n\
2810 (it is the character that will terminate the string,\n\
2811 or t if the string should be terminated by a generic string delimiter.)\n\
2812 4. nil if outside a comment, t if inside a non-nestable comment, \n\
2813 else an integer (the current comment nesting).\n\
2814 5. t if following a quote character.\n\
2815 6. the minimum paren-depth encountered during this scan.\n\
2816 7. t if in a comment of style b; symbol `syntax-table' if the comment\n\
2817 should be terminated by a generic comment delimiter.\n\
2818 8. character address of start of comment or string; nil if not in one.\n\
2819 9. Intermediate data for continuation of parsing (subject to change).\n\
2820 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
2821 in parentheses becomes equal to TARGETDEPTH.\n\
2822 Fourth arg STOPBEFORE non-nil means stop when come to\n\
2823 any character that starts a sexp.\n\
2824 Fifth arg STATE is a nine-element list like what this function returns.\n\
2825 It is used to initialize the state of the parse. Elements number 1, 2, 6\n\
2826 and 8 are ignored; you can leave off element 8 (the last) entirely.\n\
2827 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.\n\
2828 If it is symbol `syntax-table', stop after the start of a comment or a\n\
2829 string, or after end of a comment or a string.")
2830 (from, to, targetdepth, stopbefore, state, commentstop)
2831 */
2832
2833 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
2834 0 /* See immediately above */)
2835 (from, to, targetdepth, stopbefore, oldstate, commentstop)
2836 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
2837 {
2838 struct lisp_parse_state state;
2839 int target;
2840
2841 if (!NILP (targetdepth))
2842 {
2843 CHECK_NUMBER (targetdepth, 3);
2844 target = XINT (targetdepth);
2845 }
2846 else
2847 target = -100000; /* We won't reach this depth */
2848
2849 validate_region (&from, &to);
2850 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
2851 XINT (to),
2852 target, !NILP (stopbefore), oldstate,
2853 (NILP (commentstop)
2854 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
2855
2856 SET_PT (state.location);
2857
2858 return Fcons (make_number (state.depth),
2859 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
2860 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
2861 Fcons (state.instring >= 0
2862 ? (state.instring == ST_STRING_STYLE
2863 ? Qt : make_number (state.instring)) : Qnil,
2864 Fcons (state.incomment < 0 ? Qt :
2865 (state.incomment == 0 ? Qnil :
2866 make_number (state.incomment)),
2867 Fcons (state.quoted ? Qt : Qnil,
2868 Fcons (make_number (state.mindepth),
2869 Fcons ((state.comstyle
2870 ? (state.comstyle == ST_COMMENT_STYLE
2871 ? Qsyntax_table : Qt) :
2872 Qnil),
2873 Fcons (((state.incomment
2874 || (state.instring >= 0))
2875 ? make_number (state.comstr_start)
2876 : Qnil),
2877 Fcons (state.levelstarts, Qnil))))))))));
2878 }
2879 \f
2880 void
2881 init_syntax_once ()
2882 {
2883 register int i, c;
2884 Lisp_Object temp;
2885
2886 /* This has to be done here, before we call Fmake_char_table. */
2887 Qsyntax_table = intern ("syntax-table");
2888 staticpro (&Qsyntax_table);
2889
2890 /* Intern this now in case it isn't already done.
2891 Setting this variable twice is harmless.
2892 But don't staticpro it here--that is done in alloc.c. */
2893 Qchar_table_extra_slots = intern ("char-table-extra-slots");
2894
2895 /* Create objects which can be shared among syntax tables. */
2896 Vsyntax_code_object = Fmake_vector (make_number (13), Qnil);
2897 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
2898 XVECTOR (Vsyntax_code_object)->contents[i]
2899 = Fcons (make_number (i), Qnil);
2900
2901 /* Now we are ready to set up this property, so we can
2902 create syntax tables. */
2903 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
2904
2905 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
2906
2907 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
2908
2909 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
2910 for (i = 'a'; i <= 'z'; i++)
2911 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2912 for (i = 'A'; i <= 'Z'; i++)
2913 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2914 for (i = '0'; i <= '9'; i++)
2915 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2916
2917 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
2918 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
2919
2920 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
2921 Fcons (make_number (Sopen), make_number (')')));
2922 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
2923 Fcons (make_number (Sclose), make_number ('(')));
2924 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
2925 Fcons (make_number (Sopen), make_number (']')));
2926 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
2927 Fcons (make_number (Sclose), make_number ('[')));
2928 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
2929 Fcons (make_number (Sopen), make_number ('}')));
2930 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
2931 Fcons (make_number (Sclose), make_number ('{')));
2932 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
2933 Fcons (make_number ((int) Sstring), Qnil));
2934 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
2935 Fcons (make_number ((int) Sescape), Qnil));
2936
2937 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
2938 for (i = 0; i < 10; i++)
2939 {
2940 c = "_-+*/&|<>="[i];
2941 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2942 }
2943
2944 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
2945 for (i = 0; i < 12; i++)
2946 {
2947 c = ".,;:?!#@~^'`"[i];
2948 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2949 }
2950
2951 /* All multibyte characters have syntax `word' by default. */
2952 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
2953 for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
2954 XCHAR_TABLE (Vstandard_syntax_table)->contents[i] = temp;
2955 }
2956
2957 void
2958 syms_of_syntax ()
2959 {
2960 Qsyntax_table_p = intern ("syntax-table-p");
2961 staticpro (&Qsyntax_table_p);
2962
2963 staticpro (&Vsyntax_code_object);
2964
2965 Qscan_error = intern ("scan-error");
2966 staticpro (&Qscan_error);
2967 Fput (Qscan_error, Qerror_conditions,
2968 Fcons (Qscan_error, Fcons (Qerror, Qnil)));
2969 Fput (Qscan_error, Qerror_message,
2970 build_string ("Scan error"));
2971
2972 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
2973 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
2974
2975 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
2976 "Non-nil means `forward-sexp', etc., grant `syntax-table' property.\n\
2977 The value of this property should be either a syntax table, or a cons\n\
2978 of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric\n\
2979 syntax code, MATCHCHAR being nil or the character to match (which is\n\
2980 relevant only for open/close type.");
2981
2982 words_include_escapes = 0;
2983 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
2984 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
2985
2986 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
2987 "Non-nil means `scan-sexps' treats all multibyte characters as symbol.");
2988 multibyte_syntax_as_symbol = 0;
2989
2990 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
2991 &open_paren_in_column_0_is_defun_start,
2992 "Non-nil means an open paren in column 0 denotes the start of a defun.");
2993 open_paren_in_column_0_is_defun_start = 1;
2994
2995 defsubr (&Ssyntax_table_p);
2996 defsubr (&Ssyntax_table);
2997 defsubr (&Sstandard_syntax_table);
2998 defsubr (&Scopy_syntax_table);
2999 defsubr (&Sset_syntax_table);
3000 defsubr (&Schar_syntax);
3001 defsubr (&Smatching_paren);
3002 defsubr (&Smodify_syntax_entry);
3003 defsubr (&Sdescribe_syntax);
3004
3005 defsubr (&Sforward_word);
3006
3007 defsubr (&Sskip_chars_forward);
3008 defsubr (&Sskip_chars_backward);
3009 defsubr (&Sskip_syntax_forward);
3010 defsubr (&Sskip_syntax_backward);
3011
3012 defsubr (&Sforward_comment);
3013 defsubr (&Sscan_lists);
3014 defsubr (&Sscan_sexps);
3015 defsubr (&Sbackward_prefix_chars);
3016 defsubr (&Sparse_partial_sexp);
3017 }