]> code.delx.au - gnu-emacs/blob - src/syntax.c
d2eddafa43954ffa965425cf46ac2a86615a7e71
[gnu-emacs] / src / syntax.c
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 87, 93, 94, 95, 97, 1998, 1999 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 #include <config.h>
23 #include <ctype.h>
24 #include "lisp.h"
25 #include "commands.h"
26 #include "buffer.h"
27 #include "charset.h"
28
29 /* Make syntax table lookup grant data in gl_state. */
30 #define SYNTAX_ENTRY_VIA_PROPERTY
31
32 #include "syntax.h"
33 #include "intervals.h"
34
35 /* We use these constants in place for comment-style and
36 string-ender-char to distinguish comments/strings started by
37 comment_fence and string_fence codes. */
38
39 #define ST_COMMENT_STYLE (256 + 1)
40 #define ST_STRING_STYLE (256 + 2)
41 #include "category.h"
42
43 Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
44
45 int words_include_escapes;
46 int parse_sexp_lookup_properties;
47
48 /* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
49 int multibyte_syntax_as_symbol;
50
51 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
52 if not compiled with GCC. No need to mark it, since it is used
53 only very temporarily. */
54 Lisp_Object syntax_temp;
55
56 /* Non-zero means an open parenthesis in column 0 is always considered
57 to be the start of a defun. Zero means an open parenthesis in
58 column 0 has no special meaning. */
59
60 int open_paren_in_column_0_is_defun_start;
61
62 /* This is the internal form of the parse state used in parse-partial-sexp. */
63
64 struct lisp_parse_state
65 {
66 int depth; /* Depth at end of parsing. */
67 int instring; /* -1 if not within string, else desired terminator. */
68 int incomment; /* -1 if in unnestable comment else comment nesting */
69 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
70 int quoted; /* Nonzero if just after an escape char at end of parsing */
71 int thislevelstart; /* Char number of most recent start-of-expression at current level */
72 int prevlevelstart; /* Char number of start of containing expression */
73 int location; /* Char number at which parsing stopped. */
74 int mindepth; /* Minimum depth seen while scanning. */
75 int comstr_start; /* Position just after last comment/string starter. */
76 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
77 of levels (starting from outermost). */
78 };
79 \f
80 /* These variables are a cache for finding the start of a defun.
81 find_start_pos is the place for which the defun start was found.
82 find_start_value is the defun start position found for it.
83 find_start_value_byte is the corresponding byte position.
84 find_start_buffer is the buffer it was found in.
85 find_start_begv is the BEGV value when it was found.
86 find_start_modiff is the value of MODIFF when it was found. */
87
88 static int find_start_pos;
89 static int find_start_value;
90 static int find_start_value_byte;
91 static struct buffer *find_start_buffer;
92 static int find_start_begv;
93 static int find_start_modiff;
94
95
96 static int find_defun_start P_ ((int, int));
97 static int back_comment P_ ((int, int, int, int, int, int *, int *));
98 static int char_quoted P_ ((int, int));
99 static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object));
100 static Lisp_Object scan_lists P_ ((int, int, int, int));
101 static void scan_sexps_forward P_ ((struct lisp_parse_state *,
102 int, int, int, int,
103 int, Lisp_Object, int));
104 \f
105
106 struct gl_state_s gl_state; /* Global state of syntax parser. */
107
108 INTERVAL interval_of ();
109 #define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
110 to scan to property-change. */
111
112 /* Update gl_state to an appropriate interval which contains CHARPOS. The
113 sign of COUNT give the relative position of CHARPOS wrt the previously
114 valid interval. If INIT, only [be]_property fields of gl_state are
115 valid at start, the rest is filled basing on OBJECT.
116
117 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
118 direction than the intervals - or in an interval. We update the
119 current syntax-table basing on the property of this interval, and
120 update the interval to start further than CHARPOS - or be
121 NULL_INTERVAL. We also update lim_property to be the next value of
122 charpos to call this subroutine again - or be before/after the
123 start/end of OBJECT. */
124
125 void
126 update_syntax_table (charpos, count, init, object)
127 int charpos, count, init;
128 Lisp_Object object;
129 {
130 Lisp_Object tmp_table;
131 int cnt = 0, invalidate = 1;
132 INTERVAL i, oldi;
133
134 if (init)
135 {
136 gl_state.start = gl_state.b_property;
137 gl_state.stop = gl_state.e_property;
138 gl_state.forward_i = interval_of (charpos, object);
139 i = gl_state.backward_i = gl_state.forward_i;
140 gl_state.left_ok = gl_state.right_ok = 1;
141 invalidate = 0;
142 if (NULL_INTERVAL_P (i))
143 return;
144 /* interval_of updates only ->position of the return value, so
145 update the parents manually to speed up update_interval. */
146 while (!NULL_PARENT (i))
147 {
148 if (AM_RIGHT_CHILD (i))
149 INTERVAL_PARENT (i)->position = i->position
150 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
151 - TOTAL_LENGTH (INTERVAL_PARENT (i))
152 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
153 else
154 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
155 + TOTAL_LENGTH (i);
156 i = INTERVAL_PARENT (i);
157 }
158 i = gl_state.forward_i;
159 gl_state.b_property = i->position - 1 - gl_state.offset;
160 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
161 goto update;
162 }
163 oldi = i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
164
165 /* We are guarantied to be called with CHARPOS either in i,
166 or further off. */
167 if (NULL_INTERVAL_P (i))
168 error ("Error in syntax_table logic for to-the-end intervals");
169 else if (charpos < i->position) /* Move left. */
170 {
171 if (count > 0)
172 error ("Error in syntax_table logic for intervals <-");
173 /* Update the interval. */
174 i = update_interval (i, charpos);
175 if (oldi->position != INTERVAL_LAST_POS (i))
176 {
177 invalidate = 0;
178 gl_state.right_ok = 1; /* Invalidate the other end. */
179 gl_state.forward_i = i;
180 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
181 }
182 }
183 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
184 {
185 if (count < 0)
186 error ("Error in syntax_table logic for intervals ->");
187 /* Update the interval. */
188 i = update_interval (i, charpos);
189 if (i->position != INTERVAL_LAST_POS (oldi))
190 {
191 invalidate = 0;
192 gl_state.left_ok = 1; /* Invalidate the other end. */
193 gl_state.backward_i = i;
194 gl_state.b_property = i->position - 1 - gl_state.offset;
195 }
196 }
197 else if (count > 0 ? gl_state.right_ok : gl_state.left_ok)
198 {
199 /* We do not need to recalculate tmp_table. */
200 tmp_table = gl_state.old_prop;
201 }
202
203 update:
204 tmp_table = textget (i->plist, Qsyntax_table);
205
206 if (invalidate)
207 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
208
209 if (invalidate) /* Did not get to adjacent interval. */
210 { /* with the same table => */
211 /* invalidate the old range. */
212 if (count > 0)
213 {
214 gl_state.backward_i = i;
215 gl_state.left_ok = 1; /* Invalidate the other end. */
216 gl_state.b_property = i->position - 1 - gl_state.offset;
217 }
218 else
219 {
220 gl_state.forward_i = i;
221 gl_state.right_ok = 1; /* Invalidate the other end. */
222 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
223 }
224 }
225
226 gl_state.current_syntax_table = tmp_table;
227 gl_state.old_prop = tmp_table;
228 if (EQ (Fsyntax_table_p (tmp_table), Qt))
229 {
230 gl_state.use_global = 0;
231 }
232 else if (CONSP (tmp_table))
233 {
234 gl_state.use_global = 1;
235 gl_state.global_code = tmp_table;
236 }
237 else
238 {
239 gl_state.use_global = 0;
240 gl_state.current_syntax_table = current_buffer->syntax_table;
241 }
242
243 while (!NULL_INTERVAL_P (i))
244 {
245 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
246 {
247 if (count > 0)
248 gl_state.right_ok = 0;
249 else
250 gl_state.left_ok = 0;
251 break;
252 }
253 else if (cnt == INTERVALS_AT_ONCE)
254 {
255 if (count > 0)
256 gl_state.right_ok = 1;
257 else
258 gl_state.left_ok = 1;
259 break;
260 }
261 cnt++;
262 i = count > 0 ? next_interval (i) : previous_interval (i);
263 }
264 if (NULL_INTERVAL_P (i))
265 { /* This property goes to the end. */
266 if (count > 0)
267 gl_state.e_property = gl_state.stop;
268 else
269 gl_state.b_property = gl_state.start;
270 }
271 else
272 {
273 if (count > 0)
274 {
275 gl_state.e_property = i->position - gl_state.offset;
276 gl_state.forward_i = i;
277 }
278 else
279 {
280 gl_state.b_property = i->position + LENGTH (i) - 1 - gl_state.offset;
281 gl_state.backward_i = i;
282 }
283 }
284 }
285 \f
286 /* Returns TRUE if char at CHARPOS is quoted.
287 Global syntax-table data should be set up already to be good at CHARPOS
288 or after. On return global syntax data is good for lookup at CHARPOS. */
289
290 static int
291 char_quoted (charpos, bytepos)
292 register int charpos, bytepos;
293 {
294 register enum syntaxcode code;
295 register int beg = BEGV;
296 register int quoted = 0;
297 int orig = charpos;
298
299 DEC_BOTH (charpos, bytepos);
300
301 while (bytepos >= beg)
302 {
303 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
304 code = SYNTAX (FETCH_CHAR (bytepos));
305 if (! (code == Scharquote || code == Sescape))
306 break;
307
308 DEC_BOTH (charpos, bytepos);
309 quoted = !quoted;
310 }
311
312 UPDATE_SYNTAX_TABLE (orig);
313 return quoted;
314 }
315
316 /* Return the bytepos one character after BYTEPOS.
317 We assume that BYTEPOS is not at the end of the buffer. */
318
319 INLINE int
320 inc_bytepos (bytepos)
321 int bytepos;
322 {
323 if (NILP (current_buffer->enable_multibyte_characters))
324 return bytepos + 1;
325
326 INC_POS (bytepos);
327 return bytepos;
328 }
329
330 /* Return the bytepos one character before BYTEPOS.
331 We assume that BYTEPOS is not at the start of the buffer. */
332
333 INLINE int
334 dec_bytepos (bytepos)
335 int bytepos;
336 {
337 if (NILP (current_buffer->enable_multibyte_characters))
338 return bytepos - 1;
339
340 DEC_POS (bytepos);
341 return bytepos;
342 }
343 \f
344 /* Find a defun-start that is the last one before POS (or nearly the last).
345 We record what we find, so that another call in the same area
346 can return the same value right away.
347
348 There is no promise at which position the global syntax data is
349 valid on return from the subroutine, so the caller should explicitly
350 update the global data. */
351
352 static int
353 find_defun_start (pos, pos_byte)
354 int pos, pos_byte;
355 {
356 int opoint = PT, opoint_byte = PT_BYTE;
357
358 /* Use previous finding, if it's valid and applies to this inquiry. */
359 if (current_buffer == find_start_buffer
360 /* Reuse the defun-start even if POS is a little farther on.
361 POS might be in the next defun, but that's ok.
362 Our value may not be the best possible, but will still be usable. */
363 && pos <= find_start_pos + 1000
364 && pos >= find_start_value
365 && BEGV == find_start_begv
366 && MODIFF == find_start_modiff)
367 return find_start_value;
368
369 /* Back up to start of line. */
370 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
371
372 /* We optimize syntax-table lookup for rare updates. Thus we accept
373 only those `^\s(' which are good in global _and_ text-property
374 syntax-tables. */
375 gl_state.current_syntax_table = current_buffer->syntax_table;
376 gl_state.use_global = 0;
377 if (open_paren_in_column_0_is_defun_start)
378 {
379 while (PT > BEGV)
380 {
381 /* Open-paren at start of line means we may have found our
382 defun-start. */
383 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
384 {
385 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
386 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
387 break;
388 /* Now fallback to the default value. */
389 gl_state.current_syntax_table = current_buffer->syntax_table;
390 gl_state.use_global = 0;
391 }
392 /* Move to beg of previous line. */
393 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
394 }
395 }
396
397 /* Record what we found, for the next try. */
398 find_start_value = PT;
399 find_start_value_byte = PT_BYTE;
400 find_start_buffer = current_buffer;
401 find_start_modiff = MODIFF;
402 find_start_begv = BEGV;
403 find_start_pos = pos;
404
405 TEMP_SET_PT_BOTH (opoint, opoint_byte);
406
407 return find_start_value;
408 }
409 \f
410 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
411
412 static int
413 prev_char_comend_first (pos, pos_byte)
414 int pos, pos_byte;
415 {
416 int c, val;
417
418 DEC_BOTH (pos, pos_byte);
419 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
420 c = FETCH_CHAR (pos_byte);
421 val = SYNTAX_COMEND_FIRST (c);
422 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
423 return val;
424 }
425
426 /* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
427
428 static int
429 prev_char_comstart_first (pos, pos_byte)
430 int pos, pos_byte;
431 {
432 int c, val;
433
434 DEC_BOTH (pos, pos_byte);
435 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
436 c = FETCH_CHAR (pos_byte);
437 val = SYNTAX_COMSTART_FIRST (c);
438 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
439 return val;
440 }
441
442 /* Checks whether charpos FROM is at the end of a comment.
443 FROM_BYTE is the bytepos corresponding to FROM.
444 Do not move back before STOP.
445
446 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
447 return -1 otherwise.
448
449 If successful, store the charpos of the comment's beginning
450 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
451
452 Global syntax data remains valid for backward search starting at
453 the returned value (or at FROM, if the search was not successful). */
454
455 static int
456 back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
457 int from, from_byte, stop;
458 int comnested, comstyle;
459 int *charpos_ptr, *bytepos_ptr;
460 {
461 /* Look back, counting the parity of string-quotes,
462 and recording the comment-starters seen.
463 When we reach a safe place, assume that's not in a string;
464 then step the main scan to the earliest comment-starter seen
465 an even number of string quotes away from the safe place.
466
467 OFROM[I] is position of the earliest comment-starter seen
468 which is I+2X quotes from the comment-end.
469 PARITY is current parity of quotes from the comment end. */
470 int parity = 0;
471 int my_stringend = 0;
472 int string_lossage = 0;
473 int comment_end = from;
474 int comment_end_byte = from_byte;
475 int comstart_pos = 0;
476 int comstart_byte;
477 /* Value that PARITY had, when we reached the position
478 in COMSTART_POS. */
479 int comstart_parity = 0;
480 int scanstart = from - 1;
481 /* Place where the containing defun starts,
482 or 0 if we didn't come across it yet. */
483 int defun_start = 0;
484 int defun_start_byte = 0;
485 register enum syntaxcode code;
486 int nesting = 1; /* current comment nesting */
487 int c;
488
489 /* At beginning of range to scan, we're outside of strings;
490 that determines quote parity to the comment-end. */
491 while (from != stop)
492 {
493 int temp_byte;
494
495 /* Move back and examine a character. */
496 DEC_BOTH (from, from_byte);
497 UPDATE_SYNTAX_TABLE_BACKWARD (from);
498
499 c = FETCH_CHAR (from_byte);
500 code = SYNTAX (c);
501
502 /* If this char is the second of a 2-char comment end sequence,
503 back up and give the pair the appropriate syntax. */
504 if (from > stop && SYNTAX_COMEND_SECOND (c)
505 && prev_char_comend_first (from, from_byte))
506 {
507 code = Sendcomment;
508 DEC_BOTH (from, from_byte);
509 UPDATE_SYNTAX_TABLE_BACKWARD (from);
510 c = FETCH_CHAR (from_byte);
511 }
512
513 /* If this char starts a 2-char comment start sequence,
514 treat it like a 1-char comment starter. */
515 if (from < scanstart && SYNTAX_COMSTART_FIRST (c))
516 {
517 temp_byte = inc_bytepos (from_byte);
518 UPDATE_SYNTAX_TABLE_FORWARD (from + 1);
519 if (SYNTAX_COMSTART_SECOND (FETCH_CHAR (temp_byte))
520 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (temp_byte)))
521 code = Scomment;
522 UPDATE_SYNTAX_TABLE_BACKWARD (from);
523 }
524 else if (code == Scomment && comstyle != SYNTAX_COMMENT_STYLE (c))
525 /* Ignore comment starters of a different style. */
526 continue;
527
528 /* Ignore escaped characters, except comment-enders. */
529 if (code != Sendcomment && char_quoted (from, from_byte))
530 continue;
531
532 /* Track parity of quotes. */
533 if (code == Sstring)
534 {
535 parity ^= 1;
536 if (my_stringend == 0)
537 my_stringend = c;
538 /* If we have two kinds of string delimiters.
539 There's no way to grok this scanning backwards. */
540 else if (my_stringend != c)
541 string_lossage = 1;
542 }
543
544 if (code == Sstring_fence || code == Scomment_fence)
545 {
546 parity ^= 1;
547 if (my_stringend == 0)
548 my_stringend
549 = code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE;
550 /* If we have two kinds of string delimiters.
551 There's no way to grok this scanning backwards. */
552 else if (my_stringend != (code == Sstring_fence
553 ? ST_STRING_STYLE : ST_COMMENT_STYLE))
554 string_lossage = 1;
555 }
556
557 if (code == Scomment)
558 /* We've already checked that it is the relevant comstyle. */
559 {
560 if (comnested && --nesting <= 0 && parity == 0 && !string_lossage)
561 /* nested comments have to be balanced, so we don't need to
562 keep looking for earlier ones. We use here the same (slightly
563 incorrect) reasoning as below: since it is followed by uniform
564 paired string quotes, this comment-start has to be outside of
565 strings, else the comment-end itself would be inside a string. */
566 goto done;
567
568 /* Record comment-starters according to that
569 quote-parity to the comment-end. */
570 comstart_parity = parity;
571 comstart_pos = from;
572 comstart_byte = from_byte;
573 }
574
575 /* If we find another earlier comment-ender,
576 any comment-starts earlier than that don't count
577 (because they go with the earlier comment-ender). */
578 if (code == Sendcomment
579 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)) == comstyle)
580 {
581 if (comnested)
582 nesting++;
583 else
584 break;
585 }
586
587 /* Assume a defun-start point is outside of strings. */
588 if (code == Sopen
589 && (from == stop
590 || (temp_byte = dec_bytepos (from_byte),
591 FETCH_CHAR (temp_byte) == '\n')))
592 {
593 defun_start = from;
594 defun_start_byte = from_byte;
595 break;
596 }
597 }
598
599 if (comstart_pos == 0)
600 {
601 from = comment_end;
602 from_byte = comment_end_byte;
603 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
604 }
605 /* If the earliest comment starter
606 is followed by uniform paired string quotes or none,
607 we know it can't be inside a string
608 since if it were then the comment ender would be inside one.
609 So it does start a comment. Skip back to it. */
610 else if (!comnested && comstart_parity == 0 && !string_lossage)
611 {
612 from = comstart_pos;
613 from_byte = comstart_byte;
614 /* Globals are correct now. */
615 }
616 else
617 {
618 /* We had two kinds of string delimiters mixed up
619 together. Decode this going forwards.
620 Scan fwd from the previous comment ender
621 to the one in question; this records where we
622 last passed a comment starter. */
623 struct lisp_parse_state state;
624 /* If we did not already find the defun start, find it now. */
625 if (defun_start == 0)
626 {
627 defun_start = find_defun_start (comment_end, comment_end_byte);
628 defun_start_byte = find_start_value_byte;
629 }
630 scan_sexps_forward (&state,
631 defun_start, defun_start_byte,
632 comment_end - 1, -10000, 0, Qnil, 0);
633 if (state.incomment)
634 {
635 /* scan_sexps_forward changed the direction of search in
636 global variables, so we need to update it completely. */
637
638 from = state.comstr_start;
639 }
640 else
641 {
642 from = comment_end;
643 }
644 from_byte = CHAR_TO_BYTE (from);
645 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
646 }
647
648 done:
649 *charpos_ptr = from;
650 *bytepos_ptr = from_byte;
651
652 return (from == comment_end) ? -1 : from;
653 }
654 \f
655 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
656 "Return t if OBJECT is a syntax table.\n\
657 Currently, any char-table counts as a syntax table.")
658 (object)
659 Lisp_Object object;
660 {
661 if (CHAR_TABLE_P (object)
662 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
663 return Qt;
664 return Qnil;
665 }
666
667 static void
668 check_syntax_table (obj)
669 Lisp_Object obj;
670 {
671 if (!(CHAR_TABLE_P (obj)
672 && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table)))
673 wrong_type_argument (Qsyntax_table_p, obj);
674 }
675
676 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
677 "Return the current syntax table.\n\
678 This is the one specified by the current buffer.")
679 ()
680 {
681 return current_buffer->syntax_table;
682 }
683
684 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
685 Sstandard_syntax_table, 0, 0, 0,
686 "Return the standard syntax table.\n\
687 This is the one used for new buffers.")
688 ()
689 {
690 return Vstandard_syntax_table;
691 }
692
693 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
694 "Construct a new syntax table and return it.\n\
695 It is a copy of the TABLE, which defaults to the standard syntax table.")
696 (table)
697 Lisp_Object table;
698 {
699 Lisp_Object copy;
700
701 if (!NILP (table))
702 check_syntax_table (table);
703 else
704 table = Vstandard_syntax_table;
705
706 copy = Fcopy_sequence (table);
707
708 /* Only the standard syntax table should have a default element.
709 Other syntax tables should inherit from parents instead. */
710 XCHAR_TABLE (copy)->defalt = Qnil;
711
712 /* Copied syntax tables should all have parents.
713 If we copied one with no parent, such as the standard syntax table,
714 use the standard syntax table as the copy's parent. */
715 if (NILP (XCHAR_TABLE (copy)->parent))
716 Fset_char_table_parent (copy, Vstandard_syntax_table);
717 return copy;
718 }
719
720 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
721 "Select a new syntax table for the current buffer.\n\
722 One argument, a syntax table.")
723 (table)
724 Lisp_Object table;
725 {
726 int idx;
727 check_syntax_table (table);
728 current_buffer->syntax_table = table;
729 /* Indicate that this buffer now has a specified syntax table. */
730 idx = PER_BUFFER_VAR_IDX (syntax_table);
731 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
732 return table;
733 }
734 \f
735 /* Convert a letter which signifies a syntax code
736 into the code it signifies.
737 This is used by modify-syntax-entry, and other things. */
738
739 unsigned char syntax_spec_code[0400] =
740 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
741 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
742 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
743 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
744 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
745 (char) Smath, 0377, 0377, (char) Squote,
746 (char) Sopen, (char) Sclose, 0377, 0377,
747 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
748 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
749 0377, 0377, 0377, 0377,
750 (char) Scomment, 0377, (char) Sendcomment, 0377,
751 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
752 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
753 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
754 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
755 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
756 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
757 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
758 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
759 };
760
761 /* Indexed by syntax code, give the letter that describes it. */
762
763 char syntax_code_spec[16] =
764 {
765 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
766 '!', '|'
767 };
768
769 /* Indexed by syntax code, give the object (cons of syntax code and
770 nil) to be stored in syntax table. Since these objects can be
771 shared among syntax tables, we generate them in advance. By
772 sharing objects, the function `describe-syntax' can give a more
773 compact listing. */
774 static Lisp_Object Vsyntax_code_object;
775
776 \f
777 /* Look up the value for CHARACTER in syntax table TABLE's parent
778 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
779 for CHARACTER. It's actually used only when not compiled with GCC. */
780
781 Lisp_Object
782 syntax_parent_lookup (table, character)
783 Lisp_Object table;
784 int character;
785 {
786 Lisp_Object value;
787
788 while (1)
789 {
790 table = XCHAR_TABLE (table)->parent;
791 if (NILP (table))
792 return Qnil;
793
794 value = XCHAR_TABLE (table)->contents[character];
795 if (!NILP (value))
796 return value;
797 }
798 }
799
800 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
801 "Return the syntax code of CHARACTER, described by a character.\n\
802 For example, if CHARACTER is a word constituent,\n\
803 the character `w' is returned.\n\
804 The characters that correspond to various syntax codes\n\
805 are listed in the documentation of `modify-syntax-entry'.")
806 (character)
807 Lisp_Object character;
808 {
809 int char_int;
810 gl_state.current_syntax_table = current_buffer->syntax_table;
811
812 gl_state.use_global = 0;
813 CHECK_NUMBER (character, 0);
814 char_int = XINT (character);
815 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
816 }
817
818 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
819 "Return the matching parenthesis of CHARACTER, or nil if none.")
820 (character)
821 Lisp_Object character;
822 {
823 int char_int, code;
824 gl_state.current_syntax_table = current_buffer->syntax_table;
825 gl_state.use_global = 0;
826 CHECK_NUMBER (character, 0);
827 char_int = XINT (character);
828 code = SYNTAX (char_int);
829 if (code == Sopen || code == Sclose)
830 return SYNTAX_MATCH (char_int);
831 return Qnil;
832 }
833
834 /* This comment supplies the doc string for modify-syntax-entry,
835 for make-docfile to see. We cannot put this in the real DEFUN
836 due to limits in the Unix cpp.
837
838 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
839 "Set syntax for character CHAR according to string S.\n\
840 The syntax is changed only for table TABLE, which defaults to\n\
841 the current buffer's syntax table.\n\
842 The first character of S should be one of the following:\n\
843 Space or - whitespace syntax. w word constituent.\n\
844 _ symbol constituent. . punctuation.\n\
845 ( open-parenthesis. ) close-parenthesis.\n\
846 \" string quote. \\ escape.\n\
847 $ paired delimiter. ' expression quote or prefix operator.\n\
848 < comment starter. > comment ender.\n\
849 / character-quote. @ inherit from `standard-syntax-table'.\n\
850 \n\
851 Only single-character comment start and end sequences are represented thus.\n\
852 Two-character sequences are represented as described below.\n\
853 The second character of S is the matching parenthesis,\n\
854 used only if the first character is `(' or `)'.\n\
855 Any additional characters are flags.\n\
856 Defined flags are the characters 1, 2, 3, 4, b, p, and n.\n\
857 1 means CHAR is the start of a two-char comment start sequence.\n\
858 2 means CHAR is the second character of such a sequence.\n\
859 3 means CHAR is the start of a two-char comment end sequence.\n\
860 4 means CHAR is the second character of such a sequence.\n\
861 \n\
862 There can be up to two orthogonal comment sequences. This is to support\n\
863 language modes such as C++. By default, all comment sequences are of style\n\
864 a, but you can set the comment sequence style to b (on the second character\n\
865 of a comment-start, or the first character of a comment-end sequence) using\n\
866 this flag:\n\
867 b means CHAR is part of comment sequence b.\n\
868 n means CHAR is part of a nestable comment sequence.\n\
869 \n\
870 p means CHAR is a prefix character for `backward-prefix-chars';\n\
871 such characters are treated as whitespace when they occur\n\
872 between expressions.")
873 (char, s, table)
874 */
875
876 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
877 /* I really don't know why this is interactive
878 help-form should at least be made useful whilst reading the second arg
879 */
880 "cSet syntax for character: \nsSet syntax for %s to: ",
881 0 /* See immediately above */)
882 (c, newentry, syntax_table)
883 Lisp_Object c, newentry, syntax_table;
884 {
885 register unsigned char *p;
886 register enum syntaxcode code;
887 int val;
888 Lisp_Object match;
889
890 CHECK_NUMBER (c, 0);
891 CHECK_STRING (newentry, 1);
892
893 if (NILP (syntax_table))
894 syntax_table = current_buffer->syntax_table;
895 else
896 check_syntax_table (syntax_table);
897
898 p = XSTRING (newentry)->data;
899 code = (enum syntaxcode) syntax_spec_code[*p++];
900 if (((int) code & 0377) == 0377)
901 error ("invalid syntax description letter: %c", p[-1]);
902
903 if (code == Sinherit)
904 {
905 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Qnil);
906 return Qnil;
907 }
908
909 if (*p)
910 {
911 int len;
912 int character = (STRING_CHAR_AND_LENGTH
913 (p, STRING_BYTES (XSTRING (newentry)) - 1, len));
914 XSETINT (match, character);
915 if (XFASTINT (match) == ' ')
916 match = Qnil;
917 p += len;
918 }
919 else
920 match = Qnil;
921
922 val = (int) code;
923 while (*p)
924 switch (*p++)
925 {
926 case '1':
927 val |= 1 << 16;
928 break;
929
930 case '2':
931 val |= 1 << 17;
932 break;
933
934 case '3':
935 val |= 1 << 18;
936 break;
937
938 case '4':
939 val |= 1 << 19;
940 break;
941
942 case 'p':
943 val |= 1 << 20;
944 break;
945
946 case 'b':
947 val |= 1 << 21;
948 break;
949
950 case 'n':
951 val |= 1 << 22;
952 break;
953 }
954
955 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
956 newentry = XVECTOR (Vsyntax_code_object)->contents[val];
957 else
958 /* Since we can't use a shared object, let's make a new one. */
959 newentry = Fcons (make_number (val), match);
960
961 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
962
963 return Qnil;
964 }
965 \f
966 /* Dump syntax table to buffer in human-readable format */
967
968 static void
969 describe_syntax (value)
970 Lisp_Object value;
971 {
972 register enum syntaxcode code;
973 char desc, start1, start2, end1, end2, prefix, comstyle;
974 char str[2];
975 Lisp_Object first, match_lisp;
976
977 Findent_to (make_number (16), make_number (1));
978
979 if (NILP (value))
980 {
981 insert_string ("default\n");
982 return;
983 }
984
985 if (CHAR_TABLE_P (value))
986 {
987 insert_string ("deeper char-table ...\n");
988 return;
989 }
990
991 if (!CONSP (value))
992 {
993 insert_string ("invalid\n");
994 return;
995 }
996
997 first = XCAR (value);
998 match_lisp = XCDR (value);
999
1000 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
1001 {
1002 insert_string ("invalid\n");
1003 return;
1004 }
1005
1006 code = (enum syntaxcode) (XINT (first) & 0377);
1007 start1 = (XINT (first) >> 16) & 1;
1008 start2 = (XINT (first) >> 17) & 1;
1009 end1 = (XINT (first) >> 18) & 1;
1010 end2 = (XINT (first) >> 19) & 1;
1011 prefix = (XINT (first) >> 20) & 1;
1012 comstyle = (XINT (first) >> 21) & 1;
1013
1014 if ((int) code < 0 || (int) code >= (int) Smax)
1015 {
1016 insert_string ("invalid");
1017 return;
1018 }
1019 desc = syntax_code_spec[(int) code];
1020
1021 str[0] = desc, str[1] = 0;
1022 insert (str, 1);
1023
1024 if (NILP (match_lisp))
1025 insert (" ", 1);
1026 else
1027 insert_char (XINT (match_lisp));
1028
1029 if (start1)
1030 insert ("1", 1);
1031 if (start2)
1032 insert ("2", 1);
1033
1034 if (end1)
1035 insert ("3", 1);
1036 if (end2)
1037 insert ("4", 1);
1038
1039 if (prefix)
1040 insert ("p", 1);
1041 if (comstyle)
1042 insert ("b", 1);
1043
1044 insert_string ("\twhich means: ");
1045
1046 switch (SWITCH_ENUM_CAST (code))
1047 {
1048 case Swhitespace:
1049 insert_string ("whitespace"); break;
1050 case Spunct:
1051 insert_string ("punctuation"); break;
1052 case Sword:
1053 insert_string ("word"); break;
1054 case Ssymbol:
1055 insert_string ("symbol"); break;
1056 case Sopen:
1057 insert_string ("open"); break;
1058 case Sclose:
1059 insert_string ("close"); break;
1060 case Squote:
1061 insert_string ("quote"); break;
1062 case Sstring:
1063 insert_string ("string"); break;
1064 case Smath:
1065 insert_string ("math"); break;
1066 case Sescape:
1067 insert_string ("escape"); break;
1068 case Scharquote:
1069 insert_string ("charquote"); break;
1070 case Scomment:
1071 insert_string ("comment"); break;
1072 case Sendcomment:
1073 insert_string ("endcomment"); break;
1074 default:
1075 insert_string ("invalid");
1076 return;
1077 }
1078
1079 if (!NILP (match_lisp))
1080 {
1081 insert_string (", matches ");
1082 insert_char (XINT (match_lisp));
1083 }
1084
1085 if (start1)
1086 insert_string (",\n\t is the first character of a comment-start sequence");
1087 if (start2)
1088 insert_string (",\n\t is the second character of a comment-start sequence");
1089
1090 if (end1)
1091 insert_string (",\n\t is the first character of a comment-end sequence");
1092 if (end2)
1093 insert_string (",\n\t is the second character of a comment-end sequence");
1094 if (comstyle)
1095 insert_string (" (comment style b)");
1096
1097 if (prefix)
1098 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1099
1100 insert_string ("\n");
1101 }
1102
1103 static Lisp_Object
1104 describe_syntax_1 (vector)
1105 Lisp_Object vector;
1106 {
1107 struct buffer *old = current_buffer;
1108 set_buffer_internal (XBUFFER (Vstandard_output));
1109 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil, (int *) 0, 0);
1110 while (! NILP (XCHAR_TABLE (vector)->parent))
1111 {
1112 vector = XCHAR_TABLE (vector)->parent;
1113 insert_string ("\nThe parent syntax table is:");
1114 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil,
1115 (int *) 0, 0);
1116 }
1117
1118 call0 (intern ("help-mode"));
1119 set_buffer_internal (old);
1120 return Qnil;
1121 }
1122
1123 DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
1124 "Describe the syntax specifications in the syntax table.\n\
1125 The descriptions are inserted in a buffer, which is then displayed.")
1126 ()
1127 {
1128 internal_with_output_to_temp_buffer
1129 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
1130
1131 return Qnil;
1132 }
1133 \f
1134 int parse_sexp_ignore_comments;
1135
1136 /* Return the position across COUNT words from FROM.
1137 If that many words cannot be found before the end of the buffer, return 0.
1138 COUNT negative means scan backward and stop at word beginning. */
1139
1140 int
1141 scan_words (from, count)
1142 register int from, count;
1143 {
1144 register int beg = BEGV;
1145 register int end = ZV;
1146 register int from_byte = CHAR_TO_BYTE (from);
1147 register enum syntaxcode code;
1148 int ch0, ch1;
1149
1150 immediate_quit = 1;
1151 QUIT;
1152
1153 SETUP_SYNTAX_TABLE (from, count);
1154
1155 while (count > 0)
1156 {
1157 while (1)
1158 {
1159 if (from == end)
1160 {
1161 immediate_quit = 0;
1162 return 0;
1163 }
1164 UPDATE_SYNTAX_TABLE_FORWARD (from);
1165 ch0 = FETCH_CHAR (from_byte);
1166 code = SYNTAX (ch0);
1167 INC_BOTH (from, from_byte);
1168 if (words_include_escapes
1169 && (code == Sescape || code == Scharquote))
1170 break;
1171 if (code == Sword)
1172 break;
1173 }
1174 /* Now CH0 is a character which begins a word and FROM is the
1175 position of the next character. */
1176 while (1)
1177 {
1178 if (from == end) break;
1179 UPDATE_SYNTAX_TABLE_FORWARD (from);
1180 ch1 = FETCH_CHAR (from_byte);
1181 code = SYNTAX (ch1);
1182 if (!(words_include_escapes
1183 && (code == Sescape || code == Scharquote)))
1184 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1185 break;
1186 INC_BOTH (from, from_byte);
1187 ch0 = ch1;
1188 }
1189 count--;
1190 }
1191 while (count < 0)
1192 {
1193 while (1)
1194 {
1195 if (from == beg)
1196 {
1197 immediate_quit = 0;
1198 return 0;
1199 }
1200 DEC_BOTH (from, from_byte);
1201 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1202 ch1 = FETCH_CHAR (from_byte);
1203 code = SYNTAX (ch1);
1204 if (words_include_escapes
1205 && (code == Sescape || code == Scharquote))
1206 break;
1207 if (code == Sword)
1208 break;
1209 }
1210 /* Now CH1 is a character which ends a word and FROM is the
1211 position of it. */
1212 while (1)
1213 {
1214 int temp_byte;
1215
1216 if (from == beg)
1217 break;
1218 temp_byte = dec_bytepos (from_byte);
1219 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1220 ch0 = FETCH_CHAR (temp_byte);
1221 code = SYNTAX (ch0);
1222 if (!(words_include_escapes
1223 && (code == Sescape || code == Scharquote)))
1224 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1225 break;
1226 DEC_BOTH (from, from_byte);
1227 ch1 = ch0;
1228 }
1229 count++;
1230 }
1231
1232 immediate_quit = 0;
1233
1234 return from;
1235 }
1236
1237 DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
1238 "Move point forward ARG words (backward if ARG is negative).\n\
1239 Normally returns t.\n\
1240 If an edge of the buffer or a field boundary is reached, point is left there\n\
1241 and the function returns nil. Field boundaries are not noticed if\n\
1242 `inhibit-field-text-motion' is non-nil.")
1243 (count)
1244 Lisp_Object count;
1245 {
1246 int orig_val, val;
1247 CHECK_NUMBER (count, 0);
1248
1249 val = orig_val = scan_words (PT, XINT (count));
1250 if (! orig_val)
1251 val = XINT (count) > 0 ? ZV : BEGV;
1252
1253 /* Avoid jumping out of an input field. */
1254 val = XFASTINT (Fconstrain_to_field (make_number (val), make_number (PT),
1255 Qt, Qnil));
1256
1257 SET_PT (val);
1258 return val == orig_val ? Qt : Qnil;
1259 }
1260 \f
1261 Lisp_Object skip_chars ();
1262
1263 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1264 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
1265 STRING is like the inside of a `[...]' in a regular expression\n\
1266 except that `]' is never special and `\\' quotes `^', `-' or `\\'\n\
1267 (but not as the end of a range; quoting is never needed there).\n\
1268 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
1269 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
1270 Returns the distance traveled, either zero or positive.")
1271 (string, lim)
1272 Lisp_Object string, lim;
1273 {
1274 return skip_chars (1, 0, string, lim);
1275 }
1276
1277 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1278 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
1279 See `skip-chars-forward' for details.\n\
1280 Returns the distance traveled, either zero or negative.")
1281 (string, lim)
1282 Lisp_Object string, lim;
1283 {
1284 return skip_chars (0, 0, string, lim);
1285 }
1286
1287 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1288 "Move point forward across chars in specified syntax classes.\n\
1289 SYNTAX is a string of syntax code characters.\n\
1290 Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
1291 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1292 This function returns the distance traveled, either zero or positive.")
1293 (syntax, lim)
1294 Lisp_Object syntax, lim;
1295 {
1296 return skip_chars (1, 1, syntax, lim);
1297 }
1298
1299 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1300 "Move point backward across chars in specified syntax classes.\n\
1301 SYNTAX is a string of syntax code characters.\n\
1302 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
1303 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1304 This function returns the distance traveled, either zero or negative.")
1305 (syntax, lim)
1306 Lisp_Object syntax, lim;
1307 {
1308 return skip_chars (0, 1, syntax, lim);
1309 }
1310
1311 static Lisp_Object
1312 skip_chars (forwardp, syntaxp, string, lim)
1313 int forwardp, syntaxp;
1314 Lisp_Object string, lim;
1315 {
1316 register unsigned int c;
1317 register int ch;
1318 unsigned char fastmap[0400];
1319 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
1320 of which codes don't fit in FASTMAP. In that case, we set the
1321 first byte of multibyte form (i.e. base leading-code) in FASTMAP
1322 and set the actual ranges of characters in CHAR_RANGES. In the
1323 form "X-Y" of STRING, both X and Y must belong to the same
1324 character set because a range striding across character sets is
1325 meaningless. */
1326 int *char_ranges;
1327 int n_char_ranges = 0;
1328 int negate = 0;
1329 register int i, i_byte;
1330 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1331 int string_multibyte;
1332 int size_byte;
1333
1334 CHECK_STRING (string, 0);
1335 char_ranges = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2);
1336 string_multibyte = STRING_MULTIBYTE (string);
1337 size_byte = STRING_BYTES (XSTRING (string));
1338
1339 if (NILP (lim))
1340 XSETINT (lim, forwardp ? ZV : BEGV);
1341 else
1342 CHECK_NUMBER_COERCE_MARKER (lim, 0);
1343
1344 /* In any case, don't allow scan outside bounds of buffer. */
1345 if (XINT (lim) > ZV)
1346 XSETFASTINT (lim, ZV);
1347 if (XINT (lim) < BEGV)
1348 XSETFASTINT (lim, BEGV);
1349
1350 bzero (fastmap, sizeof fastmap);
1351
1352 i = 0, i_byte = 0;
1353
1354 if (i_byte < size_byte
1355 && XSTRING (string)->data[0] == '^')
1356 {
1357 negate = 1; i++, i_byte++;
1358 }
1359
1360 /* Find the characters specified and set their elements of fastmap.
1361 If syntaxp, each character counts as itself.
1362 Otherwise, handle backslashes and ranges specially. */
1363
1364 while (i_byte < size_byte)
1365 {
1366 int c_leading_code = XSTRING (string)->data[i_byte];
1367
1368 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1369
1370 /* Convert multibyteness between what the string has
1371 and what the buffer has. */
1372 if (multibyte)
1373 c = unibyte_char_to_multibyte (c);
1374 else
1375 c &= 0377;
1376
1377 if (syntaxp)
1378 fastmap[syntax_spec_code[c & 0377]] = 1;
1379 else
1380 {
1381 if (c == '\\')
1382 {
1383 if (i_byte == size_byte)
1384 break;
1385
1386 c_leading_code = XSTRING (string)->data[i_byte];
1387 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1388 }
1389 if (i_byte < size_byte
1390 && XSTRING (string)->data[i_byte] == '-')
1391 {
1392 unsigned int c2, c2_leading_code;
1393
1394 /* Skip over the dash. */
1395 i++, i_byte++;
1396
1397 if (i_byte == size_byte)
1398 break;
1399
1400 /* Get the end of the range. */
1401 c2_leading_code = XSTRING (string)->data[i_byte];
1402 FETCH_STRING_CHAR_ADVANCE (c2, string, i, i_byte);
1403
1404 if (SINGLE_BYTE_CHAR_P (c))
1405 {
1406 if (! SINGLE_BYTE_CHAR_P (c2))
1407 error ("Invalid charcter range: %s",
1408 XSTRING (string)->data);
1409 while (c <= c2)
1410 {
1411 fastmap[c] = 1;
1412 c++;
1413 }
1414 }
1415 else
1416 {
1417 if (c_leading_code != c2_leading_code)
1418 error ("Invalid charcter range: %s",
1419 XSTRING (string)->data);
1420 fastmap[c_leading_code] = 1;
1421 if (c <= c2)
1422 {
1423 char_ranges[n_char_ranges++] = c;
1424 char_ranges[n_char_ranges++] = c2;
1425 }
1426 }
1427 }
1428 else
1429 {
1430 fastmap[c_leading_code] = 1;
1431 if (!SINGLE_BYTE_CHAR_P (c))
1432 {
1433 char_ranges[n_char_ranges++] = c;
1434 char_ranges[n_char_ranges++] = c;
1435 }
1436 }
1437 }
1438 }
1439
1440 /* If ^ was the first character, complement the fastmap. In
1441 addition, as all multibyte characters have possibility of
1442 matching, set all entries for base leading codes, which is
1443 harmless even if SYNTAXP is 1. */
1444
1445 if (negate)
1446 for (i = 0; i < sizeof fastmap; i++)
1447 {
1448 if (!multibyte || !BASE_LEADING_CODE_P (i))
1449 fastmap[i] ^= 1;
1450 else
1451 fastmap[i] = 1;
1452 }
1453
1454 {
1455 int start_point = PT;
1456 int pos = PT;
1457 int pos_byte = PT_BYTE;
1458
1459 immediate_quit = 1;
1460 if (syntaxp)
1461 {
1462 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1463 if (forwardp)
1464 {
1465 if (multibyte)
1466 {
1467 if (pos < XINT (lim))
1468 while (fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
1469 {
1470 /* Since we already checked for multibyteness,
1471 avoid using INC_BOTH which checks again. */
1472 INC_POS (pos_byte);
1473 pos++;
1474 if (pos >= XINT (lim))
1475 break;
1476 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1477 }
1478 }
1479 else
1480 {
1481 while (pos < XINT (lim)
1482 && fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1483 {
1484 pos++;
1485 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1486 }
1487 }
1488 }
1489 else
1490 {
1491 if (multibyte)
1492 {
1493 while (pos > XINT (lim))
1494 {
1495 int savepos = pos_byte;
1496 /* Since we already checked for multibyteness,
1497 avoid using DEC_BOTH which checks again. */
1498 pos--;
1499 DEC_POS (pos_byte);
1500 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1501 if (!fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
1502 {
1503 pos++;
1504 pos_byte = savepos;
1505 break;
1506 }
1507 }
1508 }
1509 else
1510 {
1511 if (pos > XINT (lim))
1512 while (fastmap[(int) SYNTAX (FETCH_BYTE (pos - 1))])
1513 {
1514 pos--;
1515 if (pos <= XINT (lim))
1516 break;
1517 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
1518 }
1519 }
1520 }
1521 }
1522 else
1523 {
1524 if (forwardp)
1525 {
1526 if (multibyte)
1527 while (pos < XINT (lim) && fastmap[(c = FETCH_BYTE (pos_byte))])
1528 {
1529 if (!BASE_LEADING_CODE_P (c))
1530 INC_BOTH (pos, pos_byte);
1531 else if (n_char_ranges)
1532 {
1533 /* We much check CHAR_RANGES for a multibyte
1534 character. */
1535 ch = FETCH_MULTIBYTE_CHAR (pos_byte);
1536 for (i = 0; i < n_char_ranges; i += 2)
1537 if ((ch >= char_ranges[i] && ch <= char_ranges[i + 1]))
1538 break;
1539 if (!(negate ^ (i < n_char_ranges)))
1540 break;
1541
1542 INC_BOTH (pos, pos_byte);
1543 }
1544 else
1545 {
1546 if (!negate) break;
1547 INC_BOTH (pos, pos_byte);
1548 }
1549 }
1550 else
1551 while (pos < XINT (lim) && fastmap[FETCH_BYTE (pos)])
1552 pos++;
1553 }
1554 else
1555 {
1556 if (multibyte)
1557 while (pos > XINT (lim))
1558 {
1559 int savepos = pos_byte;
1560 DEC_BOTH (pos, pos_byte);
1561 if (fastmap[(c = FETCH_BYTE (pos_byte))])
1562 {
1563 if (!BASE_LEADING_CODE_P (c))
1564 ;
1565 else if (n_char_ranges)
1566 {
1567 /* We much check CHAR_RANGES for a multibyte
1568 character. */
1569 ch = FETCH_MULTIBYTE_CHAR (pos_byte);
1570 for (i = 0; i < n_char_ranges; i += 2)
1571 if (ch >= char_ranges[i] && ch <= char_ranges[i + 1])
1572 break;
1573 if (!(negate ^ (i < n_char_ranges)))
1574 {
1575 pos++;
1576 pos_byte = savepos;
1577 break;
1578 }
1579 }
1580 else
1581 if (!negate)
1582 {
1583 pos++;
1584 pos_byte = savepos;
1585 break;
1586 }
1587 }
1588 else
1589 {
1590 pos++;
1591 pos_byte = savepos;
1592 break;
1593 }
1594 }
1595 else
1596 while (pos > XINT (lim) && fastmap[FETCH_BYTE (pos - 1)])
1597 pos--;
1598 }
1599 }
1600
1601 #if 0 /* Not needed now that a position in mid-character
1602 cannot be specified in Lisp. */
1603 if (multibyte
1604 /* INC_POS or DEC_POS might have moved POS over LIM. */
1605 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1606 pos = XINT (lim);
1607 #endif
1608
1609 if (! multibyte)
1610 pos_byte = pos;
1611
1612 SET_PT_BOTH (pos, pos_byte);
1613 immediate_quit = 0;
1614
1615 return make_number (PT - start_point);
1616 }
1617 }
1618 \f
1619 /* Jump over a comment, assuming we are at the beginning of one.
1620 FROM is the current position.
1621 FROM_BYTE is the bytepos corresponding to FROM.
1622 Do not move past STOP (a charpos).
1623 The comment over which we have to jump is of style STYLE
1624 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
1625 NESTING should be positive to indicate the nesting at the beginning
1626 for nested comments and should be zero or negative else.
1627 ST_COMMENT_STYLE cannot be nested.
1628 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
1629 (or 0 If the search cannot start in the middle of a two-character).
1630
1631 If successful, return 1 and store the charpos of the comment's end
1632 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
1633 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
1634 corresponding bytepos into *BYTEPOS_PTR and the current nesting
1635 (as defined for state.incomment) in *INCOMMENT_PTR.
1636
1637 The comment end is the last character of the comment rather than the
1638 character just after the comment.
1639
1640 Global syntax data is assumed to initially be valid for FROM and
1641 remains valid for forward search starting at the returned position. */
1642
1643 static int
1644 forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
1645 charpos_ptr, bytepos_ptr, incomment_ptr)
1646 int from, from_byte, stop;
1647 int nesting, style, prev_syntax;
1648 int *charpos_ptr, *bytepos_ptr, *incomment_ptr;
1649 {
1650 register int c, c1;
1651 register enum syntaxcode code;
1652 register int syntax;
1653
1654 if (nesting <= 0) nesting = -1;
1655
1656 /* Enter the loop in the middle so that we find
1657 a 2-char comment ender if we start in the middle of it. */
1658 syntax = prev_syntax;
1659 if (syntax != 0) goto forw_incomment;
1660
1661 while (1)
1662 {
1663 if (from == stop)
1664 {
1665 *incomment_ptr = nesting;
1666 *charpos_ptr = from;
1667 *bytepos_ptr = from_byte;
1668 return 0;
1669 }
1670 c = FETCH_CHAR (from_byte);
1671 syntax = SYNTAX_WITH_FLAGS (c);
1672 code = syntax & 0xff;
1673 if (code == Sendcomment
1674 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1675 && --nesting <= 0)
1676 /* we have encountered a comment end of the same style
1677 as the comment sequence which began this comment
1678 section */
1679 break;
1680 if (code == Scomment_fence
1681 && style == ST_COMMENT_STYLE)
1682 /* we have encountered a comment end of the same style
1683 as the comment sequence which began this comment
1684 section. */
1685 break;
1686 if (nesting > 0
1687 && code == Scomment
1688 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
1689 /* we have encountered a nested comment of the same style
1690 as the comment sequence which began this comment section */
1691 nesting++;
1692 INC_BOTH (from, from_byte);
1693 UPDATE_SYNTAX_TABLE_FORWARD (from);
1694
1695 forw_incomment:
1696 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
1697 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1698 && (c1 = FETCH_CHAR (from_byte),
1699 SYNTAX_COMEND_SECOND (c1)))
1700 {
1701 if (--nesting <= 0)
1702 /* we have encountered a comment end of the same style
1703 as the comment sequence which began this comment
1704 section */
1705 break;
1706 else
1707 {
1708 INC_BOTH (from, from_byte);
1709 UPDATE_SYNTAX_TABLE_FORWARD (from);
1710 }
1711 }
1712 if (nesting > 0
1713 && from < stop
1714 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
1715 && (c1 = FETCH_CHAR (from_byte),
1716 SYNTAX_COMMENT_STYLE (c1) == style
1717 && SYNTAX_COMSTART_SECOND (c1)))
1718 /* we have encountered a nested comment of the same style
1719 as the comment sequence which began this comment
1720 section */
1721 {
1722 INC_BOTH (from, from_byte);
1723 UPDATE_SYNTAX_TABLE_FORWARD (from);
1724 nesting++;
1725 }
1726 }
1727 *charpos_ptr = from;
1728 *bytepos_ptr = from_byte;
1729 return 1;
1730 }
1731
1732 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
1733 "Move forward across up to N comments. If N is negative, move backward.\n\
1734 Stop scanning if we find something other than a comment or whitespace.\n\
1735 Set point to where scanning stops.\n\
1736 If N comments are found as expected, with nothing except whitespace\n\
1737 between them, return t; otherwise return nil.")
1738 (count)
1739 Lisp_Object count;
1740 {
1741 register int from;
1742 int from_byte;
1743 register int stop;
1744 register int c, c1;
1745 register enum syntaxcode code;
1746 int comstyle = 0; /* style of comment encountered */
1747 int comnested = 0; /* whether the comment is nestable or not */
1748 int found;
1749 int count1;
1750 int out_charpos, out_bytepos;
1751 int dummy;
1752
1753 CHECK_NUMBER (count, 0);
1754 count1 = XINT (count);
1755 stop = count1 > 0 ? ZV : BEGV;
1756
1757 immediate_quit = 1;
1758 QUIT;
1759
1760 from = PT;
1761 from_byte = PT_BYTE;
1762
1763 SETUP_SYNTAX_TABLE (from, count1);
1764 while (count1 > 0)
1765 {
1766 do
1767 {
1768 int comstart_first;
1769
1770 if (from == stop)
1771 {
1772 SET_PT_BOTH (from, from_byte);
1773 immediate_quit = 0;
1774 return Qnil;
1775 }
1776 c = FETCH_CHAR (from_byte);
1777 code = SYNTAX (c);
1778 comstart_first = SYNTAX_COMSTART_FIRST (c);
1779 comnested = SYNTAX_COMMENT_NESTED (c);
1780 INC_BOTH (from, from_byte);
1781 UPDATE_SYNTAX_TABLE_FORWARD (from);
1782 comstyle = 0;
1783 if (from < stop && comstart_first
1784 && (c1 = FETCH_CHAR (from_byte),
1785 SYNTAX_COMSTART_SECOND (c1)))
1786 {
1787 /* We have encountered a comment start sequence and we
1788 are ignoring all text inside comments. We must record
1789 the comment style this sequence begins so that later,
1790 only a comment end of the same style actually ends
1791 the comment section. */
1792 code = Scomment;
1793 comstyle = SYNTAX_COMMENT_STYLE (c1);
1794 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
1795 INC_BOTH (from, from_byte);
1796 UPDATE_SYNTAX_TABLE_FORWARD (from);
1797 }
1798 /* FIXME: here we ignore 2-char endcomments while we don't
1799 when going backwards. */
1800 }
1801 while (code == Swhitespace || code == Sendcomment);
1802
1803 if (code == Scomment_fence)
1804 comstyle = ST_COMMENT_STYLE;
1805 else if (code != Scomment)
1806 {
1807 immediate_quit = 0;
1808 DEC_BOTH (from, from_byte);
1809 SET_PT_BOTH (from, from_byte);
1810 return Qnil;
1811 }
1812 /* We're at the start of a comment. */
1813 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
1814 &out_charpos, &out_bytepos, &dummy);
1815 from = out_charpos; from_byte = out_bytepos;
1816 if (!found)
1817 {
1818 immediate_quit = 0;
1819 SET_PT_BOTH (from, from_byte);
1820 return Qnil;
1821 }
1822 INC_BOTH (from, from_byte);
1823 UPDATE_SYNTAX_TABLE_FORWARD (from);
1824 /* We have skipped one comment. */
1825 count1--;
1826 }
1827
1828 while (count1 < 0)
1829 {
1830 while (1)
1831 {
1832 int quoted, comstart_second;
1833
1834 if (from <= stop)
1835 {
1836 SET_PT_BOTH (BEGV, BEGV_BYTE);
1837 immediate_quit = 0;
1838 return Qnil;
1839 }
1840
1841 DEC_BOTH (from, from_byte);
1842 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
1843 quoted = char_quoted (from, from_byte);
1844 if (quoted)
1845 {
1846 DEC_BOTH (from, from_byte);
1847 goto leave;
1848 }
1849 c = FETCH_CHAR (from_byte);
1850 code = SYNTAX (c);
1851 comstyle = 0;
1852 comnested = SYNTAX_COMMENT_NESTED (c);
1853 if (code == Sendcomment)
1854 comstyle = SYNTAX_COMMENT_STYLE (c);
1855 comstart_second = SYNTAX_COMSTART_SECOND (c);
1856 if (from > stop && SYNTAX_COMEND_SECOND (c)
1857 && prev_char_comend_first (from, from_byte)
1858 && !char_quoted (from - 1, dec_bytepos (from_byte)))
1859 {
1860 /* We must record the comment style encountered so that
1861 later, we can match only the proper comment begin
1862 sequence of the same style. */
1863 DEC_BOTH (from, from_byte);
1864 code = Sendcomment;
1865 /* Calling char_quoted, above, set up global syntax position
1866 at the new value of FROM. */
1867 c1 = FETCH_CHAR (from_byte);
1868 comstyle = SYNTAX_COMMENT_STYLE (c1);
1869 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
1870 }
1871 if (from > stop && comstart_second
1872 && prev_char_comstart_first (from, from_byte)
1873 && !char_quoted (from - 1, dec_bytepos (from_byte)))
1874 {
1875 code = Scomment;
1876 DEC_BOTH (from, from_byte);
1877 }
1878
1879 if (code == Scomment_fence)
1880 {
1881 /* Skip until first preceding unquoted comment_fence. */
1882 int found = 0, ini = from, ini_byte = from_byte;
1883
1884 while (1)
1885 {
1886 DEC_BOTH (from, from_byte);
1887 if (from == stop)
1888 break;
1889 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1890 c = FETCH_CHAR (from_byte);
1891 if (SYNTAX (c) == Scomment_fence
1892 && !char_quoted (from, from_byte))
1893 {
1894 found = 1;
1895 break;
1896 }
1897 }
1898 if (found == 0)
1899 {
1900 from = ini; /* Set point to ini + 1. */
1901 from_byte = ini_byte;
1902 goto leave;
1903 }
1904 }
1905 else if (code == Sendcomment)
1906 {
1907 found = back_comment (from, from_byte, stop, comnested, comstyle,
1908 &out_charpos, &out_bytepos);
1909 if (found == -1)
1910 {
1911 #if 0 /* cc-mode (and maybe others) relies on the bogus behavior. */
1912 /* Failure: we should go back to the end of this
1913 not-quite-endcomment. */
1914 if (SYNTAX(c) != code)
1915 /* It was a two-char Sendcomment. */
1916 INC_BOTH (from, from_byte);
1917 goto leave;
1918 #endif
1919 }
1920 else
1921 /* We have skipped one comment. */
1922 from = out_charpos, from_byte = out_bytepos;
1923 break;
1924 }
1925 else if (code != Swhitespace && code != Scomment)
1926 {
1927 leave:
1928 immediate_quit = 0;
1929 INC_BOTH (from, from_byte);
1930 SET_PT_BOTH (from, from_byte);
1931 return Qnil;
1932 }
1933 }
1934
1935 count1++;
1936 }
1937
1938 SET_PT_BOTH (from, from_byte);
1939 immediate_quit = 0;
1940 return Qt;
1941 }
1942 \f
1943 /* Return syntax code of character C if C is a single byte character
1944 or `multibyte_symbol_p' is zero. Otherwise, retrun Ssymbol. */
1945
1946 #define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
1947 ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p) \
1948 ? SYNTAX (c) : Ssymbol)
1949
1950 static Lisp_Object
1951 scan_lists (from, count, depth, sexpflag)
1952 register int from;
1953 int count, depth, sexpflag;
1954 {
1955 Lisp_Object val;
1956 register int stop = count > 0 ? ZV : BEGV;
1957 register int c, c1;
1958 int stringterm;
1959 int quoted;
1960 int mathexit = 0;
1961 register enum syntaxcode code, temp_code;
1962 int min_depth = depth; /* Err out if depth gets less than this. */
1963 int comstyle = 0; /* style of comment encountered */
1964 int comnested = 0; /* whether the comment is nestable or not */
1965 int temp_pos;
1966 int last_good = from;
1967 int found;
1968 int from_byte;
1969 int out_bytepos, out_charpos;
1970 int temp, dummy;
1971 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
1972
1973 if (depth > 0) min_depth = 0;
1974
1975 if (from > ZV) from = ZV;
1976 if (from < BEGV) from = BEGV;
1977
1978 from_byte = CHAR_TO_BYTE (from);
1979
1980 immediate_quit = 1;
1981 QUIT;
1982
1983 SETUP_SYNTAX_TABLE (from, count);
1984 while (count > 0)
1985 {
1986 while (from < stop)
1987 {
1988 int comstart_first, prefix;
1989 UPDATE_SYNTAX_TABLE_FORWARD (from);
1990 c = FETCH_CHAR (from_byte);
1991 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
1992 comstart_first = SYNTAX_COMSTART_FIRST (c);
1993 comnested = SYNTAX_COMMENT_NESTED (c);
1994 prefix = SYNTAX_PREFIX (c);
1995 if (depth == min_depth)
1996 last_good = from;
1997 INC_BOTH (from, from_byte);
1998 UPDATE_SYNTAX_TABLE_FORWARD (from);
1999 if (from < stop && comstart_first
2000 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte))
2001 && parse_sexp_ignore_comments)
2002 {
2003 /* we have encountered a comment start sequence and we
2004 are ignoring all text inside comments. We must record
2005 the comment style this sequence begins so that later,
2006 only a comment end of the same style actually ends
2007 the comment section */
2008 code = Scomment;
2009 c1 = FETCH_CHAR (from_byte);
2010 comstyle = SYNTAX_COMMENT_STYLE (c1);
2011 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2012 INC_BOTH (from, from_byte);
2013 UPDATE_SYNTAX_TABLE_FORWARD (from);
2014 }
2015
2016 if (prefix)
2017 continue;
2018
2019 switch (SWITCH_ENUM_CAST (code))
2020 {
2021 case Sescape:
2022 case Scharquote:
2023 if (from == stop) goto lose;
2024 INC_BOTH (from, from_byte);
2025 /* treat following character as a word constituent */
2026 case Sword:
2027 case Ssymbol:
2028 if (depth || !sexpflag) break;
2029 /* This word counts as a sexp; return at end of it. */
2030 while (from < stop)
2031 {
2032 UPDATE_SYNTAX_TABLE_FORWARD (from);
2033
2034 /* Some compilers can't handle this inside the switch. */
2035 c = FETCH_CHAR (from_byte);
2036 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2037 switch (temp)
2038 {
2039 case Scharquote:
2040 case Sescape:
2041 INC_BOTH (from, from_byte);
2042 if (from == stop) goto lose;
2043 break;
2044 case Sword:
2045 case Ssymbol:
2046 case Squote:
2047 break;
2048 default:
2049 goto done;
2050 }
2051 INC_BOTH (from, from_byte);
2052 }
2053 goto done;
2054
2055 case Scomment_fence:
2056 comstyle = ST_COMMENT_STYLE;
2057 /* FALLTHROUGH */
2058 case Scomment:
2059 if (!parse_sexp_ignore_comments) break;
2060 UPDATE_SYNTAX_TABLE_FORWARD (from);
2061 found = forw_comment (from, from_byte, stop,
2062 comnested, comstyle, 0,
2063 &out_charpos, &out_bytepos, &dummy);
2064 from = out_charpos, from_byte = out_bytepos;
2065 if (!found)
2066 {
2067 if (depth == 0)
2068 goto done;
2069 goto lose;
2070 }
2071 INC_BOTH (from, from_byte);
2072 UPDATE_SYNTAX_TABLE_FORWARD (from);
2073 break;
2074
2075 case Smath:
2076 if (!sexpflag)
2077 break;
2078 if (from != stop && c == FETCH_CHAR (from_byte))
2079 {
2080 INC_BOTH (from, from_byte);
2081 }
2082 if (mathexit)
2083 {
2084 mathexit = 0;
2085 goto close1;
2086 }
2087 mathexit = 1;
2088
2089 case Sopen:
2090 if (!++depth) goto done;
2091 break;
2092
2093 case Sclose:
2094 close1:
2095 if (!--depth) goto done;
2096 if (depth < min_depth)
2097 Fsignal (Qscan_error,
2098 Fcons (build_string ("Containing expression ends prematurely"),
2099 Fcons (make_number (last_good),
2100 Fcons (make_number (from), Qnil))));
2101 break;
2102
2103 case Sstring:
2104 case Sstring_fence:
2105 temp_pos = dec_bytepos (from_byte);
2106 stringterm = FETCH_CHAR (temp_pos);
2107 while (1)
2108 {
2109 if (from >= stop) goto lose;
2110 UPDATE_SYNTAX_TABLE_FORWARD (from);
2111 c = FETCH_CHAR (from_byte);
2112 if (code == Sstring
2113 ? c == stringterm
2114 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
2115 break;
2116
2117 /* Some compilers can't handle this inside the switch. */
2118 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2119 switch (temp)
2120 {
2121 case Scharquote:
2122 case Sescape:
2123 INC_BOTH (from, from_byte);
2124 }
2125 INC_BOTH (from, from_byte);
2126 }
2127 INC_BOTH (from, from_byte);
2128 if (!depth && sexpflag) goto done;
2129 break;
2130 }
2131 }
2132
2133 /* Reached end of buffer. Error if within object, return nil if between */
2134 if (depth) goto lose;
2135
2136 immediate_quit = 0;
2137 return Qnil;
2138
2139 /* End of object reached */
2140 done:
2141 count--;
2142 }
2143
2144 while (count < 0)
2145 {
2146 while (from > stop)
2147 {
2148 DEC_BOTH (from, from_byte);
2149 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2150 c = FETCH_CHAR (from_byte);
2151 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2152 if (depth == min_depth)
2153 last_good = from;
2154 comstyle = 0;
2155 comnested = SYNTAX_COMMENT_NESTED (c);
2156 if (code == Sendcomment)
2157 comstyle = SYNTAX_COMMENT_STYLE (c);
2158 if (from > stop && SYNTAX_COMEND_SECOND (c)
2159 && prev_char_comend_first (from, from_byte)
2160 && parse_sexp_ignore_comments)
2161 {
2162 /* We must record the comment style encountered so that
2163 later, we can match only the proper comment begin
2164 sequence of the same style. */
2165 DEC_BOTH (from, from_byte);
2166 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2167 code = Sendcomment;
2168 c1 = FETCH_CHAR (from_byte);
2169 comstyle = SYNTAX_COMMENT_STYLE (c1);
2170 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2171 }
2172
2173 /* Quoting turns anything except a comment-ender
2174 into a word character. Note that this if cannot be true
2175 if we decremented FROM in the if-statement above. */
2176 if (code != Sendcomment && char_quoted (from, from_byte))
2177 code = Sword;
2178 else if (SYNTAX_PREFIX (c))
2179 continue;
2180
2181 switch (SWITCH_ENUM_CAST (code))
2182 {
2183 case Sword:
2184 case Ssymbol:
2185 case Sescape:
2186 case Scharquote:
2187 if (depth || !sexpflag) break;
2188 /* This word counts as a sexp; count object finished
2189 after passing it. */
2190 while (from > stop)
2191 {
2192 temp_pos = from_byte;
2193 if (! NILP (current_buffer->enable_multibyte_characters))
2194 DEC_POS (temp_pos);
2195 else
2196 temp_pos--;
2197 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2198 c1 = FETCH_CHAR (temp_pos);
2199 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2200 /* Don't allow comment-end to be quoted. */
2201 if (temp_code == Sendcomment)
2202 goto done2;
2203 quoted = char_quoted (from - 1, temp_pos);
2204 if (quoted)
2205 {
2206 DEC_BOTH (from, from_byte);
2207 temp_pos = dec_bytepos (temp_pos);
2208 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2209 }
2210 c1 = FETCH_CHAR (temp_pos);
2211 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2212 if (! (quoted || temp_code == Sword
2213 || temp_code == Ssymbol
2214 || temp_code == Squote))
2215 goto done2;
2216 DEC_BOTH (from, from_byte);
2217 }
2218 goto done2;
2219
2220 case Smath:
2221 if (!sexpflag)
2222 break;
2223 temp_pos = dec_bytepos (from_byte);
2224 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2225 if (from != stop && c == FETCH_CHAR (temp_pos))
2226 DEC_BOTH (from, from_byte);
2227 if (mathexit)
2228 {
2229 mathexit = 0;
2230 goto open2;
2231 }
2232 mathexit = 1;
2233
2234 case Sclose:
2235 if (!++depth) goto done2;
2236 break;
2237
2238 case Sopen:
2239 open2:
2240 if (!--depth) goto done2;
2241 if (depth < min_depth)
2242 Fsignal (Qscan_error,
2243 Fcons (build_string ("Containing expression ends prematurely"),
2244 Fcons (make_number (last_good),
2245 Fcons (make_number (from), Qnil))));
2246 break;
2247
2248 case Sendcomment:
2249 if (!parse_sexp_ignore_comments)
2250 break;
2251 found = back_comment (from, from_byte, stop, comnested, comstyle,
2252 &out_charpos, &out_bytepos);
2253 /* FIXME: if found == -1, then it really wasn't a comment-end.
2254 For single-char Sendcomment, we can't do much about it apart
2255 from skipping the char.
2256 For 2-char endcomments, we could try again, taking both
2257 chars as separate entities, but it's a lot of trouble
2258 for very little gain, so we don't bother either. -sm */
2259 if (found != -1)
2260 from = out_charpos, from_byte = out_bytepos;
2261 break;
2262
2263 case Scomment_fence:
2264 case Sstring_fence:
2265 while (1)
2266 {
2267 DEC_BOTH (from, from_byte);
2268 if (from == stop) goto lose;
2269 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2270 if (!char_quoted (from, from_byte)
2271 && (c = FETCH_CHAR (from_byte),
2272 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
2273 break;
2274 }
2275 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2276 break;
2277
2278 case Sstring:
2279 stringterm = FETCH_CHAR (from_byte);
2280 while (1)
2281 {
2282 if (from == stop) goto lose;
2283 temp_pos = from_byte;
2284 if (! NILP (current_buffer->enable_multibyte_characters))
2285 DEC_POS (temp_pos);
2286 else
2287 temp_pos--;
2288 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2289 if (!char_quoted (from - 1, temp_pos)
2290 && stringterm == FETCH_CHAR (temp_pos))
2291 break;
2292 DEC_BOTH (from, from_byte);
2293 }
2294 DEC_BOTH (from, from_byte);
2295 if (!depth && sexpflag) goto done2;
2296 break;
2297 }
2298 }
2299
2300 /* Reached start of buffer. Error if within object, return nil if between */
2301 if (depth) goto lose;
2302
2303 immediate_quit = 0;
2304 return Qnil;
2305
2306 done2:
2307 count++;
2308 }
2309
2310
2311 immediate_quit = 0;
2312 XSETFASTINT (val, from);
2313 return val;
2314
2315 lose:
2316 Fsignal (Qscan_error,
2317 Fcons (build_string ("Unbalanced parentheses"),
2318 Fcons (make_number (last_good),
2319 Fcons (make_number (from), Qnil))));
2320
2321 /* NOTREACHED */
2322 }
2323
2324 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2325 "Scan from character number FROM by COUNT lists.\n\
2326 Returns the character number of the position thus found.\n\
2327 \n\
2328 If DEPTH is nonzero, paren depth begins counting from that value,\n\
2329 only places where the depth in parentheses becomes zero\n\
2330 are candidates for stopping; COUNT such places are counted.\n\
2331 Thus, a positive value for DEPTH means go out levels.\n\
2332 \n\
2333 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2334 \n\
2335 If the beginning or end of (the accessible part of) the buffer is reached\n\
2336 and the depth is wrong, an error is signaled.\n\
2337 If the depth is right but the count is not used up, nil is returned.")
2338 (from, count, depth)
2339 Lisp_Object from, count, depth;
2340 {
2341 CHECK_NUMBER (from, 0);
2342 CHECK_NUMBER (count, 1);
2343 CHECK_NUMBER (depth, 2);
2344
2345 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2346 }
2347
2348 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2349 "Scan from character number FROM by COUNT balanced expressions.\n\
2350 If COUNT is negative, scan backwards.\n\
2351 Returns the character number of the position thus found.\n\
2352 \n\
2353 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2354 \n\
2355 If the beginning or end of (the accessible part of) the buffer is reached\n\
2356 in the middle of a parenthetical grouping, an error is signaled.\n\
2357 If the beginning or end is reached between groupings\n\
2358 but before count is used up, nil is returned.")
2359 (from, count)
2360 Lisp_Object from, count;
2361 {
2362 CHECK_NUMBER (from, 0);
2363 CHECK_NUMBER (count, 1);
2364
2365 return scan_lists (XINT (from), XINT (count), 0, 1);
2366 }
2367
2368 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2369 0, 0, 0,
2370 "Move point backward over any number of chars with prefix syntax.\n\
2371 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
2372 ()
2373 {
2374 int beg = BEGV;
2375 int opoint = PT;
2376 int opoint_byte = PT_BYTE;
2377 int pos = PT;
2378 int pos_byte = PT_BYTE;
2379 int c;
2380
2381 if (pos <= beg)
2382 {
2383 SET_PT_BOTH (opoint, opoint_byte);
2384
2385 return Qnil;
2386 }
2387
2388 SETUP_SYNTAX_TABLE (pos, -1);
2389
2390 DEC_BOTH (pos, pos_byte);
2391
2392 while (!char_quoted (pos, pos_byte)
2393 /* Previous statement updates syntax table. */
2394 && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote)
2395 || SYNTAX_PREFIX (c)))
2396 {
2397 opoint = pos;
2398 opoint_byte = pos_byte;
2399
2400 if (pos + 1 > beg)
2401 DEC_BOTH (pos, pos_byte);
2402 }
2403
2404 SET_PT_BOTH (opoint, opoint_byte);
2405
2406 return Qnil;
2407 }
2408 \f
2409 /* Parse forward from FROM / FROM_BYTE to END,
2410 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2411 and return a description of the state of the parse at END.
2412 If STOPBEFORE is nonzero, stop at the start of an atom.
2413 If COMMENTSTOP is 1, stop at the start of a comment.
2414 If COMMENTSTOP is -1, stop at the start or end of a comment,
2415 after the beginning of a string, or after the end of a string. */
2416
2417 static void
2418 scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
2419 stopbefore, oldstate, commentstop)
2420 struct lisp_parse_state *stateptr;
2421 register int from;
2422 int end, targetdepth, stopbefore;
2423 Lisp_Object oldstate;
2424 int commentstop;
2425 {
2426 struct lisp_parse_state state;
2427
2428 register enum syntaxcode code;
2429 int c1;
2430 int comnested;
2431 struct level { int last, prev; };
2432 struct level levelstart[100];
2433 register struct level *curlevel = levelstart;
2434 struct level *endlevel = levelstart + 100;
2435 register int depth; /* Paren depth of current scanning location.
2436 level - levelstart equals this except
2437 when the depth becomes negative. */
2438 int mindepth; /* Lowest DEPTH value seen. */
2439 int start_quoted = 0; /* Nonzero means starting after a char quote */
2440 Lisp_Object tem;
2441 int prev_from; /* Keep one character before FROM. */
2442 int prev_from_byte;
2443 int prev_from_syntax;
2444 int boundary_stop = commentstop == -1;
2445 int nofence;
2446 int found;
2447 int out_bytepos, out_charpos;
2448 int temp;
2449
2450 prev_from = from;
2451 prev_from_byte = from_byte;
2452 if (from != BEGV)
2453 DEC_BOTH (prev_from, prev_from_byte);
2454
2455 /* Use this macro instead of `from++'. */
2456 #define INC_FROM \
2457 do { prev_from = from; \
2458 prev_from_byte = from_byte; \
2459 prev_from_syntax \
2460 = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte)); \
2461 INC_BOTH (from, from_byte); \
2462 UPDATE_SYNTAX_TABLE_FORWARD (from); \
2463 } while (0)
2464
2465 immediate_quit = 1;
2466 QUIT;
2467
2468 if (NILP (oldstate))
2469 {
2470 depth = 0;
2471 state.instring = -1;
2472 state.incomment = 0;
2473 state.comstyle = 0; /* comment style a by default. */
2474 state.comstr_start = -1; /* no comment/string seen. */
2475 }
2476 else
2477 {
2478 tem = Fcar (oldstate);
2479 if (!NILP (tem))
2480 depth = XINT (tem);
2481 else
2482 depth = 0;
2483
2484 oldstate = Fcdr (oldstate);
2485 oldstate = Fcdr (oldstate);
2486 oldstate = Fcdr (oldstate);
2487 tem = Fcar (oldstate);
2488 /* Check whether we are inside string_fence-style string: */
2489 state.instring = (!NILP (tem)
2490 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2491 : -1);
2492
2493 oldstate = Fcdr (oldstate);
2494 tem = Fcar (oldstate);
2495 state.incomment = (!NILP (tem)
2496 ? (INTEGERP (tem) ? XINT (tem) : -1)
2497 : 0);
2498
2499 oldstate = Fcdr (oldstate);
2500 tem = Fcar (oldstate);
2501 start_quoted = !NILP (tem);
2502
2503 /* if the eighth element of the list is nil, we are in comment
2504 style a. If it is non-nil, we are in comment style b */
2505 oldstate = Fcdr (oldstate);
2506 oldstate = Fcdr (oldstate);
2507 tem = Fcar (oldstate);
2508 state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
2509 ? ST_COMMENT_STYLE : 1);
2510
2511 oldstate = Fcdr (oldstate);
2512 tem = Fcar (oldstate);
2513 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
2514 oldstate = Fcdr (oldstate);
2515 tem = Fcar (oldstate);
2516 while (!NILP (tem)) /* >= second enclosing sexps. */
2517 {
2518 /* curlevel++->last ran into compiler bug on Apollo */
2519 curlevel->last = XINT (Fcar (tem));
2520 if (++curlevel == endlevel)
2521 error ("Nesting too deep for parser");
2522 curlevel->prev = -1;
2523 curlevel->last = -1;
2524 tem = Fcdr (tem);
2525 }
2526 }
2527 state.quoted = 0;
2528 mindepth = depth;
2529
2530 curlevel->prev = -1;
2531 curlevel->last = -1;
2532
2533 SETUP_SYNTAX_TABLE (prev_from, 1);
2534 prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
2535 UPDATE_SYNTAX_TABLE_FORWARD (from);
2536
2537 /* Enter the loop at a place appropriate for initial state. */
2538
2539 if (state.incomment)
2540 goto startincomment;
2541 if (state.instring >= 0)
2542 {
2543 nofence = state.instring != ST_STRING_STYLE;
2544 if (start_quoted)
2545 goto startquotedinstring;
2546 goto startinstring;
2547 }
2548 else if (start_quoted)
2549 goto startquoted;
2550
2551 #if 0 /* This seems to be redundant with the identical code above. */
2552 SETUP_SYNTAX_TABLE (prev_from, 1);
2553 prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
2554 UPDATE_SYNTAX_TABLE_FORWARD (from);
2555 #endif
2556
2557 while (from < end)
2558 {
2559 INC_FROM;
2560 code = prev_from_syntax & 0xff;
2561
2562 if (code == Scomment)
2563 {
2564 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
2565 1 : -1);
2566 state.comstr_start = prev_from;
2567 }
2568 else if (code == Scomment_fence)
2569 {
2570 /* Record the comment style we have entered so that only
2571 the comment-end sequence of the same style actually
2572 terminates the comment section. */
2573 state.comstyle = ST_COMMENT_STYLE;
2574 state.incomment = -1;
2575 state.comstr_start = prev_from;
2576 code = Scomment;
2577 }
2578 else if (from < end)
2579 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax))
2580 if (c1 = FETCH_CHAR (from_byte),
2581 SYNTAX_COMSTART_SECOND (c1))
2582 /* Duplicate code to avoid a complex if-expression
2583 which causes trouble for the SGI compiler. */
2584 {
2585 /* Record the comment style we have entered so that only
2586 the comment-end sequence of the same style actually
2587 terminates the comment section. */
2588 state.comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte));
2589 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
2590 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2591 state.incomment = comnested ? 1 : -1;
2592 state.comstr_start = prev_from;
2593 INC_FROM;
2594 code = Scomment;
2595 }
2596
2597 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
2598 continue;
2599 switch (SWITCH_ENUM_CAST (code))
2600 {
2601 case Sescape:
2602 case Scharquote:
2603 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2604 curlevel->last = prev_from;
2605 startquoted:
2606 if (from == end) goto endquoted;
2607 INC_FROM;
2608 goto symstarted;
2609 /* treat following character as a word constituent */
2610 case Sword:
2611 case Ssymbol:
2612 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2613 curlevel->last = prev_from;
2614 symstarted:
2615 while (from < end)
2616 {
2617 /* Some compilers can't handle this inside the switch. */
2618 temp = SYNTAX (FETCH_CHAR (from_byte));
2619 switch (temp)
2620 {
2621 case Scharquote:
2622 case Sescape:
2623 INC_FROM;
2624 if (from == end) goto endquoted;
2625 break;
2626 case Sword:
2627 case Ssymbol:
2628 case Squote:
2629 break;
2630 default:
2631 goto symdone;
2632 }
2633 INC_FROM;
2634 }
2635 symdone:
2636 curlevel->prev = curlevel->last;
2637 break;
2638
2639 startincomment:
2640 if (commentstop == 1)
2641 goto done;
2642 goto commentloop;
2643
2644 case Scomment:
2645 if (! state.incomment)
2646 abort ();
2647 if (commentstop || boundary_stop) goto done;
2648 commentloop:
2649 /* The (from == BEGV) test is to enter the loop in the middle so
2650 that we find a 2-char comment ender even if we start in the
2651 middle of it. */
2652 found = forw_comment (from, from_byte, end,
2653 state.incomment, state.comstyle,
2654 (from == BEGV) ? 0 : prev_from_syntax,
2655 &out_charpos, &out_bytepos, &state.incomment);
2656 from = out_charpos; from_byte = out_bytepos;
2657 /* Beware! prev_from and friends are invalid now.
2658 Luckily, the `done' doesn't use them and the INC_FROM
2659 sets them to a sane value without looking at them. */
2660 if (!found) goto done;
2661 INC_FROM;
2662 state.incomment = 0;
2663 state.comstyle = 0; /* reset the comment style */
2664 if (boundary_stop) goto done;
2665 break;
2666
2667 case Sopen:
2668 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2669 depth++;
2670 /* curlevel++->last ran into compiler bug on Apollo */
2671 curlevel->last = prev_from;
2672 if (++curlevel == endlevel)
2673 error ("Nesting too deep for parser");
2674 curlevel->prev = -1;
2675 curlevel->last = -1;
2676 if (targetdepth == depth) goto done;
2677 break;
2678
2679 case Sclose:
2680 depth--;
2681 if (depth < mindepth)
2682 mindepth = depth;
2683 if (curlevel != levelstart)
2684 curlevel--;
2685 curlevel->prev = curlevel->last;
2686 if (targetdepth == depth) goto done;
2687 break;
2688
2689 case Sstring:
2690 case Sstring_fence:
2691 state.comstr_start = from - 1;
2692 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2693 curlevel->last = prev_from;
2694 state.instring = (code == Sstring
2695 ? (FETCH_CHAR (prev_from_byte))
2696 : ST_STRING_STYLE);
2697 if (boundary_stop) goto done;
2698 startinstring:
2699 {
2700 nofence = state.instring != ST_STRING_STYLE;
2701
2702 while (1)
2703 {
2704 int c;
2705
2706 if (from >= end) goto done;
2707 c = FETCH_CHAR (from_byte);
2708 /* Some compilers can't handle this inside the switch. */
2709 temp = SYNTAX (c);
2710
2711 /* Check TEMP here so that if the char has
2712 a syntax-table property which says it is NOT
2713 a string character, it does not end the string. */
2714 if (nofence && c == state.instring && temp == Sstring)
2715 break;
2716
2717 switch (temp)
2718 {
2719 case Sstring_fence:
2720 if (!nofence) goto string_end;
2721 break;
2722 case Scharquote:
2723 case Sescape:
2724 INC_FROM;
2725 startquotedinstring:
2726 if (from >= end) goto endquoted;
2727 }
2728 INC_FROM;
2729 }
2730 }
2731 string_end:
2732 state.instring = -1;
2733 curlevel->prev = curlevel->last;
2734 INC_FROM;
2735 if (boundary_stop) goto done;
2736 break;
2737
2738 case Smath:
2739 break;
2740 }
2741 }
2742 goto done;
2743
2744 stop: /* Here if stopping before start of sexp. */
2745 from = prev_from; /* We have just fetched the char that starts it; */
2746 goto done; /* but return the position before it. */
2747
2748 endquoted:
2749 state.quoted = 1;
2750 done:
2751 state.depth = depth;
2752 state.mindepth = mindepth;
2753 state.thislevelstart = curlevel->prev;
2754 state.prevlevelstart
2755 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2756 state.location = from;
2757 state.levelstarts = Qnil;
2758 while (--curlevel >= levelstart)
2759 state.levelstarts = Fcons (make_number (curlevel->last),
2760 state.levelstarts);
2761 immediate_quit = 0;
2762
2763 *stateptr = state;
2764 }
2765
2766 /* This comment supplies the doc string for parse-partial-sexp,
2767 for make-docfile to see. We cannot put this in the real DEFUN
2768 due to limits in the Unix cpp.
2769
2770 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
2771 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
2772 Parsing stops at TO or when certain criteria are met;\n\
2773 point is set to where parsing stops.\n\
2774 If fifth arg STATE is omitted or nil,\n\
2775 parsing assumes that FROM is the beginning of a function.\n\
2776 Value is a list of ten elements describing final state of parsing:\n\
2777 0. depth in parens.\n\
2778 1. character address of start of innermost containing list; nil if none.\n\
2779 2. character address of start of last complete sexp terminated.\n\
2780 3. non-nil if inside a string.\n\
2781 (it is the character that will terminate the string,\n\
2782 or t if the string should be terminated by a generic string delimiter.)\n\
2783 4. nil if outside a comment, t if inside a non-nestable comment, \n\
2784 else an integer (the current comment nesting).\n\
2785 5. t if following a quote character.\n\
2786 6. the minimum paren-depth encountered during this scan.\n\
2787 7. t if in a comment of style b; `syntax-table' if the comment\n\
2788 should be terminated by a generic comment delimiter.\n\
2789 8. character address of start of comment or string; nil if not in one.\n\
2790 9. Intermediate data for continuation of parsing (subject to change).\n\
2791 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
2792 in parentheses becomes equal to TARGETDEPTH.\n\
2793 Fourth arg STOPBEFORE non-nil means stop when come to\n\
2794 any character that starts a sexp.\n\
2795 Fifth arg STATE is a nine-element list like what this function returns.\n\
2796 It is used to initialize the state of the parse. Elements number 1, 2, 6\n\
2797 and 8 are ignored; you can leave off element 8 (the last) entirely.\n\
2798 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.\n\
2799 If it is `syntax-table', stop after the start of a comment or a string,\n\
2800 or after end of a comment or a string.")
2801 (from, to, targetdepth, stopbefore, state, commentstop)
2802 */
2803
2804 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
2805 0 /* See immediately above */)
2806 (from, to, targetdepth, stopbefore, oldstate, commentstop)
2807 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
2808 {
2809 struct lisp_parse_state state;
2810 int target;
2811
2812 if (!NILP (targetdepth))
2813 {
2814 CHECK_NUMBER (targetdepth, 3);
2815 target = XINT (targetdepth);
2816 }
2817 else
2818 target = -100000; /* We won't reach this depth */
2819
2820 validate_region (&from, &to);
2821 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
2822 XINT (to),
2823 target, !NILP (stopbefore), oldstate,
2824 (NILP (commentstop)
2825 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
2826
2827 SET_PT (state.location);
2828
2829 return Fcons (make_number (state.depth),
2830 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
2831 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
2832 Fcons (state.instring >= 0
2833 ? (state.instring == ST_STRING_STYLE
2834 ? Qt : make_number (state.instring)) : Qnil,
2835 Fcons (state.incomment < 0 ? Qt :
2836 (state.incomment == 0 ? Qnil :
2837 make_number (state.incomment)),
2838 Fcons (state.quoted ? Qt : Qnil,
2839 Fcons (make_number (state.mindepth),
2840 Fcons ((state.comstyle
2841 ? (state.comstyle == ST_COMMENT_STYLE
2842 ? Qsyntax_table : Qt) :
2843 Qnil),
2844 Fcons (((state.incomment
2845 || (state.instring >= 0))
2846 ? make_number (state.comstr_start)
2847 : Qnil),
2848 Fcons (state.levelstarts, Qnil))))))))));
2849 }
2850 \f
2851 void
2852 init_syntax_once ()
2853 {
2854 register int i, c;
2855 Lisp_Object temp;
2856
2857 /* This has to be done here, before we call Fmake_char_table. */
2858 Qsyntax_table = intern ("syntax-table");
2859 staticpro (&Qsyntax_table);
2860
2861 /* Intern this now in case it isn't already done.
2862 Setting this variable twice is harmless.
2863 But don't staticpro it here--that is done in alloc.c. */
2864 Qchar_table_extra_slots = intern ("char-table-extra-slots");
2865
2866 /* Create objects which can be shared among syntax tables. */
2867 Vsyntax_code_object = Fmake_vector (make_number (13), Qnil);
2868 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
2869 XVECTOR (Vsyntax_code_object)->contents[i]
2870 = Fcons (make_number (i), Qnil);
2871
2872 /* Now we are ready to set up this property, so we can
2873 create syntax tables. */
2874 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
2875
2876 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
2877
2878 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
2879
2880 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
2881 for (i = 'a'; i <= 'z'; i++)
2882 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2883 for (i = 'A'; i <= 'Z'; i++)
2884 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2885 for (i = '0'; i <= '9'; i++)
2886 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2887
2888 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
2889 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
2890
2891 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
2892 Fcons (make_number (Sopen), make_number (')')));
2893 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
2894 Fcons (make_number (Sclose), make_number ('(')));
2895 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
2896 Fcons (make_number (Sopen), make_number (']')));
2897 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
2898 Fcons (make_number (Sclose), make_number ('[')));
2899 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
2900 Fcons (make_number (Sopen), make_number ('}')));
2901 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
2902 Fcons (make_number (Sclose), make_number ('{')));
2903 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
2904 Fcons (make_number ((int) Sstring), Qnil));
2905 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
2906 Fcons (make_number ((int) Sescape), Qnil));
2907
2908 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
2909 for (i = 0; i < 10; i++)
2910 {
2911 c = "_-+*/&|<>="[i];
2912 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2913 }
2914
2915 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
2916 for (i = 0; i < 12; i++)
2917 {
2918 c = ".,;:?!#@~^'`"[i];
2919 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2920 }
2921
2922 /* All multibyte characters have syntax `word' by default. */
2923 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
2924 for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
2925 XCHAR_TABLE (Vstandard_syntax_table)->contents[i] = temp;
2926 }
2927
2928 void
2929 syms_of_syntax ()
2930 {
2931 Qsyntax_table_p = intern ("syntax-table-p");
2932 staticpro (&Qsyntax_table_p);
2933
2934 staticpro (&Vsyntax_code_object);
2935
2936 Qscan_error = intern ("scan-error");
2937 staticpro (&Qscan_error);
2938 Fput (Qscan_error, Qerror_conditions,
2939 Fcons (Qscan_error, Fcons (Qerror, Qnil)));
2940 Fput (Qscan_error, Qerror_message,
2941 build_string ("Scan error"));
2942
2943 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
2944 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
2945
2946 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
2947 "Non-nil means `forward-sexp', etc., grant `syntax-table' property.\n\
2948 The value of this property should be either a syntax table, or a cons\n\
2949 of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric\n\
2950 syntax code, MATCHCHAR being nil or the character to match (which is\n\
2951 relevant only for open/close type.");
2952
2953 words_include_escapes = 0;
2954 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
2955 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
2956
2957 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
2958 "Non-nil means `scan-sexps' treats all multibyte characters as symbol.");
2959 multibyte_syntax_as_symbol = 0;
2960
2961 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
2962 &open_paren_in_column_0_is_defun_start,
2963 "Non-nil means an open paren in column 0 denotes the start of a defun.");
2964 open_paren_in_column_0_is_defun_start = 1;
2965
2966 defsubr (&Ssyntax_table_p);
2967 defsubr (&Ssyntax_table);
2968 defsubr (&Sstandard_syntax_table);
2969 defsubr (&Scopy_syntax_table);
2970 defsubr (&Sset_syntax_table);
2971 defsubr (&Schar_syntax);
2972 defsubr (&Smatching_paren);
2973 defsubr (&Smodify_syntax_entry);
2974 defsubr (&Sdescribe_syntax);
2975
2976 defsubr (&Sforward_word);
2977
2978 defsubr (&Sskip_chars_forward);
2979 defsubr (&Sskip_chars_backward);
2980 defsubr (&Sskip_syntax_forward);
2981 defsubr (&Sskip_syntax_backward);
2982
2983 defsubr (&Sforward_comment);
2984 defsubr (&Sscan_lists);
2985 defsubr (&Sscan_sexps);
2986 defsubr (&Sbackward_prefix_chars);
2987 defsubr (&Sparse_partial_sexp);
2988 }