]> code.delx.au - gnu-emacs/blob - src/syntax.c
(get_keyelt): Handle an indirect entry with meta char.
[gnu-emacs] / src / syntax.c
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 87, 93, 94, 95, 1997 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 #include <config.h>
23 #include <ctype.h>
24 #include "lisp.h"
25 #include "commands.h"
26 #include "buffer.h"
27 #include "charset.h"
28
29 /* Make syntax table lookup grant data in gl_state. */
30 #define SYNTAX_ENTRY_VIA_PROPERTY
31
32 #include "syntax.h"
33 #include "intervals.h"
34
35 /* We use these constants in place for comment-style and
36 string-ender-char to distinguish comments/strings started by
37 comment_fence and string_fence codes. */
38
39 #define ST_COMMENT_STYLE (256 + 1)
40 #define ST_STRING_STYLE (256 + 2)
41 #include "category.h"
42
43 Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
44
45 static void scan_sexps_forward ();
46 static int char_quoted ();
47
48 int words_include_escapes;
49 int parse_sexp_lookup_properties;
50
51 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
52 if not compiled with GCC. No need to mark it, since it is used
53 only very temporarily. */
54 Lisp_Object syntax_temp;
55
56 /* This is the internal form of the parse state used in parse-partial-sexp. */
57
58 struct lisp_parse_state
59 {
60 int depth; /* Depth at end of parsing. */
61 int instring; /* -1 if not within string, else desired terminator. */
62 int incomment; /* Nonzero if within a comment at end of parsing. */
63 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
64 int quoted; /* Nonzero if just after an escape char at end of parsing */
65 int thislevelstart; /* Char number of most recent start-of-expression at current level */
66 int prevlevelstart; /* Char number of start of containing expression */
67 int location; /* Char number at which parsing stopped. */
68 int mindepth; /* Minimum depth seen while scanning. */
69 int comstr_start; /* Position just after last comment/string starter. */
70 };
71 \f
72 /* These variables are a cache for finding the start of a defun.
73 find_start_pos is the place for which the defun start was found.
74 find_start_value is the defun start position found for it.
75 find_start_buffer is the buffer it was found in.
76 find_start_begv is the BEGV value when it was found.
77 find_start_modiff is the value of MODIFF when it was found. */
78
79 static int find_start_pos;
80 static int find_start_value;
81 static struct buffer *find_start_buffer;
82 static int find_start_begv;
83 static int find_start_modiff;
84 \f
85
86 struct gl_state_s gl_state; /* Global state of syntax parser. */
87
88 INTERVAL interval_of ();
89 #define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
90 to scan to property-change. */
91
92 /*
93 Update gl_state to an appropriate interval which contains POS. The
94 sign of COUNT give the relative position of POS wrt the previously
95 valid interval. If INIT, only [be]_property fields of gl_state are
96 valid at start, the rest is filled basing on OBJECT.
97
98 `gl_state.*_i' are the intervals, and pos is further in the search
99 direction than the intervals - or in an interval. We update the
100 current syntax-table basing on the property of this interval, and
101 update the interval to start further than POS - or be
102 NULL_INTERVAL. We also update lim_property to be the next value of
103 pos to call this subroutine again - or be before/after the
104 start/end of OBJECT. */
105
106 void
107 update_syntax_table (pos, count, init, object)
108 int pos, count, init;
109 Lisp_Object object;
110 {
111 Lisp_Object tmp_table;
112 int cnt = 0, doing_extra = 0, invalidate = 1;
113 INTERVAL i, oldi;
114
115 if (init)
116 {
117 gl_state.start = gl_state.b_property;
118 gl_state.stop = gl_state.e_property;
119 gl_state.forward_i = interval_of (pos, object);
120 i = gl_state.backward_i = gl_state.forward_i;
121 gl_state.left_ok = gl_state.right_ok = 1;
122 invalidate = 0;
123 if (NULL_INTERVAL_P (i))
124 return;
125 gl_state.b_property = i->position - 1;
126 gl_state.e_property = INTERVAL_LAST_POS (i);
127 goto update;
128 }
129 oldi = i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
130
131 /* We are guarantied to be called with pos either in i, of further off. */
132 if (NULL_INTERVAL_P (i))
133 error ("Error in syntax_table logic for to-the-end intervals");
134 else if (pos < i->position) /* Move left. */
135 {
136 if (count > 0)
137 error ("Error in syntax_table logic for intervals <-.");
138 /* Update the interval. */
139 i = update_interval (i, pos);
140 if (oldi->position != INTERVAL_LAST_POS (i))
141 {
142 invalidate = 0;
143 gl_state.right_ok = 1; /* Invalidate the other end. */
144 gl_state.forward_i = i;
145 gl_state.e_property = INTERVAL_LAST_POS (i);
146 }
147 }
148 else if (pos >= INTERVAL_LAST_POS (i)) /* Move right. */
149 {
150 if (count < 0)
151 error ("Error in syntax_table logic for intervals ->.");
152 /* Update the interval. */
153 i = update_interval (i, pos);
154 if (i->position != INTERVAL_LAST_POS (oldi))
155 {
156 invalidate = 0;
157 gl_state.left_ok = 1; /* Invalidate the other end. */
158 gl_state.backward_i = i;
159 gl_state.b_property = i->position - 1;
160 }
161 }
162 else if (count > 0 ? gl_state.right_ok : gl_state.left_ok)
163 {
164 /* We do not need to recalculate tmp_table. */
165 tmp_table = gl_state.old_prop;
166 }
167
168 update:
169 tmp_table = textget (i->plist, Qsyntax_table);
170
171 if (invalidate)
172 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
173
174 if (invalidate) /* Did not get to adjacent interval. */
175 { /* with the same table => */
176 /* invalidate the old range. */
177 if (count > 0)
178 {
179 gl_state.backward_i = i;
180 gl_state.left_ok = 1; /* Invalidate the other end. */
181 gl_state.b_property = i->position - 1;
182 }
183 else
184 {
185 gl_state.forward_i = i;
186 gl_state.right_ok = 1; /* Invalidate the other end. */
187 gl_state.e_property = INTERVAL_LAST_POS (i);
188 }
189 }
190
191 gl_state.current_syntax_table = tmp_table;
192 gl_state.old_prop = tmp_table;
193 if (Fsyntax_table_p (tmp_table) == Qt)
194 {
195 gl_state.use_global = 0;
196 }
197 else if (CONSP (tmp_table))
198 {
199 gl_state.use_global = 1;
200 gl_state.global_code = tmp_table;
201 }
202 else
203 {
204 gl_state.use_global = 0;
205 gl_state.current_syntax_table = current_buffer->syntax_table;
206 }
207
208 while (!NULL_INTERVAL_P (i))
209 {
210 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
211 {
212 if (count > 0)
213 gl_state.right_ok = 0;
214 else
215 gl_state.left_ok = 0;
216 break;
217 }
218 else if (cnt == INTERVALS_AT_ONCE)
219 {
220 if (count > 0)
221 gl_state.right_ok = 1;
222 else
223 gl_state.left_ok = 1;
224 break;
225 }
226 cnt++;
227 i = count > 0 ? next_interval (i) : previous_interval (i);
228 }
229 if (NULL_INTERVAL_P (i))
230 { /* This property goes to the end. */
231 if (count > 0)
232 gl_state.e_property = gl_state.stop;
233 else
234 gl_state.b_property = gl_state.start;
235 }
236 else
237 {
238 if (count > 0)
239 {
240 gl_state.e_property = i->position;
241 gl_state.forward_i = i;
242 }
243 else
244 {
245 gl_state.b_property = i->position + LENGTH (i) - 1;
246 gl_state.backward_i = i;
247 }
248 }
249 }
250 \f
251 /* Returns TRUE if char at POS is quoted.
252 Global syntax-table data should be set up already to be good at pos
253 or after. On return global syntax data is good for lookup at POS. */
254
255 static int
256 char_quoted (pos)
257 register int pos;
258 {
259 register enum syntaxcode code;
260 register int beg = BEGV;
261 register int quoted = 0;
262 int temp_pos = pos;
263
264 DEC_POS (temp_pos);
265 while (temp_pos >= beg
266 && ( UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos), 1)
267 && ((code = SYNTAX (FETCH_CHAR (temp_pos))) == Scharquote
268 || code == Sescape))
269 {
270 temp_pos--, quoted = !quoted;
271 }
272 UPDATE_SYNTAX_TABLE (pos);
273 return quoted;
274 }
275 \f
276 /* Find a defun-start that is the last one before POS (or nearly the last).
277 We record what we find, so that another call in the same area
278 can return the same value right away.
279
280 There is no promise at which position the global syntax data is
281 valid on return from the subroutine, so the caller should explicitly
282 update the global data. */
283
284 static int
285 find_defun_start (pos)
286 int pos;
287 {
288 int tem;
289 int shortage;
290
291 /* Use previous finding, if it's valid and applies to this inquiry. */
292 if (current_buffer == find_start_buffer
293 /* Reuse the defun-start even if POS is a little farther on.
294 POS might be in the next defun, but that's ok.
295 Our value may not be the best possible, but will still be usable. */
296 && pos <= find_start_pos + 1000
297 && pos >= find_start_value
298 && BEGV == find_start_begv
299 && MODIFF == find_start_modiff)
300 return find_start_value;
301
302 /* Back up to start of line. */
303 tem = scan_buffer ('\n', pos, BEGV, -1, &shortage, 1);
304
305 /* We optimize syntax-table lookup for rare updates. Thus we accept
306 only those `^\s(' which are good in global _and_ text-property
307 syntax-tables. */
308 gl_state.current_syntax_table = current_buffer->syntax_table;
309 gl_state.use_global = 0;
310 while (tem > BEGV)
311 {
312 /* Open-paren at start of line means we found our defun-start. */
313 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
314 {
315 SETUP_SYNTAX_TABLE (tem + 1, -1); /* Try again... */
316 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
317 break;
318 /* Now fallback to the default value. */
319 gl_state.current_syntax_table = current_buffer->syntax_table;
320 gl_state.use_global = 0;
321 }
322 /* Move to beg of previous line. */
323 tem = scan_buffer ('\n', tem, BEGV, -2, &shortage, 1);
324 }
325
326 /* Record what we found, for the next try. */
327 find_start_value = tem;
328 find_start_buffer = current_buffer;
329 find_start_modiff = MODIFF;
330 find_start_begv = BEGV;
331 find_start_pos = pos;
332
333 return find_start_value;
334 }
335 \f
336 /* Checks whether FROM is the end of comment. Does not try to
337 fallback more than to STOP.
338 Returns -1 if cannot find comment ending at from, otherwise start
339 of comment. Global syntax data remains valid for
340 backward search starting at the returned value (or at FROM, if
341 the search was not successful). */
342
343 static int
344 back_comment (from, stop)
345 int from, stop;
346 {
347 /* Look back, counting the parity of string-quotes,
348 and recording the comment-starters seen.
349 When we reach a safe place, assume that's not in a string;
350 then step the main scan to the earliest comment-starter seen
351 an even number of string quotes away from the safe place.
352
353 OFROM[I] is position of the earliest comment-starter seen
354 which is I+2X quotes from the comment-end.
355 PARITY is current parity of quotes from the comment end. */
356 int parity = 0;
357 int my_stringend = 0;
358 int string_lossage = 0;
359 int comment_end = from;
360 int comstart_pos = 0;
361 int comstart_parity = 0;
362 int scanstart = from - 1;
363 register enum syntaxcode code;
364 int c, comstyle = 0;
365
366 /* At beginning of range to scan, we're outside of strings;
367 that determines quote parity to the comment-end. */
368 while (from != stop)
369 {
370 /* Move back and examine a character. */
371 DEC_POS (from);
372 UPDATE_SYNTAX_TABLE_BACKWARD (from);
373
374 c = FETCH_CHAR (from);
375 code = SYNTAX (c);
376
377 /* If this char is the second of a 2-char comment sequence,
378 back up and give the pair the appropriate syntax. */
379 if (from > stop && SYNTAX_COMEND_SECOND (c)
380 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
381 {
382 code = Sendcomment;
383 DEC_POS (from);
384 /* This is apparently the best we can do: */
385 UPDATE_SYNTAX_TABLE_BACKWARD (from);
386 c = FETCH_CHAR (from);
387 }
388
389 /* If this char starts a 2-char comment start sequence,
390 treat it like a 1-char comment starter. */
391 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
392 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from + 1))
393 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (from + 1)))
394 code = Scomment;
395
396 /* Ignore escaped characters. */
397 if (char_quoted (from))
398 continue;
399
400 /* Track parity of quotes. */
401 if (code == Sstring)
402 {
403 parity ^= 1;
404 if (my_stringend == 0)
405 my_stringend = c;
406 /* If we have two kinds of string delimiters.
407 There's no way to grok this scanning backwards. */
408 else if (my_stringend != c)
409 string_lossage = 1;
410 }
411
412 if (code == Sstring_fence || code == Scomment_fence)
413 {
414 parity ^= 1;
415 if (my_stringend == 0)
416 my_stringend =
417 code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE;
418 /* If we have two kinds of string delimiters.
419 There's no way to grok this scanning backwards. */
420 else if (my_stringend != (code == Sstring_fence
421 ? ST_STRING_STYLE : ST_COMMENT_STYLE))
422 string_lossage = 1;
423 }
424
425 /* Record comment-starters according to that
426 quote-parity to the comment-end. */
427 if (code == Scomment)
428 {
429 comstart_parity = parity;
430 comstart_pos = from;
431 }
432
433 /* If we find another earlier comment-ender,
434 any comment-starts earlier than that don't count
435 (because they go with the earlier comment-ender). */
436 if (code == Sendcomment
437 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
438 break;
439
440 /* Assume a defun-start point is outside of strings. */
441 if (code == Sopen
442 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
443 break;
444 }
445
446 if (comstart_pos == 0)
447 {
448 from = comment_end;
449 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
450 }
451 /* If the earliest comment starter
452 is followed by uniform paired string quotes or none,
453 we know it can't be inside a string
454 since if it were then the comment ender would be inside one.
455 So it does start a comment. Skip back to it. */
456 else if (comstart_parity == 0 && !string_lossage)
457 {
458 from = comstart_pos;
459 /* Globals are correct now. */
460 }
461 else
462 {
463 /* We had two kinds of string delimiters mixed up
464 together. Decode this going forwards.
465 Scan fwd from the previous comment ender
466 to the one in question; this records where we
467 last passed a comment starter. */
468 struct lisp_parse_state state;
469 scan_sexps_forward (&state, find_defun_start (comment_end),
470 comment_end - 1, -10000, 0, Qnil, 0);
471 if (state.incomment)
472 {
473 /* scan_sexps_forward changed the direction of search in
474 global variables, so we need to update it completely. */
475
476 from = state.comstr_start;
477 }
478 else
479 {
480 from = comment_end;
481 }
482 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
483 }
484
485 return from;
486 }
487 \f
488 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
489 "Return t if OBJECT is a syntax table.\n\
490 Currently, any char-table counts as a syntax table.")
491 (object)
492 Lisp_Object object;
493 {
494 if (CHAR_TABLE_P (object)
495 && XCHAR_TABLE (object)->purpose == Qsyntax_table)
496 return Qt;
497 return Qnil;
498 }
499
500 static void
501 check_syntax_table (obj)
502 Lisp_Object obj;
503 {
504 if (!(CHAR_TABLE_P (obj)
505 && XCHAR_TABLE (obj)->purpose == Qsyntax_table))
506 wrong_type_argument (Qsyntax_table_p, obj);
507 }
508
509 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
510 "Return the current syntax table.\n\
511 This is the one specified by the current buffer.")
512 ()
513 {
514 return current_buffer->syntax_table;
515 }
516
517 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
518 Sstandard_syntax_table, 0, 0, 0,
519 "Return the standard syntax table.\n\
520 This is the one used for new buffers.")
521 ()
522 {
523 return Vstandard_syntax_table;
524 }
525
526 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
527 "Construct a new syntax table and return it.\n\
528 It is a copy of the TABLE, which defaults to the standard syntax table.")
529 (table)
530 Lisp_Object table;
531 {
532 Lisp_Object copy;
533
534 if (!NILP (table))
535 check_syntax_table (table);
536 else
537 table = Vstandard_syntax_table;
538
539 copy = Fcopy_sequence (table);
540
541 /* Only the standard syntax table should have a default element.
542 Other syntax tables should inherit from parents instead. */
543 XCHAR_TABLE (copy)->defalt = Qnil;
544
545 /* Copied syntax tables should all have parents.
546 If we copied one with no parent, such as the standard syntax table,
547 use the standard syntax table as the copy's parent. */
548 if (NILP (XCHAR_TABLE (copy)->parent))
549 Fset_char_table_parent (copy, Vstandard_syntax_table);
550 return copy;
551 }
552
553 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
554 "Select a new syntax table for the current buffer.\n\
555 One argument, a syntax table.")
556 (table)
557 Lisp_Object table;
558 {
559 check_syntax_table (table);
560 current_buffer->syntax_table = table;
561 /* Indicate that this buffer now has a specified syntax table. */
562 current_buffer->local_var_flags
563 |= XFASTINT (buffer_local_flags.syntax_table);
564 return table;
565 }
566 \f
567 /* Convert a letter which signifies a syntax code
568 into the code it signifies.
569 This is used by modify-syntax-entry, and other things. */
570
571 unsigned char syntax_spec_code[0400] =
572 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
573 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
574 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
575 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
576 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
577 (char) Smath, 0377, 0377, (char) Squote,
578 (char) Sopen, (char) Sclose, 0377, 0377,
579 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
580 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
581 0377, 0377, 0377, 0377,
582 (char) Scomment, 0377, (char) Sendcomment, 0377,
583 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
584 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
585 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
586 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
587 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
588 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
589 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
590 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
591 };
592
593 /* Indexed by syntax code, give the letter that describes it. */
594
595 char syntax_code_spec[16] =
596 {
597 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
598 '!', '|'
599 };
600
601 /* Indexed by syntax code, give the object (cons of syntax code and
602 nil) to be stored in syntax table. Since these objects can be
603 shared among syntax tables, we generate them in advance. By
604 sharing objects, the function `describe-syntax' can give a more
605 compact listing. */
606 static Lisp_Object Vsyntax_code_object;
607
608 \f
609 /* Look up the value for CHARACTER in syntax table TABLE's parent
610 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
611 for CHARACTER. It's actually used only when not compiled with GCC. */
612
613 Lisp_Object
614 syntax_parent_lookup (table, character)
615 Lisp_Object table;
616 int character;
617 {
618 Lisp_Object value;
619
620 while (1)
621 {
622 table = XCHAR_TABLE (table)->parent;
623 if (NILP (table))
624 return Qnil;
625
626 value = XCHAR_TABLE (table)->contents[character];
627 if (!NILP (value))
628 return value;
629 }
630 }
631
632 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
633 "Return the syntax code of CHARACTER, described by a character.\n\
634 For example, if CHARACTER is a word constituent,\n\
635 the character `w' is returned.\n\
636 The characters that correspond to various syntax codes\n\
637 are listed in the documentation of `modify-syntax-entry'.")
638 (character)
639 Lisp_Object character;
640 {
641 int char_int;
642 gl_state.current_syntax_table = current_buffer->syntax_table;
643
644 gl_state.use_global = 0;
645 CHECK_NUMBER (character, 0);
646 char_int = XINT (character);
647 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
648 }
649
650 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
651 "Return the matching parenthesis of CHARACTER, or nil if none.")
652 (character)
653 Lisp_Object character;
654 {
655 int char_int, code;
656 gl_state.current_syntax_table = current_buffer->syntax_table;
657 gl_state.use_global = 0;
658 CHECK_NUMBER (character, 0);
659 char_int = XINT (character);
660 code = SYNTAX (char_int);
661 if (code == Sopen || code == Sclose)
662 return make_number (SYNTAX_MATCH (char_int));
663 return Qnil;
664 }
665
666 /* This comment supplies the doc string for modify-syntax-entry,
667 for make-docfile to see. We cannot put this in the real DEFUN
668 due to limits in the Unix cpp.
669
670 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
671 "Set syntax for character CHAR according to string S.\n\
672 The syntax is changed only for table TABLE, which defaults to\n\
673 the current buffer's syntax table.\n\
674 The first character of S should be one of the following:\n\
675 Space or - whitespace syntax. w word constituent.\n\
676 _ symbol constituent. . punctuation.\n\
677 ( open-parenthesis. ) close-parenthesis.\n\
678 \" string quote. \\ escape.\n\
679 $ paired delimiter. ' expression quote or prefix operator.\n\
680 < comment starter. > comment ender.\n\
681 / character-quote. @ inherit from `standard-syntax-table'.\n\
682 \n\
683 Only single-character comment start and end sequences are represented thus.\n\
684 Two-character sequences are represented as described below.\n\
685 The second character of S is the matching parenthesis,\n\
686 used only if the first character is `(' or `)'.\n\
687 Any additional characters are flags.\n\
688 Defined flags are the characters 1, 2, 3, 4, b, and p.\n\
689 1 means CHAR is the start of a two-char comment start sequence.\n\
690 2 means CHAR is the second character of such a sequence.\n\
691 3 means CHAR is the start of a two-char comment end sequence.\n\
692 4 means CHAR is the second character of such a sequence.\n\
693 \n\
694 There can be up to two orthogonal comment sequences. This is to support\n\
695 language modes such as C++. By default, all comment sequences are of style\n\
696 a, but you can set the comment sequence style to b (on the second character\n\
697 of a comment-start, or the first character of a comment-end sequence) using\n\
698 this flag:\n\
699 b means CHAR is part of comment sequence b.\n\
700 \n\
701 p means CHAR is a prefix character for `backward-prefix-chars';\n\
702 such characters are treated as whitespace when they occur\n\
703 between expressions.")
704 (char, s, table)
705 */
706
707 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
708 /* I really don't know why this is interactive
709 help-form should at least be made useful whilst reading the second arg
710 */
711 "cSet syntax for character: \nsSet syntax for %s to: ",
712 0 /* See immediately above */)
713 (c, newentry, syntax_table)
714 Lisp_Object c, newentry, syntax_table;
715 {
716 register unsigned char *p;
717 register enum syntaxcode code;
718 int val;
719 Lisp_Object match;
720
721 CHECK_NUMBER (c, 0);
722 CHECK_STRING (newentry, 1);
723
724 if (NILP (syntax_table))
725 syntax_table = current_buffer->syntax_table;
726 else
727 check_syntax_table (syntax_table);
728
729 p = XSTRING (newentry)->data;
730 code = (enum syntaxcode) syntax_spec_code[*p++];
731 if (((int) code & 0377) == 0377)
732 error ("invalid syntax description letter: %c", c);
733
734 if (code == Sinherit)
735 {
736 SET_RAW_SYNTAX_ENTRY (syntax_table, c, Qnil);
737 return Qnil;
738 }
739
740 if (*p)
741 {
742 int len;
743 int character = STRING_CHAR_AND_LENGTH (p, XSTRING (newentry)->size - 1,
744 len);
745 XSETINT (match, character);
746 if (XFASTINT (match) == ' ')
747 match = Qnil;
748 p += len;
749 }
750 else
751 match = Qnil;
752
753 val = (int) code;
754 while (*p)
755 switch (*p++)
756 {
757 case '1':
758 val |= 1 << 16;
759 break;
760
761 case '2':
762 val |= 1 << 17;
763 break;
764
765 case '3':
766 val |= 1 << 18;
767 break;
768
769 case '4':
770 val |= 1 << 19;
771 break;
772
773 case 'p':
774 val |= 1 << 20;
775 break;
776
777 case 'b':
778 val |= 1 << 21;
779 break;
780 }
781
782 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
783 newentry = XVECTOR (Vsyntax_code_object)->contents[val];
784 else
785 /* Since we can't use a shared object, let's make a new one. */
786 newentry = Fcons (make_number (val), match);
787
788 SET_RAW_SYNTAX_ENTRY (syntax_table, c, newentry);
789
790 return Qnil;
791 }
792 \f
793 /* Dump syntax table to buffer in human-readable format */
794
795 static void
796 describe_syntax (value)
797 Lisp_Object value;
798 {
799 register enum syntaxcode code;
800 char desc, match, start1, start2, end1, end2, prefix, comstyle;
801 char str[2];
802 Lisp_Object first, match_lisp;
803
804 Findent_to (make_number (16), make_number (1));
805
806 if (NILP (value))
807 {
808 insert_string ("default\n");
809 return;
810 }
811
812 if (CHAR_TABLE_P (value))
813 {
814 insert_string ("deeper char-table ...\n");
815 return;
816 }
817
818 if (!CONSP (value))
819 {
820 insert_string ("invalid\n");
821 return;
822 }
823
824 first = XCONS (value)->car;
825 match_lisp = XCONS (value)->cdr;
826
827 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
828 {
829 insert_string ("invalid\n");
830 return;
831 }
832
833 code = (enum syntaxcode) (first & 0377);
834 start1 = (XINT (first) >> 16) & 1;
835 start2 = (XINT (first) >> 17) & 1;
836 end1 = (XINT (first) >> 18) & 1;
837 end2 = (XINT (first) >> 19) & 1;
838 prefix = (XINT (first) >> 20) & 1;
839 comstyle = (XINT (first) >> 21) & 1;
840
841 if ((int) code < 0 || (int) code >= (int) Smax)
842 {
843 insert_string ("invalid");
844 return;
845 }
846 desc = syntax_code_spec[(int) code];
847
848 str[0] = desc, str[1] = 0;
849 insert (str, 1);
850
851 if (NILP (match_lisp))
852 insert (" ", 1);
853 else
854 insert_char (XINT (match_lisp));
855
856 if (start1)
857 insert ("1", 1);
858 if (start2)
859 insert ("2", 1);
860
861 if (end1)
862 insert ("3", 1);
863 if (end2)
864 insert ("4", 1);
865
866 if (prefix)
867 insert ("p", 1);
868 if (comstyle)
869 insert ("b", 1);
870
871 insert_string ("\twhich means: ");
872
873 switch (SWITCH_ENUM_CAST (code))
874 {
875 case Swhitespace:
876 insert_string ("whitespace"); break;
877 case Spunct:
878 insert_string ("punctuation"); break;
879 case Sword:
880 insert_string ("word"); break;
881 case Ssymbol:
882 insert_string ("symbol"); break;
883 case Sopen:
884 insert_string ("open"); break;
885 case Sclose:
886 insert_string ("close"); break;
887 case Squote:
888 insert_string ("quote"); break;
889 case Sstring:
890 insert_string ("string"); break;
891 case Smath:
892 insert_string ("math"); break;
893 case Sescape:
894 insert_string ("escape"); break;
895 case Scharquote:
896 insert_string ("charquote"); break;
897 case Scomment:
898 insert_string ("comment"); break;
899 case Sendcomment:
900 insert_string ("endcomment"); break;
901 default:
902 insert_string ("invalid");
903 return;
904 }
905
906 if (!NILP (match_lisp))
907 {
908 insert_string (", matches ");
909 insert_char (XINT (match_lisp));
910 }
911
912 if (start1)
913 insert_string (",\n\t is the first character of a comment-start sequence");
914 if (start2)
915 insert_string (",\n\t is the second character of a comment-start sequence");
916
917 if (end1)
918 insert_string (",\n\t is the first character of a comment-end sequence");
919 if (end2)
920 insert_string (",\n\t is the second character of a comment-end sequence");
921 if (comstyle)
922 insert_string (" (comment style b)");
923
924 if (prefix)
925 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
926
927 insert_string ("\n");
928 }
929
930 static Lisp_Object
931 describe_syntax_1 (vector)
932 Lisp_Object vector;
933 {
934 struct buffer *old = current_buffer;
935 set_buffer_internal (XBUFFER (Vstandard_output));
936 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil, (int *) 0, 0);
937 while (! NILP (XCHAR_TABLE (vector)->parent))
938 {
939 vector = XCHAR_TABLE (vector)->parent;
940 insert_string ("\nThe parent syntax table is:");
941 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil,
942 (int *) 0, 0);
943 }
944
945 call0 (intern ("help-mode"));
946 set_buffer_internal (old);
947 return Qnil;
948 }
949
950 DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
951 "Describe the syntax specifications in the syntax table.\n\
952 The descriptions are inserted in a buffer, which is then displayed.")
953 ()
954 {
955 internal_with_output_to_temp_buffer
956 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
957
958 return Qnil;
959 }
960 \f
961 int parse_sexp_ignore_comments;
962
963 /* Return the position across COUNT words from FROM.
964 If that many words cannot be found before the end of the buffer, return 0.
965 COUNT negative means scan backward and stop at word beginning. */
966
967 scan_words (from, count)
968 register int from, count;
969 {
970 register int beg = BEGV;
971 register int end = ZV;
972 register enum syntaxcode code;
973 int ch0, ch1;
974 int temp_pos;
975
976 immediate_quit = 1;
977 QUIT;
978
979 SETUP_SYNTAX_TABLE (from, count);
980
981 while (count > 0)
982 {
983 while (1)
984 {
985 if (from == end)
986 {
987 immediate_quit = 0;
988 return 0;
989 }
990 UPDATE_SYNTAX_TABLE_FORWARD (from);
991 ch0 = FETCH_CHAR (from);
992 code = SYNTAX (ch0);
993 INC_POS (from);
994 if (words_include_escapes
995 && (code == Sescape || code == Scharquote))
996 break;
997 if (code == Sword)
998 break;
999 }
1000 /* Now CH0 is a character which begins a word and FROM is the
1001 position of the next character. */
1002 while (1)
1003 {
1004 if (from == end) break;
1005 UPDATE_SYNTAX_TABLE_FORWARD (from);
1006 ch1 = FETCH_CHAR (from);
1007 code = SYNTAX (ch1);
1008 if (!(words_include_escapes
1009 && (code == Sescape || code == Scharquote)))
1010 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1011 break;
1012 INC_POS (from);
1013 ch0 = ch1;
1014 }
1015 count--;
1016 }
1017 while (count < 0)
1018 {
1019 while (1)
1020 {
1021 if (from == beg)
1022 {
1023 immediate_quit = 0;
1024 return 0;
1025 }
1026 DEC_POS (from);
1027 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1028 ch1 = FETCH_CHAR (from);
1029 code = SYNTAX (ch1);
1030 if (words_include_escapes
1031 && (code == Sescape || code == Scharquote))
1032 break;
1033 if (code == Sword)
1034 break;
1035 }
1036 /* Now CH1 is a character which ends a word and FROM is the
1037 position of it. */
1038 while (1)
1039 {
1040 if (from == beg) break;
1041 temp_pos = from;
1042 DEC_POS (temp_pos);
1043 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1044 ch0 = FETCH_CHAR (temp_pos);
1045 code = SYNTAX (ch0);
1046 if (!(words_include_escapes
1047 && (code == Sescape || code == Scharquote)))
1048 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1049 break;
1050 from = temp_pos;
1051 ch1 = ch0;
1052 }
1053 count++;
1054 }
1055
1056 immediate_quit = 0;
1057
1058 return from;
1059 }
1060
1061 DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
1062 "Move point forward ARG words (backward if ARG is negative).\n\
1063 Normally returns t.\n\
1064 If an edge of the buffer is reached, point is left there\n\
1065 and nil is returned.")
1066 (count)
1067 Lisp_Object count;
1068 {
1069 int val;
1070 CHECK_NUMBER (count, 0);
1071
1072 if (!(val = scan_words (PT, XINT (count))))
1073 {
1074 SET_PT (XINT (count) > 0 ? ZV : BEGV);
1075 return Qnil;
1076 }
1077 SET_PT (val);
1078 return Qt;
1079 }
1080 \f
1081 Lisp_Object skip_chars ();
1082
1083 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1084 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
1085 STRING is like the inside of a `[...]' in a regular expression\n\
1086 except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
1087 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
1088 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
1089 Returns the distance traveled, either zero or positive.")
1090 (string, lim)
1091 Lisp_Object string, lim;
1092 {
1093 return skip_chars (1, 0, string, lim);
1094 }
1095
1096 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1097 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
1098 See `skip-chars-forward' for details.\n\
1099 Returns the distance traveled, either zero or negative.")
1100 (string, lim)
1101 Lisp_Object string, lim;
1102 {
1103 return skip_chars (0, 0, string, lim);
1104 }
1105
1106 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1107 "Move point forward across chars in specified syntax classes.\n\
1108 SYNTAX is a string of syntax code characters.\n\
1109 Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
1110 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1111 This function returns the distance traveled, either zero or positive.")
1112 (syntax, lim)
1113 Lisp_Object syntax, lim;
1114 {
1115 return skip_chars (1, 1, syntax, lim);
1116 }
1117
1118 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1119 "Move point backward across chars in specified syntax classes.\n\
1120 SYNTAX is a string of syntax code characters.\n\
1121 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
1122 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1123 This function returns the distance traveled, either zero or negative.")
1124 (syntax, lim)
1125 Lisp_Object syntax, lim;
1126 {
1127 return skip_chars (0, 1, syntax, lim);
1128 }
1129
1130 Lisp_Object
1131 skip_chars (forwardp, syntaxp, string, lim)
1132 int forwardp, syntaxp;
1133 Lisp_Object string, lim;
1134 {
1135 register unsigned char *p, *pend;
1136 register unsigned int c;
1137 register int ch;
1138 unsigned char fastmap[0400];
1139 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
1140 of which codes don't fit in FASTMAP. In that case, we set the
1141 first byte of multibyte form (i.e. base leading-code) in FASTMAP
1142 and set the actual ranges of characters in CHAR_RANGES. In the
1143 form "X-Y" of STRING, both X and Y must belong to the same
1144 character set because a range striding across character sets is
1145 meaningless. */
1146 int *char_ranges
1147 = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2);
1148 int n_char_ranges = 0;
1149 int negate = 0;
1150 register int i;
1151 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1152
1153 CHECK_STRING (string, 0);
1154
1155 if (NILP (lim))
1156 XSETINT (lim, forwardp ? ZV : BEGV);
1157 else
1158 CHECK_NUMBER_COERCE_MARKER (lim, 1);
1159
1160 /* In any case, don't allow scan outside bounds of buffer. */
1161 /* jla turned this off, for no known reason.
1162 bfox turned the ZV part on, and rms turned the
1163 BEGV part back on. */
1164 if (XINT (lim) > ZV)
1165 XSETFASTINT (lim, ZV);
1166 if (XINT (lim) < BEGV)
1167 XSETFASTINT (lim, BEGV);
1168
1169 p = XSTRING (string)->data;
1170 pend = p + XSTRING (string)->size;
1171 bzero (fastmap, sizeof fastmap);
1172
1173 if (p != pend && *p == '^')
1174 {
1175 negate = 1; p++;
1176 }
1177
1178 /* Find the characters specified and set their elements of fastmap.
1179 If syntaxp, each character counts as itself.
1180 Otherwise, handle backslashes and ranges specially. */
1181
1182 while (p != pend)
1183 {
1184 c = *p;
1185 if (multibyte)
1186 {
1187 ch = STRING_CHAR (p, pend - p);
1188 p += BYTES_BY_CHAR_HEAD (*p);
1189 }
1190 else
1191 {
1192 ch = c;
1193 p++;
1194 }
1195 if (syntaxp)
1196 fastmap[syntax_spec_code[c]] = 1;
1197 else
1198 {
1199 if (c == '\\')
1200 {
1201 if (p == pend) break;
1202 c = *p++;
1203 }
1204 if (p != pend && *p == '-')
1205 {
1206 unsigned int ch2;
1207
1208 p++;
1209 if (p == pend) break;
1210 if (SINGLE_BYTE_CHAR_P (ch))
1211 while (c <= *p)
1212 {
1213 fastmap[c] = 1;
1214 c++;
1215 }
1216 else
1217 {
1218 fastmap[c] = 1; /* C is the base leading-code. */
1219 ch2 = STRING_CHAR (p, pend - p);
1220 if (ch <= ch2)
1221 char_ranges[n_char_ranges++] = ch,
1222 char_ranges[n_char_ranges++] = ch2;
1223 }
1224 p += multibyte ? BYTES_BY_CHAR_HEAD (*p) : 1;
1225 }
1226 else
1227 {
1228 fastmap[c] = 1;
1229 if (!SINGLE_BYTE_CHAR_P (ch))
1230 {
1231 char_ranges[n_char_ranges++] = ch;
1232 char_ranges[n_char_ranges++] = ch;
1233 }
1234 }
1235 }
1236 }
1237
1238 /* If ^ was the first character, complement the fastmap. In
1239 addition, as all multibyte characters have possibility of
1240 matching, set all entries for base leading codes, which is
1241 harmless even if SYNTAXP is 1. */
1242
1243 if (negate)
1244 for (i = 0; i < sizeof fastmap; i++)
1245 {
1246 if (!multibyte || !BASE_LEADING_CODE_P (i))
1247 fastmap[i] ^= 1;
1248 else
1249 fastmap[i] = 1;
1250 }
1251
1252 {
1253 int start_point = PT;
1254 int pos = PT;
1255
1256 immediate_quit = 1;
1257 if (syntaxp)
1258 {
1259 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1260 if (forwardp)
1261 {
1262 if (multibyte)
1263 {
1264 while (pos < XINT (lim)
1265 && fastmap[(int) SYNTAX (FETCH_CHAR (pos))])
1266 {
1267 INC_POS (pos);
1268 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1269 }
1270 }
1271 else
1272 {
1273 while (pos < XINT (lim)
1274 && fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1275 {
1276 pos++;
1277 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1278 }
1279 }
1280 }
1281 else
1282 {
1283 if (multibyte)
1284 {
1285 while (pos > XINT (lim))
1286 {
1287 int savepos = pos;
1288 DEC_POS (pos);
1289 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1290 if (!fastmap[(int) SYNTAX (FETCH_CHAR (pos))])
1291 {
1292 pos = savepos;
1293 break;
1294 }
1295 }
1296 }
1297 else
1298 {
1299 while (pos > XINT (lim))
1300 {
1301 pos--;
1302 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1303 if (!fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1304 {
1305 pos++;
1306 break;
1307 }
1308 }
1309 }
1310 }
1311 }
1312 else
1313 {
1314 if (forwardp)
1315 {
1316 if (multibyte)
1317 while (pos < XINT (lim) && fastmap[(c = FETCH_BYTE (pos))])
1318 {
1319 if (!BASE_LEADING_CODE_P (c))
1320 pos++;
1321 else if (n_char_ranges)
1322 {
1323 /* We much check CHAR_RANGES for a multibyte
1324 character. */
1325 ch = FETCH_MULTIBYTE_CHAR (pos);
1326 for (i = 0; i < n_char_ranges; i += 2)
1327 if ((ch >= char_ranges[i] && ch <= char_ranges[i + 1]))
1328 break;
1329 if (!(negate ^ (i < n_char_ranges)))
1330 break;
1331
1332 INC_POS (pos);
1333 }
1334 else
1335 {
1336 if (!negate) break;
1337 INC_POS (pos);
1338 }
1339 }
1340 else
1341 while (pos < XINT (lim) && fastmap[FETCH_BYTE (pos)])
1342 pos++;
1343 }
1344 else
1345 {
1346 if (multibyte)
1347 while (pos > XINT (lim))
1348 {
1349 int savepos = pos;
1350 DEC_POS (pos);
1351 if (fastmap[(c = FETCH_BYTE (pos))])
1352 {
1353 if (!BASE_LEADING_CODE_P (c))
1354 ;
1355 else if (n_char_ranges)
1356 {
1357 /* We much check CHAR_RANGES for a multibyte
1358 character. */
1359 ch = FETCH_MULTIBYTE_CHAR (pos);
1360 for (i = 0; i < n_char_ranges; i += 2)
1361 if (ch >= char_ranges[i] && ch <= char_ranges[i + 1])
1362 break;
1363 if (!(negate ^ (i < n_char_ranges)))
1364 {
1365 pos = savepos;
1366 break;
1367 }
1368 }
1369 else
1370 if (!negate)
1371 {
1372 pos = savepos;
1373 break;
1374 }
1375 }
1376 else
1377 {
1378 pos = savepos;
1379 break;
1380 }
1381 }
1382 else
1383 while (pos > XINT (lim) && fastmap[FETCH_BYTE (pos - 1)])
1384 pos--;
1385 }
1386 }
1387
1388 if (multibyte
1389 /* INC_POS or DEC_POS might have moved POS over LIM. */
1390 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1391 pos = XINT (lim);
1392
1393 SET_PT (pos);
1394 immediate_quit = 0;
1395
1396 return make_number (PT - start_point);
1397 }
1398 }
1399 \f
1400 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
1401 "Move forward across up to N comments. If N is negative, move backward.\n\
1402 Stop scanning if we find something other than a comment or whitespace.\n\
1403 Set point to where scanning stops.\n\
1404 If N comments are found as expected, with nothing except whitespace\n\
1405 between them, return t; otherwise return nil.")
1406 (count)
1407 Lisp_Object count;
1408 {
1409 register int from;
1410 register int stop;
1411 register int c, c1;
1412 register enum syntaxcode code;
1413 int comstyle = 0; /* style of comment encountered */
1414 int found;
1415 int count1;
1416 int temp_pos;
1417
1418 CHECK_NUMBER (count, 0);
1419 count1 = XINT (count);
1420 stop = count1 > 0 ? ZV : BEGV;
1421
1422 immediate_quit = 1;
1423 QUIT;
1424
1425 from = PT;
1426
1427 SETUP_SYNTAX_TABLE (from, count1);
1428 while (count1 > 0)
1429 {
1430 do
1431 {
1432 if (from == stop)
1433 {
1434 SET_PT (from);
1435 immediate_quit = 0;
1436 return Qnil;
1437 }
1438 UPDATE_SYNTAX_TABLE_FORWARD (from);
1439 c = FETCH_CHAR (from);
1440 code = SYNTAX (c);
1441 INC_POS (from);
1442 comstyle = 0;
1443 if (from < stop && SYNTAX_COMSTART_FIRST (c)
1444 && (c1 = FETCH_CHAR (from),
1445 SYNTAX_COMSTART_SECOND (c1)))
1446 {
1447 /* We have encountered a comment start sequence and we
1448 are ignoring all text inside comments. We must record
1449 the comment style this sequence begins so that later,
1450 only a comment end of the same style actually ends
1451 the comment section. */
1452 code = Scomment;
1453 comstyle = SYNTAX_COMMENT_STYLE (c1);
1454 INC_POS (from);
1455 }
1456 }
1457 while (code == Swhitespace || code == Sendcomment);
1458 if (code != Scomment && code != Scomment_fence)
1459 {
1460 immediate_quit = 0;
1461 DEC_POS (from);
1462 SET_PT (from);
1463 return Qnil;
1464 }
1465 /* We're at the start of a comment. */
1466 while (1)
1467 {
1468 if (from == stop)
1469 {
1470 immediate_quit = 0;
1471 SET_PT (from);
1472 return Qnil;
1473 }
1474 UPDATE_SYNTAX_TABLE_FORWARD (from);
1475 c = FETCH_CHAR (from);
1476 INC_POS (from);
1477 if (SYNTAX (c) == Sendcomment
1478 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1479 /* we have encountered a comment end of the same style
1480 as the comment sequence which began this comment
1481 section */
1482 break;
1483 if (SYNTAX (c) == Scomment_fence
1484 && comstyle == ST_COMMENT_STYLE)
1485 /* we have encountered a comment end of the same style
1486 as the comment sequence which began this comment
1487 section. */
1488 break;
1489 if (from < stop && SYNTAX_COMEND_FIRST (c)
1490 && (c1 = FETCH_CHAR (from),
1491 SYNTAX_COMEND_SECOND (c1))
1492 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1493 /* we have encountered a comment end of the same style
1494 as the comment sequence which began this comment
1495 section */
1496 { INC_POS (from); break; }
1497 }
1498 /* We have skipped one comment. */
1499 count1--;
1500 }
1501
1502 while (count1 < 0)
1503 {
1504 while (from > stop)
1505 {
1506 int quoted;
1507
1508 DEC_POS (from);
1509 quoted = char_quoted (from);
1510 if (quoted)
1511 {
1512 DEC_POS (from);
1513 goto leave; /* ????? XXXXX */
1514 }
1515 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1516 c = FETCH_CHAR (from);
1517 code = SYNTAX (c);
1518 comstyle = 0;
1519 if (code == Sendcomment)
1520 comstyle = SYNTAX_COMMENT_STYLE (c);
1521 temp_pos = from;
1522 DEC_POS (temp_pos);
1523 if (from > stop && SYNTAX_COMEND_SECOND (c)
1524 && (c1 = FETCH_CHAR (temp_pos),
1525 SYNTAX_COMEND_FIRST (c1))
1526 && !char_quoted (temp_pos))
1527 {
1528 /* We must record the comment style encountered so that
1529 later, we can match only the proper comment begin
1530 sequence of the same style. */
1531 code = Sendcomment;
1532 comstyle = SYNTAX_COMMENT_STYLE (c1);
1533 from = temp_pos;
1534 }
1535
1536 if (code == Scomment_fence)
1537 {
1538 /* Skip until first preceding unquoted comment_fence. */
1539 int found = 0, ini = from;
1540
1541 while (--from != stop)
1542 {
1543 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1544 c = FETCH_CHAR (from);
1545 if (SYNTAX (c) == Scomment_fence && !char_quoted (from))
1546 {
1547 found = 1;
1548 break;
1549 }
1550 }
1551 if (found == 0)
1552 {
1553 from = ini; /* Set point to ini + 1. */
1554 goto leave;
1555 }
1556 }
1557 else if (code == Sendcomment)
1558 {
1559 #if 0
1560 if (code != SYNTAX (c))
1561 /* For a two-char comment ender, we can assume
1562 it does end a comment. So scan back in a simple way. */
1563 {
1564 if (from != stop) DEC_POS (from);
1565 while (1)
1566 {
1567 if ((c = FETCH_CHAR (from),
1568 SYNTAX (c) == Scomment)
1569 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1570 break;
1571 if (from == stop)
1572 {
1573 immediate_quit = 0;
1574 SET_PT (from);
1575 return Qnil;
1576 }
1577 DEC_POS (from);
1578 if (SYNTAX_COMSTART_SECOND (c)
1579 && (c1 = FETCH_CHAR (from),
1580 SYNTAX_COMSTART_FIRST (c1))
1581 && SYNTAX_COMMENT_STYLE (c) == comstyle
1582 && !char_quoted (from))
1583 break;
1584 }
1585 break;
1586 }
1587 #endif /* 0 */
1588 found = back_comment (from, stop);
1589 if (found != -1) from = found;
1590 #if 0
1591 /* Look back, counting the parity of string-quotes,
1592 and recording the comment-starters seen.
1593 When we reach a safe place, assume that's not in a string;
1594 then step the main scan to the earliest comment-starter seen
1595 an even number of string quotes away from the safe place.
1596
1597 OFROM[I] is position of the earliest comment-starter seen
1598 which is I+2X quotes from the comment-end.
1599 PARITY is current parity of quotes from the comment end. */
1600 {
1601 int parity = 0;
1602 char my_stringend = 0;
1603 int string_lossage = 0;
1604 int comment_end = from;
1605 int comstart_pos = 0;
1606 int comstart_parity = 0;
1607 int scanstart = from;
1608
1609 DEC_POS (scanstart);
1610 /* At beginning of range to scan, we're outside of strings;
1611 that determines quote parity to the comment-end. */
1612 while (from != stop)
1613 {
1614 /* Move back and examine a character. */
1615 DEC_POS (from);
1616
1617 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1618 c = FETCH_CHAR (from);
1619 code = SYNTAX (c);
1620
1621 /* If this char is the second of a 2-char comment sequence,
1622 back up and give the pair the appropriate syntax. */
1623 temp_pos = from;
1624 DEC_POS (temp_pos);
1625 if (from > stop && SYNTAX_COMEND_SECOND (c)
1626 && (c1 = FETCH_CHAR (temp_pos),
1627 SYNTAX_COMEND_FIRST (c1)))
1628 {
1629 code = Sendcomment;
1630 from = temp_pos;
1631 c = c1;
1632 }
1633
1634 temp_pos = from;
1635 INC_POS (temp_pos);
1636 /* If this char starts a 2-char comment start sequence,
1637 treat it like a 1-char comment starter. */
1638 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
1639 && (c1 = FETCH_CHAR (temp_pos),
1640 SYNTAX_COMSTART_SECOND (c1))
1641 && comstyle == SYNTAX_COMMENT_STYLE (c1))
1642 code = Scomment;
1643
1644 /* Ignore escaped characters. */
1645 if (char_quoted (from))
1646 continue;
1647
1648 /* Track parity of quotes. */
1649 if (code == Sstring)
1650 {
1651 parity ^= 1;
1652 if (my_stringend == 0)
1653 my_stringend = c;
1654 /* If we have two kinds of string delimiters.
1655 There's no way to grok this scanning backwards. */
1656 else if (my_stringend != c)
1657 string_lossage = 1;
1658 }
1659
1660 /* Record comment-starters according to that
1661 quote-parity to the comment-end. */
1662 if (code == Scomment)
1663 {
1664 comstart_parity = parity;
1665 comstart_pos = from;
1666 }
1667
1668 /* If we find another earlier comment-ender,
1669 any comment-starts earlier than that don't count
1670 (because they go with the earlier comment-ender). */
1671 if (code == Sendcomment
1672 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
1673 break;
1674
1675 /* Assume a defun-start point is outside of strings. */
1676 if (code == Sopen
1677 && (from == stop || FETCH_BYTE (from - 1) == '\n'))
1678 break;
1679 }
1680
1681 if (comstart_pos == 0)
1682 from = comment_end;
1683 /* If the earliest comment starter
1684 is followed by uniform paired string quotes or none,
1685 we know it can't be inside a string
1686 since if it were then the comment ender would be inside one.
1687 So it does start a comment. Skip back to it. */
1688 else if (comstart_parity == 0 && !string_lossage)
1689 from = comstart_pos;
1690 else
1691 {
1692 /* We had two kinds of string delimiters mixed up
1693 together. Decode this going forwards.
1694 Scan fwd from the previous comment ender
1695 to the one in question; this records where we
1696 last passed a comment starter. */
1697 struct lisp_parse_state state;
1698 scan_sexps_forward (&state, find_defun_start (comment_end),
1699 comment_end - 1, -10000, 0, Qnil, 0);
1700 if (state.incomment)
1701 from = state.comstr_start;
1702 else
1703 /* We can't grok this as a comment; scan it normally. */
1704 from = comment_end;
1705 }
1706 }
1707 #endif /* 0 */
1708 /* We have skipped one comment. */
1709 break;
1710 }
1711 else if (code != Swhitespace && code != Scomment)
1712 {
1713 leave:
1714 immediate_quit = 0;
1715 INC_POS (from);
1716 SET_PT (from);
1717 return Qnil;
1718 }
1719 }
1720
1721 count1++;
1722 }
1723
1724 SET_PT (from);
1725 immediate_quit = 0;
1726 return Qt;
1727 }
1728 \f
1729 Lisp_Object
1730 scan_lists (from, count, depth, sexpflag)
1731 register int from;
1732 int count, depth, sexpflag;
1733 {
1734 Lisp_Object val;
1735 register int stop = count > 0 ? ZV : BEGV;
1736 register int c, c1;
1737 int stringterm;
1738 int quoted;
1739 int mathexit = 0;
1740 register enum syntaxcode code, temp_code;
1741 int min_depth = depth; /* Err out if depth gets less than this. */
1742 int comstyle = 0; /* style of comment encountered */
1743 int temp_pos;
1744 int last_good = from;
1745 int found;
1746
1747 if (depth > 0) min_depth = 0;
1748
1749 immediate_quit = 1;
1750 QUIT;
1751
1752 SETUP_SYNTAX_TABLE (from, count);
1753 while (count > 0)
1754 {
1755 while (from < stop)
1756 {
1757 UPDATE_SYNTAX_TABLE_FORWARD (from);
1758 c = FETCH_CHAR (from);
1759 code = SYNTAX (c);
1760 if (depth == min_depth)
1761 last_good = from;
1762 INC_POS (from);
1763 UPDATE_SYNTAX_TABLE_FORWARD (from);
1764 if (from < stop && SYNTAX_COMSTART_FIRST (c)
1765 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from))
1766 && parse_sexp_ignore_comments)
1767 {
1768 /* we have encountered a comment start sequence and we
1769 are ignoring all text inside comments. We must record
1770 the comment style this sequence begins so that later,
1771 only a comment end of the same style actually ends
1772 the comment section */
1773 code = Scomment;
1774 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
1775 INC_POS (from);
1776 }
1777
1778 UPDATE_SYNTAX_TABLE_FORWARD (from);
1779 if (SYNTAX_PREFIX (c))
1780 continue;
1781
1782 switch (SWITCH_ENUM_CAST (code))
1783 {
1784 case Sescape:
1785 case Scharquote:
1786 if (from == stop) goto lose;
1787 INC_POS (from);
1788 /* treat following character as a word constituent */
1789 case Sword:
1790 case Ssymbol:
1791 if (depth || !sexpflag) break;
1792 /* This word counts as a sexp; return at end of it. */
1793 while (from < stop)
1794 {
1795 UPDATE_SYNTAX_TABLE_FORWARD (from);
1796 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1797 {
1798 case Scharquote:
1799 case Sescape:
1800 INC_POS (from);
1801 if (from == stop) goto lose;
1802 break;
1803 case Sword:
1804 case Ssymbol:
1805 case Squote:
1806 break;
1807 default:
1808 goto done;
1809 }
1810 INC_POS (from);
1811 }
1812 goto done;
1813
1814 case Scomment:
1815 case Scomment_fence:
1816 if (!parse_sexp_ignore_comments) break;
1817 while (1)
1818 {
1819 if (from == stop)
1820 {
1821 if (depth == 0)
1822 goto done;
1823 goto lose;
1824 }
1825 UPDATE_SYNTAX_TABLE_FORWARD (from);
1826 c = FETCH_CHAR (from);
1827 if (code == Scomment
1828 ? (SYNTAX (c) == Sendcomment
1829 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1830 : (SYNTAX (c) == Scomment_fence))
1831 /* we have encountered a comment end of the same style
1832 as the comment sequence which began this comment
1833 section */
1834 break;
1835 INC_POS (from);
1836 if (from < stop && SYNTAX_COMEND_FIRST (c)
1837 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
1838 && SYNTAX_COMMENT_STYLE (c) == comstyle
1839 && code == Scomment)
1840 /* we have encountered a comment end of the same style
1841 as the comment sequence which began this comment
1842 section */
1843 { INC_POS (from); break; }
1844 }
1845 break;
1846
1847 case Smath:
1848 if (!sexpflag)
1849 break;
1850 if (from != stop && c == FETCH_CHAR (from))
1851 INC_POS (from);
1852 if (mathexit)
1853 {
1854 mathexit = 0;
1855 goto close1;
1856 }
1857 mathexit = 1;
1858
1859 case Sopen:
1860 if (!++depth) goto done;
1861 break;
1862
1863 case Sclose:
1864 close1:
1865 if (!--depth) goto done;
1866 if (depth < min_depth)
1867 Fsignal (Qscan_error,
1868 Fcons (build_string ("Containing expression ends prematurely"),
1869 Fcons (make_number (last_good),
1870 Fcons (make_number (from), Qnil))));
1871 break;
1872
1873 case Sstring:
1874 case Sstring_fence:
1875 temp_pos = from;
1876 DEC_POS (temp_pos);
1877 stringterm = FETCH_CHAR (temp_pos);
1878 while (1)
1879 {
1880 if (from >= stop) goto lose;
1881 UPDATE_SYNTAX_TABLE_FORWARD (from);
1882 if (code == Sstring
1883 ? (FETCH_CHAR (from) == stringterm)
1884 : SYNTAX (FETCH_CHAR (from)) == Sstring_fence)
1885 break;
1886 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1887 {
1888 case Scharquote:
1889 case Sescape:
1890 INC_POS (from);
1891 }
1892 INC_POS (from);
1893 }
1894 INC_POS (from);
1895 if (!depth && sexpflag) goto done;
1896 break;
1897 }
1898 }
1899
1900 /* Reached end of buffer. Error if within object, return nil if between */
1901 if (depth) goto lose;
1902
1903 immediate_quit = 0;
1904 return Qnil;
1905
1906 /* End of object reached */
1907 done:
1908 count--;
1909 }
1910
1911 while (count < 0)
1912 {
1913 while (from > stop)
1914 {
1915 DEC_POS (from);
1916 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1917 if (quoted = char_quoted (from))
1918 {
1919 DEC_POS (from);
1920 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1921 }
1922 c = FETCH_CHAR (from);
1923 code = SYNTAX (c);
1924 if (depth == min_depth)
1925 last_good = from;
1926 comstyle = 0;
1927 if (code == Sendcomment)
1928 comstyle = SYNTAX_COMMENT_STYLE (c);
1929 temp_pos = from;
1930 DEC_POS (temp_pos);
1931 if (from > stop && SYNTAX_COMEND_SECOND (c)
1932 && (c1 = FETCH_CHAR (temp_pos), SYNTAX_COMEND_FIRST (c1))
1933 && !char_quoted (temp_pos)
1934 && parse_sexp_ignore_comments)
1935 {
1936 /* we must record the comment style encountered so that
1937 later, we can match only the proper comment begin
1938 sequence of the same style */
1939 code = Sendcomment;
1940 comstyle = SYNTAX_COMMENT_STYLE (c1);
1941 from = temp_pos;
1942 }
1943
1944 if (SYNTAX_PREFIX (c))
1945 continue;
1946
1947 switch (SWITCH_ENUM_CAST (quoted ? Sword : code))
1948 {
1949 case Sword:
1950 case Ssymbol:
1951 if (depth || !sexpflag) break;
1952 /* This word counts as a sexp; count object finished
1953 after passing it. */
1954 while (from > stop)
1955 {
1956 temp_pos = from;
1957 DEC_POS (temp_pos);
1958 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
1959 quoted = char_quoted (temp_pos);
1960 if (quoted)
1961 {
1962 from = temp_pos;
1963 DEC_POS (temp_pos);
1964 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
1965 }
1966 c1 = FETCH_CHAR (temp_pos);
1967 temp_code = SYNTAX (c1);
1968 if (! (quoted || temp_code == Sword
1969 || temp_code == Ssymbol
1970 || temp_code == Squote))
1971 goto done2;
1972 from = temp_pos;
1973 }
1974 goto done2;
1975
1976 case Smath:
1977 if (!sexpflag)
1978 break;
1979 temp_pos = from;
1980 DEC_POS (temp_pos);
1981 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
1982 if (from != stop && c == FETCH_CHAR (temp_pos))
1983 from = temp_pos;
1984 if (mathexit)
1985 {
1986 mathexit = 0;
1987 goto open2;
1988 }
1989 mathexit = 1;
1990
1991 case Sclose:
1992 if (!++depth) goto done2;
1993 break;
1994
1995 case Sopen:
1996 open2:
1997 if (!--depth) goto done2;
1998 if (depth < min_depth)
1999 Fsignal (Qscan_error,
2000 Fcons (build_string ("Containing expression ends prematurely"),
2001 Fcons (make_number (last_good),
2002 Fcons (make_number (from), Qnil))));
2003 break;
2004
2005 case Sendcomment:
2006 if (!parse_sexp_ignore_comments)
2007 break;
2008 #if 0
2009 if (code != SYNTAX (c))
2010 /* For a two-char comment ender, we can assume
2011 it does end a comment. So scan back in a simple way. */
2012 {
2013 if (from != stop) DEC_POS (from);
2014 while (1)
2015 {
2016 if (SYNTAX (c = FETCH_CHAR (from)) == Scomment
2017 && SYNTAX_COMMENT_STYLE (c) == comstyle)
2018 break;
2019 if (from == stop)
2020 {
2021 if (depth == 0)
2022 goto done2;
2023 goto lose;
2024 }
2025 DEC_POS (from);
2026 if (SYNTAX_COMSTART_SECOND (c)
2027 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from))
2028 && SYNTAX_COMMENT_STYLE (c) == comstyle
2029 && !char_quoted (from))
2030 break;
2031 }
2032 break;
2033 }
2034 #endif /* 0 */
2035 found = back_comment (from, stop);
2036 if (found != -1) from = found;
2037 #if 0
2038 /* Look back, counting the parity of string-quotes,
2039 and recording the comment-starters seen.
2040 When we reach a safe place, assume that's not in a string;
2041 then step the main scan to the earliest comment-starter seen
2042 an even number of string quotes away from the safe place.
2043
2044 OFROM[I] is position of the earliest comment-starter seen
2045 which is I+2X quotes from the comment-end.
2046 PARITY is current parity of quotes from the comment end. */
2047 {
2048 int parity = 0;
2049 char my_stringend = 0;
2050 int string_lossage = 0;
2051 int comment_end = from;
2052 int comstart_pos = 0;
2053 int comstart_parity = 0;
2054 int scanstart = from;
2055
2056 DEC_POS (scanstart);
2057
2058 /* At beginning of range to scan, we're outside of strings;
2059 that determines quote parity to the comment-end. */
2060 while (from != stop)
2061 {
2062 /* Move back and examine a character. */
2063 DEC_POS (from);
2064
2065 c = FETCH_CHAR (from);
2066 code = SYNTAX (c);
2067
2068 /* If this char is the second of a 2-char comment sequence,
2069 back up and give the pair the appropriate syntax. */
2070 temp_pos = from;
2071 DEC_POS (temp_pos);
2072 if (from > stop && SYNTAX_COMEND_SECOND (c)
2073 && (c1 = FETCH_CHAR (temp_pos),
2074 SYNTAX_COMEND_FIRST (c1)))
2075 {
2076 code = Sendcomment;
2077 from = temp_pos;
2078 c = c1;
2079 }
2080
2081 /* If this char starts a 2-char comment start sequence,
2082 treat it like a 1-char comment starter. */
2083 temp_pos = from;
2084 INC_POS (temp_pos);
2085 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
2086 && (c1 = FETCH_CHAR (temp_pos),
2087 SYNTAX_COMSTART_SECOND (c1))
2088 && comstyle == SYNTAX_COMMENT_STYLE (c1))
2089 code = Scomment;
2090
2091 /* Ignore escaped characters. */
2092 if (char_quoted (from))
2093 continue;
2094
2095 /* Track parity of quotes. */
2096 if (code == Sstring)
2097 {
2098 parity ^= 1;
2099 if (my_stringend == 0)
2100 my_stringend = c;
2101 /* If we have two kinds of string delimiters.
2102 There's no way to grok this scanning backwards. */
2103 else if (my_stringend != c)
2104 string_lossage = 1;
2105 }
2106
2107 /* Record comment-starters according to that
2108 quote-parity to the comment-end. */
2109 if (code == Scomment)
2110 {
2111 comstart_parity = parity;
2112 comstart_pos = from;
2113 }
2114
2115 /* If we find another earlier comment-ender,
2116 any comment-starts earlier than that don't count
2117 (because they go with the earlier comment-ender). */
2118 if (code == Sendcomment
2119 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
2120 break;
2121
2122 /* Assume a defun-start point is outside of strings. */
2123 if (code == Sopen
2124 && (from == stop || FETCH_BYTE (from - 1) == '\n'))
2125 break;
2126 }
2127
2128 if (comstart_pos == 0)
2129 from = comment_end;
2130 /* If the earliest comment starter
2131 is followed by uniform paired string quotes or none,
2132 we know it can't be inside a string
2133 since if it were then the comment ender would be inside one.
2134 So it does start a comment. Skip back to it. */
2135 else if (comstart_parity == 0 && !string_lossage)
2136 from = comstart_pos;
2137 else
2138 {
2139 /* We had two kinds of string delimiters mixed up
2140 together. Decode this going forwards.
2141 Scan fwd from the previous comment ender
2142 to the one in question; this records where we
2143 last passed a comment starter. */
2144 struct lisp_parse_state state;
2145 scan_sexps_forward (&state, find_defun_start (comment_end),
2146 comment_end - 1, -10000, 0, Qnil, 0);
2147 if (state.incomment)
2148 from = state.comstr_start;
2149 else
2150 /* We can't grok this as a comment; scan it normally. */
2151 from = comment_end;
2152 }
2153 }
2154 #endif /* 0 */
2155 break;
2156
2157 case Scomment_fence:
2158 case Sstring_fence:
2159 while (1)
2160 {
2161 DEC_POS (from);
2162 if (from == stop) goto lose;
2163 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2164 if (!char_quoted (from)
2165 && SYNTAX (FETCH_CHAR (from)) == code)
2166 break;
2167 }
2168 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2169 break;
2170
2171 case Sstring:
2172 stringterm = FETCH_CHAR (from);
2173 while (1)
2174 {
2175 if (from == stop) goto lose;
2176 temp_pos = from;
2177 DEC_POS (temp_pos);
2178 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
2179 if (!char_quoted (temp_pos)
2180 && stringterm == FETCH_CHAR (temp_pos))
2181 break;
2182 from = temp_pos;
2183 }
2184 DEC_POS (from);
2185 if (!depth && sexpflag) goto done2;
2186 break;
2187 }
2188 }
2189
2190 /* Reached start of buffer. Error if within object, return nil if between */
2191 if (depth) goto lose;
2192
2193 immediate_quit = 0;
2194 return Qnil;
2195
2196 done2:
2197 count++;
2198 }
2199
2200
2201 immediate_quit = 0;
2202 XSETFASTINT (val, from);
2203 return val;
2204
2205 lose:
2206 Fsignal (Qscan_error,
2207 Fcons (build_string ("Unbalanced parentheses"),
2208 Fcons (make_number (last_good),
2209 Fcons (make_number (from), Qnil))));
2210
2211 /* NOTREACHED */
2212 }
2213
2214 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2215 "Scan from character number FROM by COUNT lists.\n\
2216 Returns the character number of the position thus found.\n\
2217 \n\
2218 If DEPTH is nonzero, paren depth begins counting from that value,\n\
2219 only places where the depth in parentheses becomes zero\n\
2220 are candidates for stopping; COUNT such places are counted.\n\
2221 Thus, a positive value for DEPTH means go out levels.\n\
2222 \n\
2223 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2224 \n\
2225 If the beginning or end of (the accessible part of) the buffer is reached\n\
2226 and the depth is wrong, an error is signaled.\n\
2227 If the depth is right but the count is not used up, nil is returned.")
2228 (from, count, depth)
2229 Lisp_Object from, count, depth;
2230 {
2231 CHECK_NUMBER (from, 0);
2232 CHECK_NUMBER (count, 1);
2233 CHECK_NUMBER (depth, 2);
2234
2235 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2236 }
2237
2238 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2239 "Scan from character number FROM by COUNT balanced expressions.\n\
2240 If COUNT is negative, scan backwards.\n\
2241 Returns the character number of the position thus found.\n\
2242 \n\
2243 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2244 \n\
2245 If the beginning or end of (the accessible part of) the buffer is reached\n\
2246 in the middle of a parenthetical grouping, an error is signaled.\n\
2247 If the beginning or end is reached between groupings\n\
2248 but before count is used up, nil is returned.")
2249 (from, count)
2250 Lisp_Object from, count;
2251 {
2252 CHECK_NUMBER (from, 0);
2253 CHECK_NUMBER (count, 1);
2254
2255 return scan_lists (XINT (from), XINT (count), 0, 1);
2256 }
2257
2258 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2259 0, 0, 0,
2260 "Move point backward over any number of chars with prefix syntax.\n\
2261 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
2262 ()
2263 {
2264 int beg = BEGV;
2265 int pos = PT;
2266 int c;
2267 int temp_pos = pos;
2268
2269 if (pos > beg)
2270 {
2271 SETUP_SYNTAX_TABLE (pos, -1);
2272 }
2273 DEC_POS (temp_pos);
2274
2275 while (pos > beg && !char_quoted (temp_pos)
2276 /* Previous statement updates syntax table. */
2277 && ((c = FETCH_CHAR (temp_pos), SYNTAX (c) == Squote)
2278 || SYNTAX_PREFIX (c)))
2279 {
2280 pos = temp_pos;
2281 DEC_POS (temp_pos);
2282 }
2283
2284 SET_PT (pos);
2285
2286 return Qnil;
2287 }
2288 \f
2289 /* Parse forward from FROM to END,
2290 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2291 and return a description of the state of the parse at END.
2292 If STOPBEFORE is nonzero, stop at the start of an atom.
2293 If COMMENTSTOP is nonzero, stop at the start of a comment. */
2294
2295 static void
2296 scan_sexps_forward (stateptr, from, end, targetdepth,
2297 stopbefore, oldstate, commentstop)
2298 struct lisp_parse_state *stateptr;
2299 register int from;
2300 int end, targetdepth, stopbefore;
2301 Lisp_Object oldstate;
2302 int commentstop;
2303 {
2304 struct lisp_parse_state state;
2305
2306 register enum syntaxcode code;
2307 struct level { int last, prev; };
2308 struct level levelstart[100];
2309 register struct level *curlevel = levelstart;
2310 struct level *endlevel = levelstart + 100;
2311 int prev;
2312 register int depth; /* Paren depth of current scanning location.
2313 level - levelstart equals this except
2314 when the depth becomes negative. */
2315 int mindepth; /* Lowest DEPTH value seen. */
2316 int start_quoted = 0; /* Nonzero means starting after a char quote */
2317 Lisp_Object tem;
2318 int prev_from; /* Keep one character before FROM. */
2319 int boundary_stop = commentstop == -1;
2320 int nofence;
2321
2322 prev_from = from;
2323 DEC_POS (prev_from);
2324
2325 /* Use this macro instead of `from++'. */
2326 #define INC_FROM do { prev_from = from; INC_POS (from); } while (0)
2327
2328 immediate_quit = 1;
2329 QUIT;
2330
2331 SETUP_SYNTAX_TABLE (from, 1);
2332
2333 if (NILP (oldstate))
2334 {
2335 depth = 0;
2336 state.instring = -1;
2337 state.incomment = 0;
2338 state.comstyle = 0; /* comment style a by default. */
2339 state.comstr_start = -1; /* no comment/string seen. */
2340 }
2341 else
2342 {
2343 tem = Fcar (oldstate);
2344 if (!NILP (tem))
2345 depth = XINT (tem);
2346 else
2347 depth = 0;
2348
2349 oldstate = Fcdr (oldstate);
2350 oldstate = Fcdr (oldstate);
2351 oldstate = Fcdr (oldstate);
2352 tem = Fcar (oldstate);
2353 /* Check whether we are inside string_fence-style string: */
2354 state.instring = ( !NILP (tem)
2355 ? ( INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2356 : -1);
2357
2358 oldstate = Fcdr (oldstate);
2359 tem = Fcar (oldstate);
2360 state.incomment = !NILP (tem);
2361
2362 oldstate = Fcdr (oldstate);
2363 tem = Fcar (oldstate);
2364 start_quoted = !NILP (tem);
2365
2366 /* if the eight element of the list is nil, we are in comment
2367 style a. If it is non-nil, we are in comment style b */
2368 oldstate = Fcdr (oldstate);
2369 oldstate = Fcdr (oldstate);
2370 tem = Fcar (oldstate);
2371 state.comstyle = NILP (tem) ? 0 : ( EQ (tem, Qsyntax_table)
2372 ? ST_COMMENT_STYLE : 1 );
2373
2374 oldstate = Fcdr (oldstate);
2375 tem = Fcar (oldstate);
2376 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
2377 }
2378 state.quoted = 0;
2379 mindepth = depth;
2380
2381 curlevel->prev = -1;
2382 curlevel->last = -1;
2383
2384 /* Enter the loop at a place appropriate for initial state. */
2385
2386 if (state.incomment) goto startincomment;
2387 if (state.instring >= 0)
2388 {
2389 nofence = state.instring != ST_STRING_STYLE;
2390 if (start_quoted) goto startquotedinstring;
2391 goto startinstring;
2392 }
2393 if (start_quoted) goto startquoted;
2394
2395 while (from < end)
2396 {
2397 UPDATE_SYNTAX_TABLE_FORWARD (from);
2398 code = SYNTAX (FETCH_CHAR (from));
2399 INC_FROM;
2400 if (code == Scomment)
2401 state.comstr_start = prev_from;
2402
2403 else if (code == Scomment_fence
2404 || (from < end && SYNTAX_COMSTART_FIRST (FETCH_CHAR (prev_from))
2405 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from))))
2406 {
2407 /* Record the comment style we have entered so that only
2408 the comment-end sequence of the same style actually
2409 terminates the comment section. */
2410 state.comstyle = ( code == Scomment_fence
2411 ? ST_COMMENT_STYLE
2412 : SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)));
2413 state.comstr_start = prev_from;
2414 if (code != Scomment_fence) INC_FROM;
2415 code = Scomment;
2416 }
2417
2418 if (SYNTAX_PREFIX (FETCH_CHAR (prev_from)))
2419 continue;
2420 switch (SWITCH_ENUM_CAST (code))
2421 {
2422 case Sescape:
2423 case Scharquote:
2424 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2425 curlevel->last = prev_from;
2426 startquoted:
2427 if (from == end) goto endquoted;
2428 INC_FROM;
2429 goto symstarted;
2430 /* treat following character as a word constituent */
2431 case Sword:
2432 case Ssymbol:
2433 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2434 curlevel->last = prev_from;
2435 symstarted:
2436 while (from < end)
2437 {
2438 UPDATE_SYNTAX_TABLE_FORWARD (from);
2439 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
2440 {
2441 case Scharquote:
2442 case Sescape:
2443 INC_FROM;
2444 if (from == end) goto endquoted;
2445 break;
2446 case Sword:
2447 case Ssymbol:
2448 case Squote:
2449 break;
2450 default:
2451 goto symdone;
2452 }
2453 INC_FROM;
2454 }
2455 symdone:
2456 curlevel->prev = curlevel->last;
2457 break;
2458
2459 startincomment:
2460 if (commentstop == 1)
2461 goto done;
2462 if (from != BEGV)
2463 {
2464 /* Enter the loop in the middle so that we find
2465 a 2-char comment ender if we start in the middle of it. */
2466 prev = FETCH_CHAR (prev_from);
2467 goto startincomment_1;
2468 }
2469 /* At beginning of buffer, enter the loop the ordinary way. */
2470 state.incomment = 1;
2471 goto commentloop;
2472
2473 case Scomment:
2474 state.incomment = 1;
2475 if (commentstop || boundary_stop) goto done;
2476 commentloop:
2477 while (1)
2478 {
2479 if (from == end) goto done;
2480 UPDATE_SYNTAX_TABLE_FORWARD (from);
2481 prev = FETCH_CHAR (from);
2482 if (SYNTAX (prev) == Sendcomment
2483 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
2484 /* Only terminate the comment section if the endcomment
2485 of the same style as the start sequence has been
2486 encountered. */
2487 break;
2488 if (state.comstyle == ST_COMMENT_STYLE
2489 && SYNTAX (prev) == Scomment_fence)
2490 break;
2491 INC_FROM;
2492 startincomment_1:
2493 if (from < end && SYNTAX_COMEND_FIRST (prev)
2494 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
2495 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
2496 /* Only terminate the comment section if the end-comment
2497 sequence of the same style as the start sequence has
2498 been encountered. */
2499 { break; }
2500 }
2501 INC_FROM;
2502 state.incomment = 0;
2503 state.comstyle = 0; /* reset the comment style */
2504 if (boundary_stop) goto done;
2505 break;
2506
2507 case Sopen:
2508 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2509 depth++;
2510 /* curlevel++->last ran into compiler bug on Apollo */
2511 curlevel->last = prev_from;
2512 if (++curlevel == endlevel)
2513 error ("Nesting too deep for parser");
2514 curlevel->prev = -1;
2515 curlevel->last = -1;
2516 if (targetdepth == depth) goto done;
2517 break;
2518
2519 case Sclose:
2520 depth--;
2521 if (depth < mindepth)
2522 mindepth = depth;
2523 if (curlevel != levelstart)
2524 curlevel--;
2525 curlevel->prev = curlevel->last;
2526 if (targetdepth == depth) goto done;
2527 break;
2528
2529 case Sstring:
2530 case Sstring_fence:
2531 state.comstr_start = from - 1;
2532 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2533 curlevel->last = prev_from;
2534 state.instring = (code == Sstring
2535 ? (FETCH_CHAR (prev_from))
2536 : ST_STRING_STYLE);
2537 if (boundary_stop) goto done;
2538 startinstring:
2539 {
2540 nofence = state.instring != ST_STRING_STYLE;
2541
2542 while (1)
2543 {
2544 int c;
2545
2546 if (from >= end) goto done;
2547 c = FETCH_CHAR (from);
2548 if (nofence && c == state.instring) break;
2549 UPDATE_SYNTAX_TABLE_FORWARD (from);
2550 switch (SWITCH_ENUM_CAST (SYNTAX (c)))
2551 {
2552 case Sstring_fence:
2553 if (!nofence) goto string_end;
2554 break;
2555 case Scharquote:
2556 case Sescape:
2557 INC_FROM;
2558 startquotedinstring:
2559 if (from >= end) goto endquoted;
2560 }
2561 INC_FROM;
2562 }
2563 }
2564 string_end:
2565 state.instring = -1;
2566 curlevel->prev = curlevel->last;
2567 INC_FROM;
2568 if (boundary_stop) goto done;
2569 break;
2570
2571 case Smath:
2572 break;
2573 }
2574 }
2575 goto done;
2576
2577 stop: /* Here if stopping before start of sexp. */
2578 from = prev_from; /* We have just fetched the char that starts it; */
2579 goto done; /* but return the position before it. */
2580
2581 endquoted:
2582 state.quoted = 1;
2583 done:
2584 state.depth = depth;
2585 state.mindepth = mindepth;
2586 state.thislevelstart = curlevel->prev;
2587 state.prevlevelstart
2588 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2589 state.location = from;
2590 immediate_quit = 0;
2591
2592 *stateptr = state;
2593 }
2594
2595 /* This comment supplies the doc string for parse-partial-sexp,
2596 for make-docfile to see. We cannot put this in the real DEFUN
2597 due to limits in the Unix cpp.
2598
2599 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
2600 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
2601 Parsing stops at TO or when certain criteria are met;\n\
2602 point is set to where parsing stops.\n\
2603 If fifth arg STATE is omitted or nil,\n\
2604 parsing assumes that FROM is the beginning of a function.\n\
2605 Value is a list of nine elements describing final state of parsing:\n\
2606 0. depth in parens.\n\
2607 1. character address of start of innermost containing list; nil if none.\n\
2608 2. character address of start of last complete sexp terminated.\n\
2609 3. non-nil if inside a string.\n\
2610 (it is the character that will terminate the string,\n\
2611 or t if the string should be terminated by an explicit\n\
2612 `syntax-table' property.)\n\
2613 4. t if inside a comment.\n\
2614 5. t if following a quote character.\n\
2615 6. the minimum paren-depth encountered during this scan.\n\
2616 7. t if in a comment of style `b'; `syntax-table' if given by an explicit\n\
2617 `syntax-table' property.\n\
2618 8. character address of start of last comment or string; nil if none.\n\
2619 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
2620 in parentheses becomes equal to TARGETDEPTH.\n\
2621 Fourth arg STOPBEFORE non-nil means stop when come to\n\
2622 any character that starts a sexp.\n\
2623 Fifth arg STATE is an eight-list like what this function returns.\n\
2624 It is used to initialize the state of the parse. Its second and third
2625 elements are ignored.
2626 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment. If\n\
2627 it is `syntax-table', stop after the start of a comment or a string, or\n\
2628 after end of a comment or a string.")
2629 (from, to, targetdepth, stopbefore, state, commentstop)
2630 */
2631
2632 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
2633 0 /* See immediately above */)
2634 (from, to, targetdepth, stopbefore, oldstate, commentstop)
2635 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
2636 {
2637 struct lisp_parse_state state;
2638 int target;
2639
2640 if (!NILP (targetdepth))
2641 {
2642 CHECK_NUMBER (targetdepth, 3);
2643 target = XINT (targetdepth);
2644 }
2645 else
2646 target = -100000; /* We won't reach this depth */
2647
2648 validate_region (&from, &to);
2649 scan_sexps_forward (&state, XINT (from), XINT (to),
2650 target, !NILP (stopbefore), oldstate,
2651 (NILP (commentstop)
2652 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
2653
2654 SET_PT (state.location);
2655
2656 return Fcons (make_number (state.depth),
2657 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
2658 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
2659 Fcons (state.instring >= 0
2660 ? (state.instring == ST_STRING_STYLE
2661 ? Qt : make_number (state.instring)) : Qnil,
2662 Fcons (state.incomment ? Qt : Qnil,
2663 Fcons (state.quoted ? Qt : Qnil,
2664 Fcons (make_number (state.mindepth),
2665 Fcons (state.comstyle
2666 ? (state.comstyle == ST_COMMENT_STYLE
2667 ? Qsyntax_table : Qt) : Qnil,
2668 Fcons (state.comstr_start != -1 ? make_number (state.comstr_start) : Qnil,
2669 Qnil)))))))));
2670 }
2671 \f
2672 init_syntax_once ()
2673 {
2674 register int i, c;
2675 Lisp_Object temp;
2676
2677 /* This has to be done here, before we call Fmake_char_table. */
2678 Qsyntax_table = intern ("syntax-table");
2679 staticpro (&Qsyntax_table);
2680
2681 /* Intern this now in case it isn't already done.
2682 Setting this variable twice is harmless.
2683 But don't staticpro it here--that is done in alloc.c. */
2684 Qchar_table_extra_slots = intern ("char-table-extra-slots");
2685
2686 /* Create objects which can be shared among syntax tables. */
2687 Vsyntax_code_object = Fmake_vector (13, Qnil);
2688 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
2689 XVECTOR (Vsyntax_code_object)->contents[i]
2690 = Fcons (make_number (i), Qnil);
2691
2692 /* Now we are ready to set up this property, so we can
2693 create syntax tables. */
2694 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
2695
2696 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
2697
2698 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
2699
2700 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
2701 for (i = 'a'; i <= 'z'; i++)
2702 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2703 for (i = 'A'; i <= 'Z'; i++)
2704 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2705 for (i = '0'; i <= '9'; i++)
2706 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2707
2708 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
2709 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
2710
2711 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
2712 Fcons (make_number (Sopen), make_number (')')));
2713 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
2714 Fcons (make_number (Sclose), make_number ('(')));
2715 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
2716 Fcons (make_number (Sopen), make_number (']')));
2717 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
2718 Fcons (make_number (Sclose), make_number ('[')));
2719 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
2720 Fcons (make_number (Sopen), make_number ('}')));
2721 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
2722 Fcons (make_number (Sclose), make_number ('{')));
2723 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
2724 Fcons (make_number ((int) Sstring), Qnil));
2725 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
2726 Fcons (make_number ((int) Sescape), Qnil));
2727
2728 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
2729 for (i = 0; i < 10; i++)
2730 {
2731 c = "_-+*/&|<>="[i];
2732 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2733 }
2734
2735 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
2736 for (i = 0; i < 12; i++)
2737 {
2738 c = ".,;:?!#@~^'`"[i];
2739 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2740 }
2741 }
2742
2743 syms_of_syntax ()
2744 {
2745 Qsyntax_table_p = intern ("syntax-table-p");
2746 staticpro (&Qsyntax_table_p);
2747
2748 staticpro (&Vsyntax_code_object);
2749
2750 Qscan_error = intern ("scan-error");
2751 staticpro (&Qscan_error);
2752 Fput (Qscan_error, Qerror_conditions,
2753 Fcons (Qerror, Qnil));
2754 Fput (Qscan_error, Qerror_message,
2755 build_string ("Scan error"));
2756
2757 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
2758 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
2759
2760 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
2761 "Non-nil means `forward-sexp', etc., grant `syntax-table' property.\n\
2762 The value of this property should be either a syntax table, or a cons\n\
2763 of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric\n\
2764 syntax code, MATCHCHAR being nil or the character to match (which is\n\
2765 relevant only for open/close type.");
2766
2767 words_include_escapes = 0;
2768 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
2769 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
2770
2771 defsubr (&Ssyntax_table_p);
2772 defsubr (&Ssyntax_table);
2773 defsubr (&Sstandard_syntax_table);
2774 defsubr (&Scopy_syntax_table);
2775 defsubr (&Sset_syntax_table);
2776 defsubr (&Schar_syntax);
2777 defsubr (&Smatching_paren);
2778 defsubr (&Smodify_syntax_entry);
2779 defsubr (&Sdescribe_syntax);
2780
2781 defsubr (&Sforward_word);
2782
2783 defsubr (&Sskip_chars_forward);
2784 defsubr (&Sskip_chars_backward);
2785 defsubr (&Sskip_syntax_forward);
2786 defsubr (&Sskip_syntax_backward);
2787
2788 defsubr (&Sforward_comment);
2789 defsubr (&Sscan_lists);
2790 defsubr (&Sscan_sexps);
2791 defsubr (&Sbackward_prefix_chars);
2792 defsubr (&Sparse_partial_sexp);
2793 }