1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2015 Free
3 Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
23 #include <sys/types.h>
27 #include "character.h"
33 #include "intervals.h"
36 /* Make syntax table lookup grant data in gl_state. */
37 #define SYNTAX(c) syntax_property (c, 1)
38 #define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
39 #define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
41 /* Eight single-bit flags have the following meanings:
42 1. This character is the first of a two-character comment-start sequence.
43 2. This character is the second of a two-character comment-start sequence.
44 3. This character is the first of a two-character comment-end sequence.
45 4. This character is the second of a two-character comment-end sequence.
46 5. This character is a prefix, for backward-prefix-chars.
47 6. The char is part of a delimiter for comments of style "b".
48 7. This character is part of a nestable comment sequence.
49 8. The char is part of a delimiter for comments of style "c".
50 Note that any two-character sequence whose first character has flag 1
51 and whose second character has flag 2 will be interpreted as a comment start.
53 Bits 6 and 8 discriminate among different comment styles.
54 Languages such as C++ allow two orthogonal syntax start/end pairs
55 and bit 6 determines whether a comment-end or Scommentend
56 ends style a or b. Comment markers can start style a, b, c, or bc.
57 Style a is always the default.
58 For 2-char comment markers, the style b flag is looked up only on the second
59 char of the comment marker and on the first char of the comment ender.
60 For style c (like the nested flag), the flag can be placed on any of
63 /* These functions extract specific flags from an integer
64 that holds the syntax code and the flags. */
67 SYNTAX_FLAGS_COMSTART_FIRST (int flags
)
69 return (flags
>> 16) & 1;
72 SYNTAX_FLAGS_COMSTART_SECOND (int flags
)
74 return (flags
>> 17) & 1;
77 SYNTAX_FLAGS_COMEND_FIRST (int flags
)
79 return (flags
>> 18) & 1;
82 SYNTAX_FLAGS_COMEND_SECOND (int flags
)
84 return (flags
>> 19) & 1;
87 SYNTAX_FLAGS_PREFIX (int flags
)
89 return (flags
>> 20) & 1;
92 SYNTAX_FLAGS_COMMENT_STYLEB (int flags
)
94 return (flags
>> 21) & 1;
97 SYNTAX_FLAGS_COMMENT_STYLEC (int flags
)
99 return (flags
>> 23) & 1;
102 SYNTAX_FLAGS_COMMENT_STYLEC2 (int flags
)
104 return (flags
>> 22) & 2; /* SYNTAX_FLAGS_COMMENT_STYLEC (flags) * 2 */
107 SYNTAX_FLAGS_COMMENT_NESTED (int flags
)
109 return (flags
>> 22) & 1;
112 /* FLAGS should be the flags of the main char of the comment marker, e.g.
113 the second for comstart and the first for comend. */
115 SYNTAX_FLAGS_COMMENT_STYLE (int flags
, int other_flags
)
117 return (SYNTAX_FLAGS_COMMENT_STYLEB (flags
)
118 | SYNTAX_FLAGS_COMMENT_STYLEC2 (flags
)
119 | SYNTAX_FLAGS_COMMENT_STYLEC2 (other_flags
));
122 /* Extract a particular flag for a given character. */
125 SYNTAX_COMEND_FIRST (int c
)
127 return SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c
));
130 /* We use these constants in place for comment-style and
131 string-ender-char to distinguish comments/strings started by
132 comment_fence and string_fence codes. */
136 ST_COMMENT_STYLE
= 256 + 1,
137 ST_STRING_STYLE
= 256 + 2
140 /* This is the internal form of the parse state used in parse-partial-sexp. */
142 struct lisp_parse_state
144 EMACS_INT depth
; /* Depth at end of parsing. */
145 int instring
; /* -1 if not within string, else desired terminator. */
146 EMACS_INT incomment
; /* -1 if in unnestable comment else comment nesting */
147 int comstyle
; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
148 bool quoted
; /* True if just after an escape char at end of parsing. */
149 EMACS_INT mindepth
; /* Minimum depth seen while scanning. */
150 /* Char number of most recent start-of-expression at current level */
151 ptrdiff_t thislevelstart
;
152 /* Char number of start of containing expression */
153 ptrdiff_t prevlevelstart
;
154 ptrdiff_t location
; /* Char number at which parsing stopped. */
155 ptrdiff_t location_byte
; /* Corresponding byte position. */
156 ptrdiff_t comstr_start
; /* Position of last comment/string starter. */
157 Lisp_Object levelstarts
; /* Char numbers of starts-of-expression
158 of levels (starting from outermost). */
161 /* These variables are a cache for finding the start of a defun.
162 find_start_pos is the place for which the defun start was found.
163 find_start_value is the defun start position found for it.
164 find_start_value_byte is the corresponding byte position.
165 find_start_buffer is the buffer it was found in.
166 find_start_begv is the BEGV value when it was found.
167 find_start_modiff is the value of MODIFF when it was found. */
169 static ptrdiff_t find_start_pos
;
170 static ptrdiff_t find_start_value
;
171 static ptrdiff_t find_start_value_byte
;
172 static struct buffer
*find_start_buffer
;
173 static ptrdiff_t find_start_begv
;
174 static EMACS_INT find_start_modiff
;
177 static Lisp_Object
skip_chars (bool, Lisp_Object
, Lisp_Object
, bool);
178 static Lisp_Object
skip_syntaxes (bool, Lisp_Object
, Lisp_Object
);
179 static Lisp_Object
scan_lists (EMACS_INT
, EMACS_INT
, EMACS_INT
, bool);
180 static void scan_sexps_forward (struct lisp_parse_state
*,
181 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT
,
182 bool, Lisp_Object
, int);
183 static bool in_classes (int, Lisp_Object
);
185 /* This setter is used only in this file, so it can be private. */
187 bset_syntax_table (struct buffer
*b
, Lisp_Object val
)
189 b
->syntax_table_
= val
;
192 /* Whether the syntax of the character C has the prefix flag set. */
194 syntax_prefix_flag_p (int c
)
196 return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c
));
199 struct gl_state_s gl_state
; /* Global state of syntax parser. */
201 enum { INTERVALS_AT_ONCE
= 10 }; /* 1 + max-number of intervals
202 to scan to property-change. */
204 /* Set the syntax entry VAL for char C in table TABLE. */
207 SET_RAW_SYNTAX_ENTRY (Lisp_Object table
, int c
, Lisp_Object val
)
209 CHAR_TABLE_SET (table
, c
, val
);
212 /* Set the syntax entry VAL for char-range RANGE in table TABLE.
213 RANGE is a cons (FROM . TO) specifying the range of characters. */
216 SET_RAW_SYNTAX_ENTRY_RANGE (Lisp_Object table
, Lisp_Object range
,
219 Fset_char_table_range (table
, range
, val
);
222 /* Extract the information from the entry for character C
223 in the current syntax table. */
228 Lisp_Object ent
= SYNTAX_ENTRY (c
);
229 return CONSP (ent
) ? XCDR (ent
) : Qnil
;
232 /* This should be called with FROM at the start of forward
233 search, or after the last position of the backward search. It
234 makes sure that the first char is picked up with correct table, so
235 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
237 Sign of COUNT gives the direction of the search.
241 SETUP_SYNTAX_TABLE (ptrdiff_t from
, ptrdiff_t count
)
243 SETUP_BUFFER_SYNTAX_TABLE ();
244 gl_state
.b_property
= BEGV
;
245 gl_state
.e_property
= ZV
+ 1;
246 gl_state
.object
= Qnil
;
248 if (parse_sexp_lookup_properties
)
250 if (count
> 0 || from
> BEGV
)
251 update_syntax_table (count
> 0 ? from
: from
- 1, count
, true, Qnil
);
252 if (gl_state
.e_property
> parse_sexp_propertize_done
)
254 gl_state
.e_property
= parse_sexp_propertize_done
;
255 gl_state
.e_property_truncated
= true;
260 /* Same as above, but in OBJECT. If OBJECT is nil, use current buffer.
261 If it is t (which is only used in fast_c_string_match_ignore_case),
262 ignore properties altogether.
264 This is meant for regex.c to use. For buffers, regex.c passes arguments
265 to the UPDATE_SYNTAX_TABLE functions which are relative to BEGV.
266 So if it is a buffer, we set the offset field to BEGV. */
269 SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object
,
270 ptrdiff_t from
, ptrdiff_t count
)
272 SETUP_BUFFER_SYNTAX_TABLE ();
273 gl_state
.object
= object
;
274 if (BUFFERP (gl_state
.object
))
276 struct buffer
*buf
= XBUFFER (gl_state
.object
);
277 gl_state
.b_property
= 1;
278 gl_state
.e_property
= BUF_ZV (buf
) - BUF_BEGV (buf
) + 1;
279 gl_state
.offset
= BUF_BEGV (buf
) - 1;
281 else if (NILP (gl_state
.object
))
283 gl_state
.b_property
= 1;
284 gl_state
.e_property
= ZV
- BEGV
+ 1;
285 gl_state
.offset
= BEGV
- 1;
287 else if (EQ (gl_state
.object
, Qt
))
289 gl_state
.b_property
= 0;
290 gl_state
.e_property
= PTRDIFF_MAX
;
295 gl_state
.b_property
= 0;
296 gl_state
.e_property
= 1 + SCHARS (gl_state
.object
);
299 if (parse_sexp_lookup_properties
)
300 update_syntax_table (from
+ gl_state
.offset
- (count
<= 0),
301 count
, 1, gl_state
.object
);
304 /* Update gl_state to an appropriate interval which contains CHARPOS. The
305 sign of COUNT give the relative position of CHARPOS wrt the previously
306 valid interval. If INIT, only [be]_property fields of gl_state are
307 valid at start, the rest is filled basing on OBJECT.
309 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
310 direction than the intervals - or in an interval. We update the
311 current syntax-table basing on the property of this interval, and
312 update the interval to start further than CHARPOS - or be
313 NULL. We also update lim_property to be the next value of
314 charpos to call this subroutine again - or be before/after the
315 start/end of OBJECT. */
318 update_syntax_table (ptrdiff_t charpos
, EMACS_INT count
, bool init
,
321 Lisp_Object tmp_table
;
323 bool invalidate
= true;
328 gl_state
.old_prop
= Qnil
;
329 gl_state
.start
= gl_state
.b_property
;
330 gl_state
.stop
= gl_state
.e_property
;
331 i
= interval_of (charpos
, object
);
332 gl_state
.backward_i
= gl_state
.forward_i
= i
;
336 /* interval_of updates only ->position of the return value, so
337 update the parents manually to speed up update_interval. */
338 while (!NULL_PARENT (i
))
340 if (AM_RIGHT_CHILD (i
))
341 INTERVAL_PARENT (i
)->position
= i
->position
342 - LEFT_TOTAL_LENGTH (i
) + TOTAL_LENGTH (i
) /* right end */
343 - TOTAL_LENGTH (INTERVAL_PARENT (i
))
344 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i
));
346 INTERVAL_PARENT (i
)->position
= i
->position
- LEFT_TOTAL_LENGTH (i
)
348 i
= INTERVAL_PARENT (i
);
350 i
= gl_state
.forward_i
;
351 gl_state
.b_property
= i
->position
- gl_state
.offset
;
352 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
355 i
= count
> 0 ? gl_state
.forward_i
: gl_state
.backward_i
;
357 /* We are guaranteed to be called with CHARPOS either in i,
360 error ("Error in syntax_table logic for to-the-end intervals");
361 else if (charpos
< i
->position
) /* Move left. */
364 error ("Error in syntax_table logic for intervals <-");
365 /* Update the interval. */
366 i
= update_interval (i
, charpos
);
367 if (INTERVAL_LAST_POS (i
) != gl_state
.b_property
)
370 gl_state
.forward_i
= i
;
371 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
374 else if (charpos
>= INTERVAL_LAST_POS (i
)) /* Move right. */
377 error ("Error in syntax_table logic for intervals ->");
378 /* Update the interval. */
379 i
= update_interval (i
, charpos
);
380 if (i
->position
!= gl_state
.e_property
)
383 gl_state
.backward_i
= i
;
384 gl_state
.b_property
= i
->position
- gl_state
.offset
;
389 tmp_table
= textget (i
->plist
, Qsyntax_table
);
392 invalidate
= !EQ (tmp_table
, gl_state
.old_prop
); /* Need to invalidate? */
394 if (invalidate
) /* Did not get to adjacent interval. */
395 { /* with the same table => */
396 /* invalidate the old range. */
399 gl_state
.backward_i
= i
;
400 gl_state
.b_property
= i
->position
- gl_state
.offset
;
404 gl_state
.forward_i
= i
;
405 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
409 if (!EQ (tmp_table
, gl_state
.old_prop
))
411 gl_state
.current_syntax_table
= tmp_table
;
412 gl_state
.old_prop
= tmp_table
;
413 if (EQ (Fsyntax_table_p (tmp_table
), Qt
))
415 gl_state
.use_global
= 0;
417 else if (CONSP (tmp_table
))
419 gl_state
.use_global
= 1;
420 gl_state
.global_code
= tmp_table
;
424 gl_state
.use_global
= 0;
425 gl_state
.current_syntax_table
= BVAR (current_buffer
, syntax_table
);
431 if (cnt
&& !EQ (tmp_table
, textget (i
->plist
, Qsyntax_table
)))
435 gl_state
.e_property
= i
->position
- gl_state
.offset
;
436 gl_state
.forward_i
= i
;
441 = i
->position
+ LENGTH (i
) - gl_state
.offset
;
442 gl_state
.backward_i
= i
;
446 else if (cnt
== INTERVALS_AT_ONCE
)
451 = i
->position
+ LENGTH (i
) - gl_state
.offset
452 /* e_property at EOB is not set to ZV but to ZV+1, so that
453 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
454 having to check eob between the two. */
455 + (next_interval (i
) ? 0 : 1);
456 gl_state
.forward_i
= i
;
460 gl_state
.b_property
= i
->position
- gl_state
.offset
;
461 gl_state
.backward_i
= i
;
466 i
= count
> 0 ? next_interval (i
) : previous_interval (i
);
468 eassert (i
== NULL
); /* This property goes to the end. */
471 gl_state
.e_property
= gl_state
.stop
;
472 gl_state
.forward_i
= i
;
475 gl_state
.b_property
= gl_state
.start
;
479 parse_sexp_propertize (ptrdiff_t charpos
)
481 EMACS_INT modiffs
= CHARS_MODIFF
;
482 safe_call1 (Vparse_sexp_propertize_function
,
483 make_number (1 + charpos
));
484 if (modiffs
!= CHARS_MODIFF
)
485 error ("parse-sexp-propertize-function modified the buffer!");
486 if (parse_sexp_propertize_done
<= charpos
)
487 error ("parse-sexp-propertize-function did not move"
488 " parse-sexp-propertize-done");
489 SETUP_SYNTAX_TABLE (charpos
, 1);
493 update_syntax_table_forward (ptrdiff_t charpos
, bool init
,
496 if (!(gl_state
.e_property_truncated
))
497 update_syntax_table (charpos
, 1, init
, object
);
498 if ((gl_state
.e_property
> parse_sexp_propertize_done
499 || gl_state
.e_property_truncated
)
502 if (parse_sexp_propertize_done
> charpos
)
504 gl_state
.e_property
= parse_sexp_propertize_done
;
505 gl_state
.e_property_truncated
= true;
508 parse_sexp_propertize (charpos
);
512 /* Returns true if char at CHARPOS is quoted.
513 Global syntax-table data should be set up already to be good at CHARPOS
514 or after. On return global syntax data is good for lookup at CHARPOS. */
517 char_quoted (ptrdiff_t charpos
, ptrdiff_t bytepos
)
519 enum syntaxcode code
;
520 ptrdiff_t beg
= BEGV
;
522 ptrdiff_t orig
= charpos
;
524 while (charpos
> beg
)
527 DEC_BOTH (charpos
, bytepos
);
529 UPDATE_SYNTAX_TABLE_BACKWARD (charpos
);
530 c
= FETCH_CHAR_AS_MULTIBYTE (bytepos
);
532 if (! (code
== Scharquote
|| code
== Sescape
))
538 UPDATE_SYNTAX_TABLE (orig
);
542 /* Return the bytepos one character before BYTEPOS.
543 We assume that BYTEPOS is not at the start of the buffer. */
546 dec_bytepos (ptrdiff_t bytepos
)
548 if (NILP (BVAR (current_buffer
, enable_multibyte_characters
)))
555 /* Return a defun-start position before POS and not too far before.
556 It should be the last one before POS, or nearly the last.
558 When open_paren_in_column_0_is_defun_start is nonzero,
559 only the beginning of the buffer is treated as a defun-start.
561 We record the information about where the scan started
562 and what its result was, so that another call in the same area
563 can return the same value very quickly.
565 There is no promise at which position the global syntax data is
566 valid on return from the subroutine, so the caller should explicitly
567 update the global data. */
570 find_defun_start (ptrdiff_t pos
, ptrdiff_t pos_byte
)
572 ptrdiff_t opoint
= PT
, opoint_byte
= PT_BYTE
;
574 /* Use previous finding, if it's valid and applies to this inquiry. */
575 if (current_buffer
== find_start_buffer
576 /* Reuse the defun-start even if POS is a little farther on.
577 POS might be in the next defun, but that's ok.
578 Our value may not be the best possible, but will still be usable. */
579 && pos
<= find_start_pos
+ 1000
580 && pos
>= find_start_value
581 && BEGV
== find_start_begv
582 && MODIFF
== find_start_modiff
)
583 return find_start_value
;
585 if (!open_paren_in_column_0_is_defun_start
)
587 find_start_value
= BEGV
;
588 find_start_value_byte
= BEGV_BYTE
;
592 /* Back up to start of line. */
593 scan_newline (pos
, pos_byte
, BEGV
, BEGV_BYTE
, -1, 1);
595 /* We optimize syntax-table lookup for rare updates. Thus we accept
596 only those `^\s(' which are good in global _and_ text-property
598 SETUP_BUFFER_SYNTAX_TABLE ();
603 /* Open-paren at start of line means we may have found our
605 c
= FETCH_CHAR_AS_MULTIBYTE (PT_BYTE
);
606 if (SYNTAX (c
) == Sopen
)
608 SETUP_SYNTAX_TABLE (PT
+ 1, -1); /* Try again... */
609 c
= FETCH_CHAR_AS_MULTIBYTE (PT_BYTE
);
610 if (SYNTAX (c
) == Sopen
)
612 /* Now fallback to the default value. */
613 SETUP_BUFFER_SYNTAX_TABLE ();
615 /* Move to beg of previous line. */
616 scan_newline (PT
, PT_BYTE
, BEGV
, BEGV_BYTE
, -2, 1);
619 /* Record what we found, for the next try. */
620 find_start_value
= PT
;
621 find_start_value_byte
= PT_BYTE
;
622 TEMP_SET_PT_BOTH (opoint
, opoint_byte
);
625 find_start_buffer
= current_buffer
;
626 find_start_modiff
= MODIFF
;
627 find_start_begv
= BEGV
;
628 find_start_pos
= pos
;
630 return find_start_value
;
633 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
636 prev_char_comend_first (ptrdiff_t pos
, ptrdiff_t pos_byte
)
641 DEC_BOTH (pos
, pos_byte
);
642 UPDATE_SYNTAX_TABLE_BACKWARD (pos
);
643 c
= FETCH_CHAR (pos_byte
);
644 val
= SYNTAX_COMEND_FIRST (c
);
645 UPDATE_SYNTAX_TABLE_FORWARD (pos
+ 1);
649 /* Check whether charpos FROM is at the end of a comment.
650 FROM_BYTE is the bytepos corresponding to FROM.
651 Do not move back before STOP.
653 Return true if we find a comment ending at FROM/FROM_BYTE.
655 If successful, store the charpos of the comment's beginning
656 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
658 Global syntax data remains valid for backward search starting at
659 the returned value (or at FROM, if the search was not successful). */
662 back_comment (ptrdiff_t from
, ptrdiff_t from_byte
, ptrdiff_t stop
,
663 bool comnested
, int comstyle
, ptrdiff_t *charpos_ptr
,
664 ptrdiff_t *bytepos_ptr
)
666 /* Look back, counting the parity of string-quotes,
667 and recording the comment-starters seen.
668 When we reach a safe place, assume that's not in a string;
669 then step the main scan to the earliest comment-starter seen
670 an even number of string quotes away from the safe place.
672 OFROM[I] is position of the earliest comment-starter seen
673 which is I+2X quotes from the comment-end.
674 PARITY is current parity of quotes from the comment end. */
675 int string_style
= -1; /* Presumed outside of any string. */
676 bool string_lossage
= 0;
677 /* Not a real lossage: indicates that we have passed a matching comment
678 starter plus a non-matching comment-ender, meaning that any matching
679 comment-starter we might see later could be a false positive (hidden
680 inside another comment).
681 Test case: { a (* b } c (* d *) */
682 bool comment_lossage
= 0;
683 ptrdiff_t comment_end
= from
;
684 ptrdiff_t comment_end_byte
= from_byte
;
685 ptrdiff_t comstart_pos
= 0;
686 ptrdiff_t comstart_byte
IF_LINT (= 0);
687 /* Place where the containing defun starts,
688 or 0 if we didn't come across it yet. */
689 ptrdiff_t defun_start
= 0;
690 ptrdiff_t defun_start_byte
= 0;
691 enum syntaxcode code
;
692 ptrdiff_t nesting
= 1; /* Current comment nesting. */
696 /* FIXME: A }} comment-ender style leads to incorrect behavior
697 in the case of {{ c }}} because we ignore the last two chars which are
698 assumed to be comment-enders although they aren't. */
700 /* At beginning of range to scan, we're outside of strings;
701 that determines quote parity to the comment-end. */
706 bool com2start
, com2end
, comstart
;
708 /* Move back and examine a character. */
709 DEC_BOTH (from
, from_byte
);
710 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
712 prev_syntax
= syntax
;
713 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
714 syntax
= SYNTAX_WITH_FLAGS (c
);
717 /* Check for 2-char comment markers. */
718 com2start
= (SYNTAX_FLAGS_COMSTART_FIRST (syntax
)
719 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax
)
721 == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax
, syntax
))
722 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax
)
723 || SYNTAX_FLAGS_COMMENT_NESTED (syntax
)) == comnested
);
724 com2end
= (SYNTAX_FLAGS_COMEND_FIRST (syntax
)
725 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax
));
726 comstart
= (com2start
|| code
== Scomment
);
728 /* Nasty cases with overlapping 2-char comment markers:
729 - snmp-mode: -- c -- foo -- c --
737 /* If a 2-char comment sequence partly overlaps with another,
738 we don't try to be clever. E.g. |*| in C, or }% in modes that
739 have %..\n and %{..}%. */
740 if (from
> stop
&& (com2end
|| comstart
))
742 ptrdiff_t next
= from
, next_byte
= from_byte
;
743 int next_c
, next_syntax
;
744 DEC_BOTH (next
, next_byte
);
745 UPDATE_SYNTAX_TABLE_BACKWARD (next
);
746 next_c
= FETCH_CHAR_AS_MULTIBYTE (next_byte
);
747 next_syntax
= SYNTAX_WITH_FLAGS (next_c
);
748 if (((comstart
|| comnested
)
749 && SYNTAX_FLAGS_COMEND_SECOND (syntax
)
750 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax
))
751 || ((com2end
|| comnested
)
752 && SYNTAX_FLAGS_COMSTART_SECOND (syntax
)
754 == SYNTAX_FLAGS_COMMENT_STYLE (syntax
, prev_syntax
))
755 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax
)))
757 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
760 if (com2start
&& comstart_pos
== 0)
761 /* We're looking at a comment starter. But it might be a comment
762 ender as well (see snmp-mode). The first time we see one, we
763 need to consider it as a comment starter,
764 and the subsequent times as a comment ender. */
767 /* Turn a 2-char comment sequences into the appropriate syntax. */
772 /* Ignore comment starters of a different style. */
773 else if (code
== Scomment
774 && (comstyle
!= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0)
775 || SYNTAX_FLAGS_COMMENT_NESTED (syntax
) != comnested
))
778 /* Ignore escaped characters, except comment-enders. */
779 if (code
!= Sendcomment
&& char_quoted (from
, from_byte
))
786 c
= (code
== Sstring_fence
? ST_STRING_STYLE
: ST_COMMENT_STYLE
);
788 /* Track parity of quotes. */
789 if (string_style
== -1)
790 /* Entering a string. */
792 else if (string_style
== c
)
793 /* Leaving the string. */
796 /* If we have two kinds of string delimiters.
797 There's no way to grok this scanning backwards. */
802 /* We've already checked that it is the relevant comstyle. */
803 if (string_style
!= -1 || comment_lossage
|| string_lossage
)
804 /* There are odd string quotes involved, so let's be careful.
805 Test case in Pascal: " { " a { " } */
810 /* Record best comment-starter so far. */
812 comstart_byte
= from_byte
;
814 else if (--nesting
<= 0)
815 /* nested comments have to be balanced, so we don't need to
816 keep looking for earlier ones. We use here the same (slightly
817 incorrect) reasoning as below: since it is followed by uniform
818 paired string quotes, this comment-start has to be outside of
819 strings, else the comment-end itself would be inside a string. */
824 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0) == comstyle
825 && ((com2end
&& SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax
))
826 || SYNTAX_FLAGS_COMMENT_NESTED (syntax
)) == comnested
)
827 /* This is the same style of comment ender as ours. */
832 /* Anything before that can't count because it would match
833 this comment-ender rather than ours. */
834 from
= stop
; /* Break out of the loop. */
836 else if (comstart_pos
!= 0 || c
!= '\n')
837 /* We're mixing comment styles here, so we'd better be careful.
838 The (comstart_pos != 0 || c != '\n') check is not quite correct
839 (we should just always set comment_lossage), but removing it
840 would imply that any multiline comment in C would go through
841 lossage, which seems overkill.
842 The failure should only happen in the rare cases such as
848 /* Assume a defun-start point is outside of strings. */
849 if (open_paren_in_column_0_is_defun_start
851 || (temp_byte
= dec_bytepos (from_byte
),
852 FETCH_CHAR (temp_byte
) == '\n')))
855 defun_start_byte
= from_byte
;
856 from
= stop
; /* Break out of the loop. */
865 if (comstart_pos
== 0)
868 from_byte
= comment_end_byte
;
869 UPDATE_SYNTAX_TABLE_FORWARD (comment_end
);
871 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
872 or `done'), then we've found the beginning of the non-nested comment. */
873 else if (1) /* !comnested */
876 from_byte
= comstart_byte
;
877 UPDATE_SYNTAX_TABLE_FORWARD (from
- 1);
881 struct lisp_parse_state state
;
882 bool adjusted
= true;
883 /* We had two kinds of string delimiters mixed up
884 together. Decode this going forwards.
885 Scan fwd from a known safe place (beginning-of-defun)
886 to the one in question; this records where we
887 last passed a comment starter. */
888 /* If we did not already find the defun start, find it now. */
889 if (defun_start
== 0)
891 defun_start
= find_defun_start (comment_end
, comment_end_byte
);
892 defun_start_byte
= find_start_value_byte
;
893 adjusted
= (defun_start
> BEGV
);
897 scan_sexps_forward (&state
,
898 defun_start
, defun_start_byte
,
899 comment_end
, TYPE_MINIMUM (EMACS_INT
),
901 defun_start
= comment_end
;
906 = CONSP (state
.levelstarts
) ? XINT (XCAR (state
.levelstarts
))
907 : state
.thislevelstart
>= 0 ? state
.thislevelstart
909 find_start_value_byte
= CHAR_TO_BYTE (find_start_value
);
912 if (state
.incomment
== (comnested
? 1 : -1)
913 && state
.comstyle
== comstyle
)
914 from
= state
.comstr_start
;
919 /* If comment_end is inside some other comment, maybe ours
920 is nested, so we need to try again from within the
921 surrounding comment. Example: { a (* " *) */
923 /* FIXME: We should advance by one or two chars. */
924 defun_start
= state
.comstr_start
+ 2;
925 defun_start_byte
= CHAR_TO_BYTE (defun_start
);
928 } while (defun_start
< comment_end
);
930 from_byte
= CHAR_TO_BYTE (from
);
931 UPDATE_SYNTAX_TABLE_FORWARD (from
- 1);
936 *bytepos_ptr
= from_byte
;
938 return from
!= comment_end
;
941 DEFUN ("syntax-table-p", Fsyntax_table_p
, Ssyntax_table_p
, 1, 1, 0,
942 doc
: /* Return t if OBJECT is a syntax table.
943 Currently, any char-table counts as a syntax table. */)
946 if (CHAR_TABLE_P (object
)
947 && EQ (XCHAR_TABLE (object
)->purpose
, Qsyntax_table
))
953 check_syntax_table (Lisp_Object obj
)
955 CHECK_TYPE (CHAR_TABLE_P (obj
) && EQ (XCHAR_TABLE (obj
)->purpose
, Qsyntax_table
),
956 Qsyntax_table_p
, obj
);
959 DEFUN ("syntax-table", Fsyntax_table
, Ssyntax_table
, 0, 0, 0,
960 doc
: /* Return the current syntax table.
961 This is the one specified by the current buffer. */)
964 return BVAR (current_buffer
, syntax_table
);
967 DEFUN ("standard-syntax-table", Fstandard_syntax_table
,
968 Sstandard_syntax_table
, 0, 0, 0,
969 doc
: /* Return the standard syntax table.
970 This is the one used for new buffers. */)
973 return Vstandard_syntax_table
;
976 DEFUN ("copy-syntax-table", Fcopy_syntax_table
, Scopy_syntax_table
, 0, 1, 0,
977 doc
: /* Construct a new syntax table and return it.
978 It is a copy of the TABLE, which defaults to the standard syntax table. */)
984 check_syntax_table (table
);
986 table
= Vstandard_syntax_table
;
988 copy
= Fcopy_sequence (table
);
990 /* Only the standard syntax table should have a default element.
991 Other syntax tables should inherit from parents instead. */
992 set_char_table_defalt (copy
, Qnil
);
994 /* Copied syntax tables should all have parents.
995 If we copied one with no parent, such as the standard syntax table,
996 use the standard syntax table as the copy's parent. */
997 if (NILP (XCHAR_TABLE (copy
)->parent
))
998 Fset_char_table_parent (copy
, Vstandard_syntax_table
);
1002 DEFUN ("set-syntax-table", Fset_syntax_table
, Sset_syntax_table
, 1, 1, 0,
1003 doc
: /* Select a new syntax table for the current buffer.
1004 One argument, a syntax table. */)
1008 check_syntax_table (table
);
1009 bset_syntax_table (current_buffer
, table
);
1010 /* Indicate that this buffer now has a specified syntax table. */
1011 idx
= PER_BUFFER_VAR_IDX (syntax_table
);
1012 SET_PER_BUFFER_VALUE_P (current_buffer
, idx
, 1);
1016 /* Convert a letter which signifies a syntax code
1017 into the code it signifies.
1018 This is used by modify-syntax-entry, and other things. */
1020 unsigned char const syntax_spec_code
[0400] =
1021 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1022 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1023 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1024 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1025 Swhitespace
, Scomment_fence
, Sstring
, 0377, Smath
, 0377, 0377, Squote
,
1026 Sopen
, Sclose
, 0377, 0377, 0377, Swhitespace
, Spunct
, Scharquote
,
1027 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1028 0377, 0377, 0377, 0377, Scomment
, 0377, Sendcomment
, 0377,
1029 Sinherit
, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
1030 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1031 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword
,
1032 0377, 0377, 0377, 0377, Sescape
, 0377, 0377, Ssymbol
,
1033 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
1034 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1035 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword
,
1036 0377, 0377, 0377, 0377, Sstring_fence
, 0377, 0377, 0377
1039 /* Indexed by syntax code, give the letter that describes it. */
1041 char const syntax_code_spec
[16] =
1043 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
1047 /* Indexed by syntax code, give the object (cons of syntax code and
1048 nil) to be stored in syntax table. Since these objects can be
1049 shared among syntax tables, we generate them in advance. By
1050 sharing objects, the function `describe-syntax' can give a more
1052 static Lisp_Object Vsyntax_code_object
;
1055 DEFUN ("char-syntax", Fchar_syntax
, Schar_syntax
, 1, 1, 0,
1056 doc
: /* Return the syntax code of CHARACTER, described by a character.
1057 For example, if CHARACTER is a word constituent, the
1058 character `w' (119) is returned.
1059 The characters that correspond to various syntax codes
1060 are listed in the documentation of `modify-syntax-entry'. */)
1061 (Lisp_Object character
)
1064 CHECK_CHARACTER (character
);
1065 char_int
= XINT (character
);
1066 SETUP_BUFFER_SYNTAX_TABLE ();
1067 return make_number (syntax_code_spec
[SYNTAX (char_int
)]);
1070 DEFUN ("matching-paren", Fmatching_paren
, Smatching_paren
, 1, 1, 0,
1071 doc
: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
1072 (Lisp_Object character
)
1075 enum syntaxcode code
;
1076 CHECK_CHARACTER (character
);
1077 char_int
= XINT (character
);
1078 SETUP_BUFFER_SYNTAX_TABLE ();
1079 code
= SYNTAX (char_int
);
1080 if (code
== Sopen
|| code
== Sclose
)
1081 return SYNTAX_MATCH (char_int
);
1085 DEFUN ("string-to-syntax", Fstring_to_syntax
, Sstring_to_syntax
, 1, 1, 0,
1086 doc
: /* Convert a syntax descriptor STRING into a raw syntax descriptor.
1087 STRING should be a string of the form allowed as argument of
1088 `modify-syntax-entry'. The return value is a raw syntax descriptor: a
1089 cons cell \(CODE . MATCHING-CHAR) which can be used, for example, as
1090 the value of a `syntax-table' text property. */)
1091 (Lisp_Object string
)
1093 const unsigned char *p
;
1097 CHECK_STRING (string
);
1100 val
= syntax_spec_code
[*p
++];
1102 error ("Invalid syntax description letter: %c", p
[-1]);
1104 if (val
== Sinherit
)
1110 int character
= STRING_CHAR_AND_LENGTH (p
, len
);
1111 XSETINT (match
, character
);
1112 if (XFASTINT (match
) == ' ')
1155 if (val
< ASIZE (Vsyntax_code_object
) && NILP (match
))
1156 return AREF (Vsyntax_code_object
, val
);
1158 /* Since we can't use a shared object, let's make a new one. */
1159 return Fcons (make_number (val
), match
);
1162 /* I really don't know why this is interactive
1163 help-form should at least be made useful whilst reading the second arg. */
1164 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry
, Smodify_syntax_entry
, 2, 3,
1165 "cSet syntax for character: \nsSet syntax for %s to: ",
1166 doc
: /* Set syntax for character CHAR according to string NEWENTRY.
1167 The syntax is changed only for table SYNTAX-TABLE, which defaults to
1168 the current buffer's syntax table.
1169 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
1170 in the range MIN to MAX are changed.
1171 The first character of NEWENTRY should be one of the following:
1172 Space or - whitespace syntax. w word constituent.
1173 _ symbol constituent. . punctuation.
1174 ( open-parenthesis. ) close-parenthesis.
1175 " string quote. \\ escape.
1176 $ paired delimiter. \\=' expression quote or prefix operator.
1177 < comment starter. > comment ender.
1178 / character-quote. @ inherit from parent table.
1179 | generic string fence. ! generic comment fence.
1181 Only single-character comment start and end sequences are represented thus.
1182 Two-character sequences are represented as described below.
1183 The second character of NEWENTRY is the matching parenthesis,
1184 used only if the first character is `(' or `)'.
1185 Any additional characters are flags.
1186 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1187 1 means CHAR is the start of a two-char comment start sequence.
1188 2 means CHAR is the second character of such a sequence.
1189 3 means CHAR is the start of a two-char comment end sequence.
1190 4 means CHAR is the second character of such a sequence.
1192 There can be several orthogonal comment sequences. This is to support
1193 language modes such as C++. By default, all comment sequences are of style
1194 a, but you can set the comment sequence style to b (on the second character
1195 of a comment-start, and the first character of a comment-end sequence) and/or
1196 c (on any of its chars) using this flag:
1197 b means CHAR is part of comment sequence b.
1198 c means CHAR is part of comment sequence c.
1199 n means CHAR is part of a nestable comment sequence.
1201 p means CHAR is a prefix character for `backward-prefix-chars';
1202 such characters are treated as whitespace when they occur
1203 between expressions.
1204 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
1205 (Lisp_Object c
, Lisp_Object newentry
, Lisp_Object syntax_table
)
1209 CHECK_CHARACTER_CAR (c
);
1210 CHECK_CHARACTER_CDR (c
);
1213 CHECK_CHARACTER (c
);
1215 if (NILP (syntax_table
))
1216 syntax_table
= BVAR (current_buffer
, syntax_table
);
1218 check_syntax_table (syntax_table
);
1220 newentry
= Fstring_to_syntax (newentry
);
1222 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table
, c
, newentry
);
1224 SET_RAW_SYNTAX_ENTRY (syntax_table
, XINT (c
), newentry
);
1226 /* We clear the regexp cache, since character classes can now have
1227 different values from those in the compiled regexps.*/
1228 clear_regexp_cache ();
1233 /* Dump syntax table to buffer in human-readable format */
1235 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value
,
1236 Sinternal_describe_syntax_value
, 1, 1, 0,
1237 doc
: /* Insert a description of the internal syntax description SYNTAX at point. */)
1238 (Lisp_Object syntax
)
1240 int code
, syntax_code
;
1241 bool start1
, start2
, end1
, end2
, prefix
, comstyleb
, comstylec
, comnested
;
1243 Lisp_Object first
, match_lisp
, value
= syntax
;
1247 insert_string ("default");
1251 if (CHAR_TABLE_P (value
))
1253 insert_string ("deeper char-table ...");
1259 insert_string ("invalid");
1263 first
= XCAR (value
);
1264 match_lisp
= XCDR (value
);
1266 if (!INTEGERP (first
) || !(NILP (match_lisp
) || CHARACTERP (match_lisp
)))
1268 insert_string ("invalid");
1272 syntax_code
= XINT (first
) & INT_MAX
;
1273 code
= syntax_code
& 0377;
1274 start1
= SYNTAX_FLAGS_COMSTART_FIRST (syntax_code
);
1275 start2
= SYNTAX_FLAGS_COMSTART_SECOND (syntax_code
);
1276 end1
= SYNTAX_FLAGS_COMEND_FIRST (syntax_code
);
1277 end2
= SYNTAX_FLAGS_COMEND_SECOND (syntax_code
);
1278 prefix
= SYNTAX_FLAGS_PREFIX (syntax_code
);
1279 comstyleb
= SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code
);
1280 comstylec
= SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code
);
1281 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax_code
);
1285 insert_string ("invalid");
1289 str
[0] = syntax_code_spec
[code
], str
[1] = 0;
1292 if (NILP (match_lisp
))
1295 insert_char (XINT (match_lisp
));
1316 insert_string ("\twhich means: ");
1321 insert_string ("whitespace"); break;
1323 insert_string ("punctuation"); break;
1325 insert_string ("word"); break;
1327 insert_string ("symbol"); break;
1329 insert_string ("open"); break;
1331 insert_string ("close"); break;
1333 insert_string ("prefix"); break;
1335 insert_string ("string"); break;
1337 insert_string ("math"); break;
1339 insert_string ("escape"); break;
1341 insert_string ("charquote"); break;
1343 insert_string ("comment"); break;
1345 insert_string ("endcomment"); break;
1347 insert_string ("inherit"); break;
1348 case Scomment_fence
:
1349 insert_string ("comment fence"); break;
1351 insert_string ("string fence"); break;
1353 insert_string ("invalid");
1357 if (!NILP (match_lisp
))
1359 insert_string (", matches ");
1360 insert_char (XINT (match_lisp
));
1364 insert_string (",\n\t is the first character of a comment-start sequence");
1366 insert_string (",\n\t is the second character of a comment-start sequence");
1369 insert_string (",\n\t is the first character of a comment-end sequence");
1371 insert_string (",\n\t is the second character of a comment-end sequence");
1373 insert_string (" (comment style b)");
1375 insert_string (" (comment style c)");
1377 insert_string (" (nestable)");
1381 AUTO_STRING (prefixdoc
,
1382 ",\n\t is a prefix character for `backward-prefix-chars'");
1383 insert1 (Fsubstitute_command_keys (prefixdoc
));
1389 /* Return the position across COUNT words from FROM.
1390 If that many words cannot be found before the end of the buffer, return 0.
1391 COUNT negative means scan backward and stop at word beginning. */
1394 scan_words (register ptrdiff_t from
, register EMACS_INT count
)
1396 register ptrdiff_t beg
= BEGV
;
1397 register ptrdiff_t end
= ZV
;
1398 register ptrdiff_t from_byte
= CHAR_TO_BYTE (from
);
1399 register enum syntaxcode code
;
1401 Lisp_Object func
, pos
;
1406 SETUP_SYNTAX_TABLE (from
, count
);
1417 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1418 ch0
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1419 code
= SYNTAX (ch0
);
1420 INC_BOTH (from
, from_byte
);
1421 if (words_include_escapes
1422 && (code
== Sescape
|| code
== Scharquote
))
1427 /* Now CH0 is a character which begins a word and FROM is the
1428 position of the next character. */
1429 func
= CHAR_TABLE_REF (Vfind_word_boundary_function_table
, ch0
);
1430 if (! NILP (Ffboundp (func
)))
1432 pos
= call2 (func
, make_number (from
- 1), make_number (end
));
1433 if (INTEGERP (pos
) && from
< XINT (pos
) && XINT (pos
) <= ZV
)
1436 from_byte
= CHAR_TO_BYTE (from
);
1443 if (from
== end
) break;
1444 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1445 ch1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1446 code
= SYNTAX (ch1
);
1448 && (! words_include_escapes
1449 || (code
!= Sescape
&& code
!= Scharquote
)))
1450 || word_boundary_p (ch0
, ch1
))
1452 INC_BOTH (from
, from_byte
);
1467 DEC_BOTH (from
, from_byte
);
1468 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1469 ch1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1470 code
= SYNTAX (ch1
);
1471 if (words_include_escapes
1472 && (code
== Sescape
|| code
== Scharquote
))
1477 /* Now CH1 is a character which ends a word and FROM is the
1479 func
= CHAR_TABLE_REF (Vfind_word_boundary_function_table
, ch1
);
1480 if (! NILP (Ffboundp (func
)))
1482 pos
= call2 (func
, make_number (from
), make_number (beg
));
1483 if (INTEGERP (pos
) && BEGV
<= XINT (pos
) && XINT (pos
) < from
)
1486 from_byte
= CHAR_TO_BYTE (from
);
1495 DEC_BOTH (from
, from_byte
);
1496 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1497 ch0
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1498 code
= SYNTAX (ch0
);
1500 && (! words_include_escapes
1501 || (code
!= Sescape
&& code
!= Scharquote
)))
1502 || word_boundary_p (ch0
, ch1
))
1504 INC_BOTH (from
, from_byte
);
1518 DEFUN ("forward-word", Fforward_word
, Sforward_word
, 0, 1, "^p",
1519 doc
: /* Move point forward ARG words (backward if ARG is negative).
1520 If ARG is omitted or nil, move point forward one word.
1522 If an edge of the buffer or a field boundary is reached, point is left there
1523 and the function returns nil. Field boundaries are not noticed if
1524 `inhibit-field-text-motion' is non-nil. */)
1528 ptrdiff_t orig_val
, val
;
1531 XSETFASTINT (arg
, 1);
1535 val
= orig_val
= scan_words (PT
, XINT (arg
));
1537 val
= XINT (arg
) > 0 ? ZV
: BEGV
;
1539 /* Avoid jumping out of an input field. */
1540 tmp
= Fconstrain_to_field (make_number (val
), make_number (PT
),
1542 val
= XFASTINT (tmp
);
1545 return val
== orig_val
? Qt
: Qnil
;
1548 DEFUN ("skip-chars-forward", Fskip_chars_forward
, Sskip_chars_forward
, 1, 2, 0,
1549 doc
: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1550 STRING is like the inside of a `[...]' in a regular expression
1551 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1552 (but not at the end of a range; quoting is never needed there).
1553 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1554 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1555 Char classes, e.g. `[:alpha:]', are supported.
1557 Returns the distance traveled, either zero or positive. */)
1558 (Lisp_Object string
, Lisp_Object lim
)
1560 return skip_chars (1, string
, lim
, 1);
1563 DEFUN ("skip-chars-backward", Fskip_chars_backward
, Sskip_chars_backward
, 1, 2, 0,
1564 doc
: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1565 See `skip-chars-forward' for details.
1566 Returns the distance traveled, either zero or negative. */)
1567 (Lisp_Object string
, Lisp_Object lim
)
1569 return skip_chars (0, string
, lim
, 1);
1572 DEFUN ("skip-syntax-forward", Fskip_syntax_forward
, Sskip_syntax_forward
, 1, 2, 0,
1573 doc
: /* Move point forward across chars in specified syntax classes.
1574 SYNTAX is a string of syntax code characters.
1575 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1576 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1577 This function returns the distance traveled, either zero or positive. */)
1578 (Lisp_Object syntax
, Lisp_Object lim
)
1580 return skip_syntaxes (1, syntax
, lim
);
1583 DEFUN ("skip-syntax-backward", Fskip_syntax_backward
, Sskip_syntax_backward
, 1, 2, 0,
1584 doc
: /* Move point backward across chars in specified syntax classes.
1585 SYNTAX is a string of syntax code characters.
1586 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1587 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1588 This function returns either zero or a negative number, and the absolute value
1589 of this is the distance traveled. */)
1590 (Lisp_Object syntax
, Lisp_Object lim
)
1592 return skip_syntaxes (0, syntax
, lim
);
1596 skip_chars (bool forwardp
, Lisp_Object string
, Lisp_Object lim
,
1597 bool handle_iso_classes
)
1601 /* Store the ranges of non-ASCII characters. */
1602 int *char_ranges
IF_LINT (= NULL
);
1603 int n_char_ranges
= 0;
1605 ptrdiff_t i
, i_byte
;
1606 /* True if the current buffer is multibyte and the region contains
1609 /* True if STRING is multibyte and it contains non-ASCII chars. */
1610 bool string_multibyte
;
1611 ptrdiff_t size_byte
;
1612 const unsigned char *str
;
1614 Lisp_Object iso_classes
;
1617 CHECK_STRING (string
);
1621 XSETINT (lim
, forwardp
? ZV
: BEGV
);
1623 CHECK_NUMBER_COERCE_MARKER (lim
);
1625 /* In any case, don't allow scan outside bounds of buffer. */
1626 if (XINT (lim
) > ZV
)
1627 XSETFASTINT (lim
, ZV
);
1628 if (XINT (lim
) < BEGV
)
1629 XSETFASTINT (lim
, BEGV
);
1631 multibyte
= (!NILP (BVAR (current_buffer
, enable_multibyte_characters
))
1632 && (XINT (lim
) - PT
!= CHAR_TO_BYTE (XINT (lim
)) - PT_BYTE
));
1633 string_multibyte
= SBYTES (string
) > SCHARS (string
);
1635 memset (fastmap
, 0, sizeof fastmap
);
1637 str
= SDATA (string
);
1638 size_byte
= SBYTES (string
);
1641 if (i_byte
< size_byte
1642 && SREF (string
, 0) == '^')
1644 negate
= 1; i_byte
++;
1647 /* Find the characters specified and set their elements of fastmap.
1648 Handle backslashes and ranges specially.
1650 If STRING contains non-ASCII characters, setup char_ranges for
1651 them and use fastmap only for their leading codes. */
1653 if (! string_multibyte
)
1655 bool string_has_eight_bit
= 0;
1657 /* At first setup fastmap. */
1658 while (i_byte
< size_byte
)
1662 if (handle_iso_classes
&& c
== '['
1663 && i_byte
< size_byte
1664 && str
[i_byte
] == ':')
1666 const unsigned char *class_beg
= str
+ i_byte
+ 1;
1667 const unsigned char *class_end
= class_beg
;
1668 const unsigned char *class_limit
= str
+ size_byte
- 2;
1669 /* Leave room for the null. */
1670 unsigned char class_name
[CHAR_CLASS_MAX_LENGTH
+ 1];
1673 if (class_limit
- class_beg
> CHAR_CLASS_MAX_LENGTH
)
1674 class_limit
= class_beg
+ CHAR_CLASS_MAX_LENGTH
;
1676 while (class_end
< class_limit
1677 && *class_end
>= 'a' && *class_end
<= 'z')
1680 if (class_end
== class_beg
1681 || *class_end
!= ':' || class_end
[1] != ']')
1682 goto not_a_class_name
;
1684 memcpy (class_name
, class_beg
, class_end
- class_beg
);
1685 class_name
[class_end
- class_beg
] = 0;
1687 cc
= re_wctype (class_name
);
1689 error ("Invalid ISO C character class");
1691 iso_classes
= Fcons (make_number (cc
), iso_classes
);
1693 i_byte
= class_end
+ 2 - str
;
1700 if (i_byte
== size_byte
)
1705 /* Treat `-' as range character only if another character
1707 if (i_byte
+ 1 < size_byte
1708 && str
[i_byte
] == '-')
1712 /* Skip over the dash. */
1715 /* Get the end of the range. */
1718 && i_byte
< size_byte
)
1726 if (! ASCII_CHAR_P (c2
))
1727 string_has_eight_bit
= 1;
1733 if (! ASCII_CHAR_P (c
))
1734 string_has_eight_bit
= 1;
1738 /* If the current range is multibyte and STRING contains
1739 eight-bit chars, arrange fastmap and setup char_ranges for
1740 the corresponding multibyte chars. */
1741 if (multibyte
&& string_has_eight_bit
)
1744 char himap
[0200 + 1];
1745 memcpy (himap
, fastmap
+ 0200, 0200);
1747 memset (fastmap
+ 0200, 0, 0200);
1748 SAFE_NALLOCA (char_ranges
, 2, 128);
1751 while ((p1
= memchr (himap
+ i
, 1, 0200 - i
)))
1753 /* Deduce the next range C..C2 from the next clump of 1s
1754 in HIMAP starting with &HIMAP[I]. HIMAP is the high
1755 order half of the old FASTMAP. */
1756 int c2
, leading_code
;
1758 c
= BYTE8_TO_CHAR (i
+ 0200);
1760 c2
= BYTE8_TO_CHAR (i
+ 0200 - 1);
1762 char_ranges
[n_char_ranges
++] = c
;
1763 char_ranges
[n_char_ranges
++] = c2
;
1764 leading_code
= CHAR_LEADING_CODE (c
);
1765 memset (fastmap
+ leading_code
, 1,
1766 CHAR_LEADING_CODE (c2
) - leading_code
+ 1);
1770 else /* STRING is multibyte */
1772 SAFE_NALLOCA (char_ranges
, 2, SCHARS (string
));
1774 while (i_byte
< size_byte
)
1776 int leading_code
= str
[i_byte
];
1777 c
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1780 if (handle_iso_classes
&& c
== '['
1781 && i_byte
< size_byte
1782 && STRING_CHAR (str
+ i_byte
) == ':')
1784 const unsigned char *class_beg
= str
+ i_byte
+ 1;
1785 const unsigned char *class_end
= class_beg
;
1786 const unsigned char *class_limit
= str
+ size_byte
- 2;
1787 /* Leave room for the null. */
1788 unsigned char class_name
[CHAR_CLASS_MAX_LENGTH
+ 1];
1791 if (class_limit
- class_beg
> CHAR_CLASS_MAX_LENGTH
)
1792 class_limit
= class_beg
+ CHAR_CLASS_MAX_LENGTH
;
1794 while (class_end
< class_limit
1795 && *class_end
>= 'a' && *class_end
<= 'z')
1798 if (class_end
== class_beg
1799 || *class_end
!= ':' || class_end
[1] != ']')
1800 goto not_a_class_name_multibyte
;
1802 memcpy (class_name
, class_beg
, class_end
- class_beg
);
1803 class_name
[class_end
- class_beg
] = 0;
1805 cc
= re_wctype (class_name
);
1807 error ("Invalid ISO C character class");
1809 iso_classes
= Fcons (make_number (cc
), iso_classes
);
1811 i_byte
= class_end
+ 2 - str
;
1815 not_a_class_name_multibyte
:
1818 if (i_byte
== size_byte
)
1821 leading_code
= str
[i_byte
];
1822 c
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1825 /* Treat `-' as range character only if another character
1827 if (i_byte
+ 1 < size_byte
1828 && str
[i_byte
] == '-')
1830 int c2
, leading_code2
;
1832 /* Skip over the dash. */
1835 /* Get the end of the range. */
1836 leading_code2
= str
[i_byte
];
1837 c2
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1841 && i_byte
< size_byte
)
1843 leading_code2
= str
[i_byte
];
1844 c2
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1850 if (ASCII_CHAR_P (c
))
1852 while (c
<= c2
&& c
< 0x80)
1854 leading_code
= CHAR_LEADING_CODE (c
);
1856 if (! ASCII_CHAR_P (c
))
1858 int lim2
= leading_code2
+ 1;
1859 while (leading_code
< lim2
)
1860 fastmap
[leading_code
++] = 1;
1863 char_ranges
[n_char_ranges
++] = c
;
1864 char_ranges
[n_char_ranges
++] = c2
;
1870 if (ASCII_CHAR_P (c
))
1874 fastmap
[leading_code
] = 1;
1875 char_ranges
[n_char_ranges
++] = c
;
1876 char_ranges
[n_char_ranges
++] = c
;
1881 /* If the current range is unibyte and STRING contains non-ASCII
1882 chars, arrange fastmap for the corresponding unibyte
1885 if (! multibyte
&& n_char_ranges
> 0)
1887 memset (fastmap
+ 0200, 0, 0200);
1888 for (i
= 0; i
< n_char_ranges
; i
+= 2)
1890 int c1
= char_ranges
[i
];
1891 int lim2
= char_ranges
[i
+ 1] + 1;
1893 for (; c1
< lim2
; c1
++)
1895 int b
= CHAR_TO_BYTE_SAFE (c1
);
1903 /* If ^ was the first character, complement the fastmap. */
1907 for (i
= 0; i
< sizeof fastmap
; i
++)
1911 for (i
= 0; i
< 0200; i
++)
1913 /* All non-ASCII chars possibly match. */
1914 for (; i
< sizeof fastmap
; i
++)
1920 ptrdiff_t start_point
= PT
;
1922 ptrdiff_t pos_byte
= PT_BYTE
;
1923 unsigned char *p
= PT_ADDR
, *endp
, *stop
;
1927 endp
= (XINT (lim
) == GPT
) ? GPT_ADDR
: CHAR_POS_ADDR (XINT (lim
));
1928 stop
= (pos
< GPT
&& GPT
< XINT (lim
)) ? GPT_ADDR
: endp
;
1932 endp
= CHAR_POS_ADDR (XINT (lim
));
1933 stop
= (pos
>= GPT
&& GPT
> XINT (lim
)) ? GAP_END_ADDR
: endp
;
1937 /* This code may look up syntax tables using functions that rely on the
1938 gl_state object. To make sure this object is not out of date,
1939 let's initialize it manually.
1940 We ignore syntax-table text-properties for now, since that's
1941 what we've done in the past. */
1942 SETUP_BUFFER_SYNTAX_TABLE ();
1957 c
= STRING_CHAR_AND_LENGTH (p
, nbytes
);
1958 if (! NILP (iso_classes
) && in_classes (c
, iso_classes
))
1968 if (! ASCII_CHAR_P (c
))
1970 /* As we are looking at a multibyte character, we
1971 must look up the character in the table
1972 CHAR_RANGES. If there's no data in the table,
1973 that character is not what we want to skip. */
1975 /* The following code do the right thing even if
1976 n_char_ranges is zero (i.e. no data in
1978 for (i
= 0; i
< n_char_ranges
; i
+= 2)
1979 if (c
>= char_ranges
[i
] && c
<= char_ranges
[i
+ 1])
1981 if (!(negate
^ (i
< n_char_ranges
)))
1985 p
+= nbytes
, pos
++, pos_byte
+= nbytes
;
1998 if (!NILP (iso_classes
) && in_classes (*p
, iso_classes
))
2003 goto fwd_unibyte_ok
;
2009 p
++, pos
++, pos_byte
++;
2017 unsigned char *prev_p
;
2027 while (--p
>= stop
&& ! CHAR_HEAD_P (*p
));
2028 c
= STRING_CHAR (p
);
2030 if (! NILP (iso_classes
) && in_classes (c
, iso_classes
))
2040 if (! ASCII_CHAR_P (c
))
2042 /* See the comment in the previous similar code. */
2043 for (i
= 0; i
< n_char_ranges
; i
+= 2)
2044 if (c
>= char_ranges
[i
] && c
<= char_ranges
[i
+ 1])
2046 if (!(negate
^ (i
< n_char_ranges
)))
2050 pos
--, pos_byte
-= prev_p
- p
;
2063 if (! NILP (iso_classes
) && in_classes (p
[-1], iso_classes
))
2068 goto back_unibyte_ok
;
2071 if (!fastmap
[p
[-1]])
2074 p
--, pos
--, pos_byte
--;
2078 SET_PT_BOTH (pos
, pos_byte
);
2082 return make_number (PT
- start_point
);
2088 skip_syntaxes (bool forwardp
, Lisp_Object string
, Lisp_Object lim
)
2091 unsigned char fastmap
[0400];
2093 ptrdiff_t i
, i_byte
;
2095 ptrdiff_t size_byte
;
2098 CHECK_STRING (string
);
2101 XSETINT (lim
, forwardp
? ZV
: BEGV
);
2103 CHECK_NUMBER_COERCE_MARKER (lim
);
2105 /* In any case, don't allow scan outside bounds of buffer. */
2106 if (XINT (lim
) > ZV
)
2107 XSETFASTINT (lim
, ZV
);
2108 if (XINT (lim
) < BEGV
)
2109 XSETFASTINT (lim
, BEGV
);
2111 if (forwardp
? (PT
>= XFASTINT (lim
)) : (PT
<= XFASTINT (lim
)))
2112 return make_number (0);
2114 multibyte
= (!NILP (BVAR (current_buffer
, enable_multibyte_characters
))
2115 && (XINT (lim
) - PT
!= CHAR_TO_BYTE (XINT (lim
)) - PT_BYTE
));
2117 memset (fastmap
, 0, sizeof fastmap
);
2119 if (SBYTES (string
) > SCHARS (string
))
2120 /* As this is very rare case (syntax spec is ASCII only), don't
2121 consider efficiency. */
2122 string
= string_make_unibyte (string
);
2124 str
= SDATA (string
);
2125 size_byte
= SBYTES (string
);
2128 if (i_byte
< size_byte
2129 && SREF (string
, 0) == '^')
2131 negate
= 1; i_byte
++;
2134 /* Find the syntaxes specified and set their elements of fastmap. */
2136 while (i_byte
< size_byte
)
2139 fastmap
[syntax_spec_code
[c
]] = 1;
2142 /* If ^ was the first character, complement the fastmap. */
2144 for (i
= 0; i
< sizeof fastmap
; i
++)
2148 ptrdiff_t start_point
= PT
;
2150 ptrdiff_t pos_byte
= PT_BYTE
;
2151 unsigned char *p
= PT_ADDR
, *endp
, *stop
;
2155 endp
= (XINT (lim
) == GPT
) ? GPT_ADDR
: CHAR_POS_ADDR (XINT (lim
));
2156 stop
= (pos
< GPT
&& GPT
< XINT (lim
)) ? GPT_ADDR
: endp
;
2160 endp
= CHAR_POS_ADDR (XINT (lim
));
2161 stop
= (pos
>= GPT
&& GPT
> XINT (lim
)) ? GAP_END_ADDR
: endp
;
2165 SETUP_SYNTAX_TABLE (pos
, forwardp
? 1 : -1);
2181 c
= STRING_CHAR_AND_LENGTH (p
, nbytes
);
2182 if (! fastmap
[SYNTAX (c
)])
2184 p
+= nbytes
, pos
++, pos_byte
+= nbytes
;
2185 UPDATE_SYNTAX_TABLE_FORWARD (pos
);
2199 if (! fastmap
[SYNTAX (*p
)])
2201 p
++, pos
++, pos_byte
++;
2202 UPDATE_SYNTAX_TABLE_FORWARD (pos
);
2212 unsigned char *prev_p
;
2221 UPDATE_SYNTAX_TABLE_BACKWARD (pos
- 1);
2223 while (--p
>= stop
&& ! CHAR_HEAD_P (*p
));
2224 c
= STRING_CHAR (p
);
2225 if (! fastmap
[SYNTAX (c
)])
2227 pos
--, pos_byte
-= prev_p
- p
;
2241 UPDATE_SYNTAX_TABLE_BACKWARD (pos
- 1);
2242 if (! fastmap
[SYNTAX (p
[-1])])
2244 p
--, pos
--, pos_byte
--;
2249 SET_PT_BOTH (pos
, pos_byte
);
2252 return make_number (PT
- start_point
);
2256 /* Return true if character C belongs to one of the ISO classes
2257 in the list ISO_CLASSES. Each class is represented by an
2258 integer which is its type according to re_wctype. */
2261 in_classes (int c
, Lisp_Object iso_classes
)
2263 bool fits_class
= 0;
2265 while (CONSP (iso_classes
))
2268 elt
= XCAR (iso_classes
);
2269 iso_classes
= XCDR (iso_classes
);
2271 if (re_iswctype (c
, XFASTINT (elt
)))
2278 /* Jump over a comment, assuming we are at the beginning of one.
2279 FROM is the current position.
2280 FROM_BYTE is the bytepos corresponding to FROM.
2281 Do not move past STOP (a charpos).
2282 The comment over which we have to jump is of style STYLE
2283 (either SYNTAX_FLAGS_COMMENT_STYLE (foo) or ST_COMMENT_STYLE).
2284 NESTING should be positive to indicate the nesting at the beginning
2285 for nested comments and should be zero or negative else.
2286 ST_COMMENT_STYLE cannot be nested.
2287 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2288 (or 0 If the search cannot start in the middle of a two-character).
2290 If successful, return true and store the charpos of the comment's end
2291 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
2292 Else, return false and store the charpos STOP into *CHARPOS_PTR, the
2293 corresponding bytepos into *BYTEPOS_PTR and the current nesting
2294 (as defined for state.incomment) in *INCOMMENT_PTR.
2296 The comment end is the last character of the comment rather than the
2297 character just after the comment.
2299 Global syntax data is assumed to initially be valid for FROM and
2300 remains valid for forward search starting at the returned position. */
2303 forw_comment (ptrdiff_t from
, ptrdiff_t from_byte
, ptrdiff_t stop
,
2304 EMACS_INT nesting
, int style
, int prev_syntax
,
2305 ptrdiff_t *charpos_ptr
, ptrdiff_t *bytepos_ptr
,
2306 EMACS_INT
*incomment_ptr
)
2309 register enum syntaxcode code
;
2310 register int syntax
, other_syntax
;
2312 if (nesting
<= 0) nesting
= -1;
2314 /* Enter the loop in the middle so that we find
2315 a 2-char comment ender if we start in the middle of it. */
2316 syntax
= prev_syntax
;
2317 if (syntax
!= 0) goto forw_incomment
;
2323 *incomment_ptr
= nesting
;
2324 *charpos_ptr
= from
;
2325 *bytepos_ptr
= from_byte
;
2328 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2329 syntax
= SYNTAX_WITH_FLAGS (c
);
2330 code
= syntax
& 0xff;
2331 if (code
== Sendcomment
2332 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0) == style
2333 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ?
2334 (nesting
> 0 && --nesting
== 0) : nesting
< 0))
2335 /* we have encountered a comment end of the same style
2336 as the comment sequence which began this comment
2339 if (code
== Scomment_fence
2340 && style
== ST_COMMENT_STYLE
)
2341 /* we have encountered a comment end of the same style
2342 as the comment sequence which began this comment
2347 && SYNTAX_FLAGS_COMMENT_NESTED (syntax
)
2348 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0) == style
)
2349 /* we have encountered a nested comment of the same style
2350 as the comment sequence which began this comment section */
2352 INC_BOTH (from
, from_byte
);
2353 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2356 if (from
< stop
&& SYNTAX_FLAGS_COMEND_FIRST (syntax
)
2357 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2358 other_syntax
= SYNTAX_WITH_FLAGS (c1
),
2359 SYNTAX_FLAGS_COMEND_SECOND (other_syntax
))
2360 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
, other_syntax
) == style
2361 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ||
2362 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
))
2363 ? nesting
> 0 : nesting
< 0))
2366 /* we have encountered a comment end of the same style
2367 as the comment sequence which began this comment
2372 INC_BOTH (from
, from_byte
);
2373 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2378 && SYNTAX_FLAGS_COMSTART_FIRST (syntax
)
2379 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2380 other_syntax
= SYNTAX_WITH_FLAGS (c1
),
2381 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
) == style
2382 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax
))
2383 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ||
2384 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
)))
2385 /* we have encountered a nested comment of the same style
2386 as the comment sequence which began this comment
2389 INC_BOTH (from
, from_byte
);
2390 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2394 *charpos_ptr
= from
;
2395 *bytepos_ptr
= from_byte
;
2399 DEFUN ("forward-comment", Fforward_comment
, Sforward_comment
, 1, 1, 0,
2401 Move forward across up to COUNT comments. If COUNT is negative, move backward.
2402 Stop scanning if we find something other than a comment or whitespace.
2403 Set point to where scanning stops.
2404 If COUNT comments are found as expected, with nothing except whitespace
2405 between them, return t; otherwise return nil. */)
2408 ptrdiff_t from
, from_byte
, stop
;
2410 enum syntaxcode code
;
2411 int comstyle
= 0; /* style of comment encountered */
2412 bool comnested
= 0; /* whether the comment is nestable or not */
2415 ptrdiff_t out_charpos
, out_bytepos
;
2418 CHECK_NUMBER (count
);
2419 count1
= XINT (count
);
2420 stop
= count1
> 0 ? ZV
: BEGV
;
2426 from_byte
= PT_BYTE
;
2428 SETUP_SYNTAX_TABLE (from
, count1
);
2433 bool comstart_first
;
2434 int syntax
, other_syntax
;
2438 SET_PT_BOTH (from
, from_byte
);
2442 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2443 syntax
= SYNTAX_WITH_FLAGS (c
);
2445 comstart_first
= SYNTAX_FLAGS_COMSTART_FIRST (syntax
);
2446 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2447 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2448 INC_BOTH (from
, from_byte
);
2449 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2450 if (from
< stop
&& comstart_first
2451 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2452 other_syntax
= SYNTAX_WITH_FLAGS (c1
),
2453 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax
)))
2455 /* We have encountered a comment start sequence and we
2456 are ignoring all text inside comments. We must record
2457 the comment style this sequence begins so that later,
2458 only a comment end of the same style actually ends
2459 the comment section. */
2461 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2462 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2463 INC_BOTH (from
, from_byte
);
2464 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2467 while (code
== Swhitespace
|| (code
== Sendcomment
&& c
== '\n'));
2469 if (code
== Scomment_fence
)
2470 comstyle
= ST_COMMENT_STYLE
;
2471 else if (code
!= Scomment
)
2474 DEC_BOTH (from
, from_byte
);
2475 SET_PT_BOTH (from
, from_byte
);
2478 /* We're at the start of a comment. */
2479 found
= forw_comment (from
, from_byte
, stop
, comnested
, comstyle
, 0,
2480 &out_charpos
, &out_bytepos
, &dummy
);
2481 from
= out_charpos
; from_byte
= out_bytepos
;
2485 SET_PT_BOTH (from
, from_byte
);
2488 INC_BOTH (from
, from_byte
);
2489 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2490 /* We have skipped one comment. */
2503 SET_PT_BOTH (BEGV
, BEGV_BYTE
);
2508 DEC_BOTH (from
, from_byte
);
2509 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
2510 quoted
= char_quoted (from
, from_byte
);
2511 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2512 syntax
= SYNTAX_WITH_FLAGS (c
);
2515 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2516 if (code
== Sendcomment
)
2517 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2518 if (from
> stop
&& SYNTAX_FLAGS_COMEND_SECOND (syntax
)
2519 && prev_char_comend_first (from
, from_byte
)
2520 && !char_quoted (from
- 1, dec_bytepos (from_byte
)))
2523 /* We must record the comment style encountered so that
2524 later, we can match only the proper comment begin
2525 sequence of the same style. */
2526 DEC_BOTH (from
, from_byte
);
2528 /* Calling char_quoted, above, set up global syntax position
2529 at the new value of FROM. */
2530 c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2531 other_syntax
= SYNTAX_WITH_FLAGS (c1
);
2532 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2533 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2536 if (code
== Scomment_fence
)
2538 /* Skip until first preceding unquoted comment_fence. */
2539 bool fence_found
= 0;
2540 ptrdiff_t ini
= from
, ini_byte
= from_byte
;
2544 DEC_BOTH (from
, from_byte
);
2545 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2546 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2547 if (SYNTAX (c
) == Scomment_fence
2548 && !char_quoted (from
, from_byte
))
2553 else if (from
== stop
)
2556 if (fence_found
== 0)
2558 from
= ini
; /* Set point to ini + 1. */
2559 from_byte
= ini_byte
;
2563 /* We have skipped one comment. */
2566 else if (code
== Sendcomment
)
2568 found
= back_comment (from
, from_byte
, stop
, comnested
, comstyle
,
2569 &out_charpos
, &out_bytepos
);
2573 /* This end-of-line is not an end-of-comment.
2574 Treat it like a whitespace.
2575 CC-mode (and maybe others) relies on this behavior. */
2579 /* Failure: we should go back to the end of this
2580 not-quite-endcomment. */
2581 if (SYNTAX (c
) != code
)
2582 /* It was a two-char Sendcomment. */
2583 INC_BOTH (from
, from_byte
);
2589 /* We have skipped one comment. */
2590 from
= out_charpos
, from_byte
= out_bytepos
;
2594 else if (code
!= Swhitespace
|| quoted
)
2598 INC_BOTH (from
, from_byte
);
2599 SET_PT_BOTH (from
, from_byte
);
2607 SET_PT_BOTH (from
, from_byte
);
2612 /* Return syntax code of character C if C is an ASCII character
2613 or if MULTIBYTE_SYMBOL_P is false. Otherwise, return Ssymbol. */
2615 static enum syntaxcode
2616 syntax_multibyte (int c
, bool multibyte_symbol_p
)
2618 return ASCII_CHAR_P (c
) || !multibyte_symbol_p
? SYNTAX (c
) : Ssymbol
;
2622 scan_lists (EMACS_INT from
, EMACS_INT count
, EMACS_INT depth
, bool sexpflag
)
2625 ptrdiff_t stop
= count
> 0 ? ZV
: BEGV
;
2630 enum syntaxcode code
;
2631 EMACS_INT min_depth
= depth
; /* Err out if depth gets less than this. */
2632 int comstyle
= 0; /* style of comment encountered */
2633 bool comnested
= 0; /* whether the comment is nestable or not */
2635 EMACS_INT last_good
= from
;
2637 ptrdiff_t from_byte
;
2638 ptrdiff_t out_bytepos
, out_charpos
;
2640 bool multibyte_symbol_p
= sexpflag
&& multibyte_syntax_as_symbol
;
2642 if (depth
> 0) min_depth
= 0;
2644 if (from
> ZV
) from
= ZV
;
2645 if (from
< BEGV
) from
= BEGV
;
2647 from_byte
= CHAR_TO_BYTE (from
);
2652 SETUP_SYNTAX_TABLE (from
, count
);
2657 bool comstart_first
, prefix
;
2658 int syntax
, other_syntax
;
2659 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2660 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2661 syntax
= SYNTAX_WITH_FLAGS (c
);
2662 code
= syntax_multibyte (c
, multibyte_symbol_p
);
2663 comstart_first
= SYNTAX_FLAGS_COMSTART_FIRST (syntax
);
2664 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2665 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2666 prefix
= SYNTAX_FLAGS_PREFIX (syntax
);
2667 if (depth
== min_depth
)
2669 INC_BOTH (from
, from_byte
);
2670 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2671 if (from
< stop
&& comstart_first
2672 && (c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2673 other_syntax
= SYNTAX_WITH_FLAGS (c
),
2674 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax
))
2675 && parse_sexp_ignore_comments
)
2677 /* we have encountered a comment start sequence and we
2678 are ignoring all text inside comments. We must record
2679 the comment style this sequence begins so that later,
2680 only a comment end of the same style actually ends
2681 the comment section */
2683 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2684 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2685 INC_BOTH (from
, from_byte
);
2686 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2698 INC_BOTH (from
, from_byte
);
2699 /* treat following character as a word constituent */
2702 if (depth
|| !sexpflag
) break;
2703 /* This word counts as a sexp; return at end of it. */
2706 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2708 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2709 switch (syntax_multibyte (c
, multibyte_symbol_p
))
2713 INC_BOTH (from
, from_byte
);
2724 INC_BOTH (from
, from_byte
);
2728 case Scomment_fence
:
2729 comstyle
= ST_COMMENT_STYLE
;
2732 if (!parse_sexp_ignore_comments
) break;
2733 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2734 found
= forw_comment (from
, from_byte
, stop
,
2735 comnested
, comstyle
, 0,
2736 &out_charpos
, &out_bytepos
, &dummy
);
2737 from
= out_charpos
, from_byte
= out_bytepos
;
2744 INC_BOTH (from
, from_byte
);
2745 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2751 if (from
!= stop
&& c
== FETCH_CHAR_AS_MULTIBYTE (from_byte
))
2753 INC_BOTH (from
, from_byte
);
2763 if (!++depth
) goto done
;
2768 if (!--depth
) goto done
;
2769 if (depth
< min_depth
)
2770 xsignal3 (Qscan_error
,
2771 build_string ("Containing expression ends prematurely"),
2772 make_number (last_good
), make_number (from
));
2777 temp_pos
= dec_bytepos (from_byte
);
2778 stringterm
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
2781 enum syntaxcode c_code
;
2784 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2785 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2786 c_code
= syntax_multibyte (c
, multibyte_symbol_p
);
2788 ? c
== stringterm
&& c_code
== Sstring
2789 : c_code
== Sstring_fence
)
2796 INC_BOTH (from
, from_byte
);
2798 INC_BOTH (from
, from_byte
);
2800 INC_BOTH (from
, from_byte
);
2801 if (!depth
&& sexpflag
) goto done
;
2804 /* Ignore whitespace, punctuation, quote, endcomment. */
2809 /* Reached end of buffer. Error if within object, return nil if between */
2816 /* End of object reached */
2826 DEC_BOTH (from
, from_byte
);
2827 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2828 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2829 syntax
= SYNTAX_WITH_FLAGS (c
);
2830 code
= syntax_multibyte (c
, multibyte_symbol_p
);
2831 if (depth
== min_depth
)
2834 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2835 if (code
== Sendcomment
)
2836 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2837 if (from
> stop
&& SYNTAX_FLAGS_COMEND_SECOND (syntax
)
2838 && prev_char_comend_first (from
, from_byte
)
2839 && parse_sexp_ignore_comments
)
2841 /* We must record the comment style encountered so that
2842 later, we can match only the proper comment begin
2843 sequence of the same style. */
2844 int c2
, other_syntax
;
2845 DEC_BOTH (from
, from_byte
);
2846 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2848 c2
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2849 other_syntax
= SYNTAX_WITH_FLAGS (c2
);
2850 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2851 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2854 /* Quoting turns anything except a comment-ender
2855 into a word character. Note that this cannot be true
2856 if we decremented FROM in the if-statement above. */
2857 if (code
!= Sendcomment
&& char_quoted (from
, from_byte
))
2859 DEC_BOTH (from
, from_byte
);
2862 else if (SYNTAX_FLAGS_PREFIX (syntax
))
2871 if (depth
|| !sexpflag
) break;
2872 /* This word counts as a sexp; count object finished
2873 after passing it. */
2876 temp_pos
= from_byte
;
2877 if (! NILP (BVAR (current_buffer
, enable_multibyte_characters
)))
2881 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2882 c1
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
2883 /* Don't allow comment-end to be quoted. */
2884 if (syntax_multibyte (c1
, multibyte_symbol_p
) == Sendcomment
)
2886 quoted
= char_quoted (from
- 1, temp_pos
);
2889 DEC_BOTH (from
, from_byte
);
2890 temp_pos
= dec_bytepos (temp_pos
);
2891 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2893 c1
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
2895 switch (syntax_multibyte (c1
, multibyte_symbol_p
))
2897 case Sword
: case Ssymbol
: case Squote
: break;
2898 default: goto done2
;
2900 DEC_BOTH (from
, from_byte
);
2909 temp_pos
= dec_bytepos (from_byte
);
2910 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2911 if (from
!= stop
&& c
== FETCH_CHAR_AS_MULTIBYTE (temp_pos
))
2912 DEC_BOTH (from
, from_byte
);
2922 if (!++depth
) goto done2
;
2927 if (!--depth
) goto done2
;
2928 if (depth
< min_depth
)
2929 xsignal3 (Qscan_error
,
2930 build_string ("Containing expression ends prematurely"),
2931 make_number (last_good
), make_number (from
));
2935 if (!parse_sexp_ignore_comments
)
2937 found
= back_comment (from
, from_byte
, stop
, comnested
, comstyle
,
2938 &out_charpos
, &out_bytepos
);
2939 /* FIXME: if !found, it really wasn't a comment-end.
2940 For single-char Sendcomment, we can't do much about it apart
2941 from skipping the char.
2942 For 2-char endcomments, we could try again, taking both
2943 chars as separate entities, but it's a lot of trouble
2944 for very little gain, so we don't bother either. -sm */
2946 from
= out_charpos
, from_byte
= out_bytepos
;
2949 case Scomment_fence
:
2955 DEC_BOTH (from
, from_byte
);
2956 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2957 if (!char_quoted (from
, from_byte
))
2959 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2960 if (syntax_multibyte (c
, multibyte_symbol_p
) == code
)
2964 if (code
== Sstring_fence
&& !depth
&& sexpflag
) goto done2
;
2968 stringterm
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2973 DEC_BOTH (from
, from_byte
);
2974 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2975 if (!char_quoted (from
, from_byte
))
2977 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2979 && (syntax_multibyte (c
, multibyte_symbol_p
)
2984 if (!depth
&& sexpflag
) goto done2
;
2987 /* Ignore whitespace, punctuation, quote, endcomment. */
2992 /* Reached start of buffer. Error if within object, return nil if between */
3005 XSETFASTINT (val
, from
);
3009 xsignal3 (Qscan_error
,
3010 build_string ("Unbalanced parentheses"),
3011 make_number (last_good
), make_number (from
));
3014 DEFUN ("scan-lists", Fscan_lists
, Sscan_lists
, 3, 3, 0,
3015 doc
: /* Scan from character number FROM by COUNT lists.
3016 Scan forward if COUNT is positive, backward if COUNT is negative.
3017 Return the character number of the position thus found.
3019 A \"list", in this context, refers to a balanced parenthetical
3020 grouping, as determined by the syntax table.
3022 If DEPTH is nonzero, treat that as the nesting depth of the starting
3023 point (i.e. the starting point is DEPTH parentheses deep). This
3024 function scans over parentheses until the depth goes to zero COUNT
3025 times. Hence, positive DEPTH moves out that number of levels of
3026 parentheses, while negative DEPTH moves to a deeper level.
3028 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3030 If we reach the beginning or end of the accessible part of the buffer
3031 before we have scanned over COUNT lists, return nil if the depth at
3032 that point is zero, and signal a error if the depth is nonzero. */)
3033 (Lisp_Object from
, Lisp_Object count
, Lisp_Object depth
)
3035 CHECK_NUMBER (from
);
3036 CHECK_NUMBER (count
);
3037 CHECK_NUMBER (depth
);
3039 return scan_lists (XINT (from
), XINT (count
), XINT (depth
), 0);
3042 DEFUN ("scan-sexps", Fscan_sexps
, Sscan_sexps
, 2, 2, 0,
3043 doc
: /* Scan from character number FROM by COUNT balanced expressions.
3044 If COUNT is negative, scan backwards.
3045 Returns the character number of the position thus found.
3047 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3049 If the beginning or end of (the accessible part of) the buffer is reached
3050 in the middle of a parenthetical grouping, an error is signaled.
3051 If the beginning or end is reached between groupings
3052 but before count is used up, nil is returned. */)
3053 (Lisp_Object from
, Lisp_Object count
)
3055 CHECK_NUMBER (from
);
3056 CHECK_NUMBER (count
);
3058 return scan_lists (XINT (from
), XINT (count
), 0, 1);
3061 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars
, Sbackward_prefix_chars
,
3063 doc
: /* Move point backward over any number of chars with prefix syntax.
3064 This includes chars with expression prefix syntax class (') and those with
3065 the prefix syntax flag (p). */)
3068 ptrdiff_t beg
= BEGV
;
3069 ptrdiff_t opoint
= PT
;
3070 ptrdiff_t opoint_byte
= PT_BYTE
;
3072 ptrdiff_t pos_byte
= PT_BYTE
;
3077 SET_PT_BOTH (opoint
, opoint_byte
);
3082 SETUP_SYNTAX_TABLE (pos
, -1);
3084 DEC_BOTH (pos
, pos_byte
);
3086 while (!char_quoted (pos
, pos_byte
)
3087 /* Previous statement updates syntax table. */
3088 && ((c
= FETCH_CHAR_AS_MULTIBYTE (pos_byte
), SYNTAX (c
) == Squote
)
3089 || syntax_prefix_flag_p (c
)))
3092 opoint_byte
= pos_byte
;
3095 DEC_BOTH (pos
, pos_byte
);
3098 SET_PT_BOTH (opoint
, opoint_byte
);
3103 /* Parse forward from FROM / FROM_BYTE to END,
3104 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
3105 and return a description of the state of the parse at END.
3106 If STOPBEFORE, stop at the start of an atom.
3107 If COMMENTSTOP is 1, stop at the start of a comment.
3108 If COMMENTSTOP is -1, stop at the start or end of a comment,
3109 after the beginning of a string, or after the end of a string. */
3112 scan_sexps_forward (struct lisp_parse_state
*stateptr
,
3113 ptrdiff_t from
, ptrdiff_t from_byte
, ptrdiff_t end
,
3114 EMACS_INT targetdepth
, bool stopbefore
,
3115 Lisp_Object oldstate
, int commentstop
)
3117 struct lisp_parse_state state
;
3118 enum syntaxcode code
;
3121 struct level
{ ptrdiff_t last
, prev
; };
3122 struct level levelstart
[100];
3123 struct level
*curlevel
= levelstart
;
3124 struct level
*endlevel
= levelstart
+ 100;
3125 EMACS_INT depth
; /* Paren depth of current scanning location.
3126 level - levelstart equals this except
3127 when the depth becomes negative. */
3128 EMACS_INT mindepth
; /* Lowest DEPTH value seen. */
3129 bool start_quoted
= 0; /* True means starting after a char quote. */
3131 ptrdiff_t prev_from
; /* Keep one character before FROM. */
3132 ptrdiff_t prev_from_byte
;
3133 int prev_from_syntax
;
3134 bool boundary_stop
= commentstop
== -1;
3137 ptrdiff_t out_bytepos
, out_charpos
;
3141 prev_from_byte
= from_byte
;
3143 DEC_BOTH (prev_from
, prev_from_byte
);
3145 /* Use this macro instead of `from++'. */
3147 do { prev_from = from; \
3148 prev_from_byte = from_byte; \
3149 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
3150 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
3151 INC_BOTH (from, from_byte); \
3153 UPDATE_SYNTAX_TABLE_FORWARD (from); \
3159 if (NILP (oldstate
))
3162 state
.instring
= -1;
3163 state
.incomment
= 0;
3164 state
.comstyle
= 0; /* comment style a by default. */
3165 state
.comstr_start
= -1; /* no comment/string seen. */
3169 tem
= Fcar (oldstate
);
3175 oldstate
= Fcdr (oldstate
);
3176 oldstate
= Fcdr (oldstate
);
3177 oldstate
= Fcdr (oldstate
);
3178 tem
= Fcar (oldstate
);
3179 /* Check whether we are inside string_fence-style string: */
3180 state
.instring
= (!NILP (tem
)
3181 ? (CHARACTERP (tem
) ? XFASTINT (tem
) : ST_STRING_STYLE
)
3184 oldstate
= Fcdr (oldstate
);
3185 tem
= Fcar (oldstate
);
3186 state
.incomment
= (!NILP (tem
)
3187 ? (INTEGERP (tem
) ? XINT (tem
) : -1)
3190 oldstate
= Fcdr (oldstate
);
3191 tem
= Fcar (oldstate
);
3192 start_quoted
= !NILP (tem
);
3194 /* if the eighth element of the list is nil, we are in comment
3195 style a. If it is non-nil, we are in comment style b */
3196 oldstate
= Fcdr (oldstate
);
3197 oldstate
= Fcdr (oldstate
);
3198 tem
= Fcar (oldstate
);
3199 state
.comstyle
= (NILP (tem
)
3201 : (RANGED_INTEGERP (0, tem
, ST_COMMENT_STYLE
)
3203 : ST_COMMENT_STYLE
));
3205 oldstate
= Fcdr (oldstate
);
3206 tem
= Fcar (oldstate
);
3207 state
.comstr_start
=
3208 RANGED_INTEGERP (PTRDIFF_MIN
, tem
, PTRDIFF_MAX
) ? XINT (tem
) : -1;
3209 oldstate
= Fcdr (oldstate
);
3210 tem
= Fcar (oldstate
);
3211 while (!NILP (tem
)) /* >= second enclosing sexps. */
3213 Lisp_Object temhd
= Fcar (tem
);
3214 if (RANGED_INTEGERP (PTRDIFF_MIN
, temhd
, PTRDIFF_MAX
))
3215 curlevel
->last
= XINT (temhd
);
3216 if (++curlevel
== endlevel
)
3217 curlevel
--; /* error ("Nesting too deep for parser"); */
3218 curlevel
->prev
= -1;
3219 curlevel
->last
= -1;
3226 curlevel
->prev
= -1;
3227 curlevel
->last
= -1;
3229 SETUP_SYNTAX_TABLE (prev_from
, 1);
3230 temp
= FETCH_CHAR (prev_from_byte
);
3231 prev_from_syntax
= SYNTAX_WITH_FLAGS (temp
);
3232 UPDATE_SYNTAX_TABLE_FORWARD (from
);
3234 /* Enter the loop at a place appropriate for initial state. */
3236 if (state
.incomment
)
3237 goto startincomment
;
3238 if (state
.instring
>= 0)
3240 nofence
= state
.instring
!= ST_STRING_STYLE
;
3242 goto startquotedinstring
;
3245 else if (start_quoted
)
3252 code
= prev_from_syntax
& 0xff;
3255 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax
)
3256 && (c1
= FETCH_CHAR (from_byte
),
3257 syntax
= SYNTAX_WITH_FLAGS (c1
),
3258 SYNTAX_FLAGS_COMSTART_SECOND (syntax
)))
3259 /* Duplicate code to avoid a complex if-expression
3260 which causes trouble for the SGI compiler. */
3262 /* Record the comment style we have entered so that only
3263 the comment-end sequence of the same style actually
3264 terminates the comment section. */
3266 = SYNTAX_FLAGS_COMMENT_STYLE (syntax
, prev_from_syntax
);
3267 comnested
= (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax
)
3268 | SYNTAX_FLAGS_COMMENT_NESTED (syntax
));
3269 state
.incomment
= comnested
? 1 : -1;
3270 state
.comstr_start
= prev_from
;
3274 else if (code
== Scomment_fence
)
3276 /* Record the comment style we have entered so that only
3277 the comment-end sequence of the same style actually
3278 terminates the comment section. */
3279 state
.comstyle
= ST_COMMENT_STYLE
;
3280 state
.incomment
= -1;
3281 state
.comstr_start
= prev_from
;
3284 else if (code
== Scomment
)
3286 state
.comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax
, 0);
3287 state
.incomment
= (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax
) ?
3289 state
.comstr_start
= prev_from
;
3292 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax
))
3298 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3299 curlevel
->last
= prev_from
;
3301 if (from
== end
) goto endquoted
;
3304 /* treat following character as a word constituent */
3307 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3308 curlevel
->last
= prev_from
;
3312 int symchar
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3313 switch (SYNTAX (symchar
))
3318 if (from
== end
) goto endquoted
;
3330 curlevel
->prev
= curlevel
->last
;
3333 case Scomment_fence
: /* Can't happen because it's handled above. */
3335 if (commentstop
|| boundary_stop
) goto done
;
3337 /* The (from == BEGV) test was to enter the loop in the middle so
3338 that we find a 2-char comment ender even if we start in the
3339 middle of it. We don't want to do that if we're just at the
3340 beginning of the comment (think of (*) ... (*)). */
3341 found
= forw_comment (from
, from_byte
, end
,
3342 state
.incomment
, state
.comstyle
,
3343 (from
== BEGV
|| from
< state
.comstr_start
+ 3)
3344 ? 0 : prev_from_syntax
,
3345 &out_charpos
, &out_bytepos
, &state
.incomment
);
3346 from
= out_charpos
; from_byte
= out_bytepos
;
3347 /* Beware! prev_from and friends are invalid now.
3348 Luckily, the `done' doesn't use them and the INC_FROM
3349 sets them to a sane value without looking at them. */
3350 if (!found
) goto done
;
3352 state
.incomment
= 0;
3353 state
.comstyle
= 0; /* reset the comment style */
3354 if (boundary_stop
) goto done
;
3358 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3360 /* curlevel++->last ran into compiler bug on Apollo */
3361 curlevel
->last
= prev_from
;
3362 if (++curlevel
== endlevel
)
3363 curlevel
--; /* error ("Nesting too deep for parser"); */
3364 curlevel
->prev
= -1;
3365 curlevel
->last
= -1;
3366 if (targetdepth
== depth
) goto done
;
3371 if (depth
< mindepth
)
3373 if (curlevel
!= levelstart
)
3375 curlevel
->prev
= curlevel
->last
;
3376 if (targetdepth
== depth
) goto done
;
3381 state
.comstr_start
= from
- 1;
3382 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3383 curlevel
->last
= prev_from
;
3384 state
.instring
= (code
== Sstring
3385 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte
))
3387 if (boundary_stop
) goto done
;
3390 nofence
= state
.instring
!= ST_STRING_STYLE
;
3395 enum syntaxcode c_code
;
3397 if (from
>= end
) goto done
;
3398 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3399 c_code
= SYNTAX (c
);
3401 /* Check C_CODE here so that if the char has
3402 a syntax-table property which says it is NOT
3403 a string character, it does not end the string. */
3404 if (nofence
&& c
== state
.instring
&& c_code
== Sstring
)
3410 if (!nofence
) goto string_end
;
3415 startquotedinstring
:
3416 if (from
>= end
) goto endquoted
;
3422 state
.instring
= -1;
3423 curlevel
->prev
= curlevel
->last
;
3425 if (boundary_stop
) goto done
;
3429 /* FIXME: We should do something with it. */
3432 /* Ignore whitespace, punctuation, quote, endcomment. */
3438 stop
: /* Here if stopping before start of sexp. */
3439 from
= prev_from
; /* We have just fetched the char that starts it; */
3440 from_byte
= prev_from_byte
;
3441 goto done
; /* but return the position before it. */
3446 state
.depth
= depth
;
3447 state
.mindepth
= mindepth
;
3448 state
.thislevelstart
= curlevel
->prev
;
3449 state
.prevlevelstart
3450 = (curlevel
== levelstart
) ? -1 : (curlevel
- 1)->last
;
3451 state
.location
= from
;
3452 state
.location_byte
= from_byte
;
3453 state
.levelstarts
= Qnil
;
3454 while (curlevel
> levelstart
)
3455 state
.levelstarts
= Fcons (make_number ((--curlevel
)->last
),
3462 DEFUN ("parse-partial-sexp", Fparse_partial_sexp
, Sparse_partial_sexp
, 2, 6, 0,
3463 doc
: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3464 Parsing stops at TO or when certain criteria are met;
3465 point is set to where parsing stops.
3466 If fifth arg OLDSTATE is omitted or nil,
3467 parsing assumes that FROM is the beginning of a function.
3468 Value is a list of elements describing final state of parsing:
3470 1. character address of start of innermost containing list; nil if none.
3471 2. character address of start of last complete sexp terminated.
3472 3. non-nil if inside a string.
3473 (it is the character that will terminate the string,
3474 or t if the string should be terminated by a generic string delimiter.)
3475 4. nil if outside a comment, t if inside a non-nestable comment,
3476 else an integer (the current comment nesting).
3477 5. t if following a quote character.
3478 6. the minimum paren-depth encountered during this scan.
3479 7. style of comment, if any.
3480 8. character address of start of comment or string; nil if not in one.
3481 9. Intermediate data for continuation of parsing (subject to change).
3482 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3483 in parentheses becomes equal to TARGETDEPTH.
3484 Fourth arg STOPBEFORE non-nil means stop when come to
3485 any character that starts a sexp.
3486 Fifth arg OLDSTATE is a list like what this function returns.
3487 It is used to initialize the state of the parse. Elements number 1, 2, 6
3489 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3490 If it is symbol `syntax-table', stop after the start of a comment or a
3491 string, or after end of a comment or a string. */)
3492 (Lisp_Object from
, Lisp_Object to
, Lisp_Object targetdepth
,
3493 Lisp_Object stopbefore
, Lisp_Object oldstate
, Lisp_Object commentstop
)
3495 struct lisp_parse_state state
;
3498 if (!NILP (targetdepth
))
3500 CHECK_NUMBER (targetdepth
);
3501 target
= XINT (targetdepth
);
3504 target
= TYPE_MINIMUM (EMACS_INT
); /* We won't reach this depth */
3506 validate_region (&from
, &to
);
3507 scan_sexps_forward (&state
, XINT (from
), CHAR_TO_BYTE (XINT (from
)),
3509 target
, !NILP (stopbefore
), oldstate
,
3511 ? 0 : (EQ (commentstop
, Qsyntax_table
) ? -1 : 1)));
3513 SET_PT_BOTH (state
.location
, state
.location_byte
);
3515 return Fcons (make_number (state
.depth
),
3516 Fcons (state
.prevlevelstart
< 0
3517 ? Qnil
: make_number (state
.prevlevelstart
),
3518 Fcons (state
.thislevelstart
< 0
3519 ? Qnil
: make_number (state
.thislevelstart
),
3520 Fcons (state
.instring
>= 0
3521 ? (state
.instring
== ST_STRING_STYLE
3522 ? Qt
: make_number (state
.instring
)) : Qnil
,
3523 Fcons (state
.incomment
< 0 ? Qt
:
3524 (state
.incomment
== 0 ? Qnil
:
3525 make_number (state
.incomment
)),
3526 Fcons (state
.quoted
? Qt
: Qnil
,
3527 Fcons (make_number (state
.mindepth
),
3528 Fcons ((state
.comstyle
3529 ? (state
.comstyle
== ST_COMMENT_STYLE
3531 : make_number (state
.comstyle
))
3533 Fcons (((state
.incomment
3534 || (state
.instring
>= 0))
3535 ? make_number (state
.comstr_start
)
3537 Fcons (state
.levelstarts
, Qnil
))))))))));
3541 init_syntax_once (void)
3546 /* This has to be done here, before we call Fmake_char_table. */
3547 DEFSYM (Qsyntax_table
, "syntax-table");
3549 /* Create objects which can be shared among syntax tables. */
3550 Vsyntax_code_object
= make_uninit_vector (Smax
);
3551 for (i
= 0; i
< Smax
; i
++)
3552 ASET (Vsyntax_code_object
, i
, Fcons (make_number (i
), Qnil
));
3554 /* Now we are ready to set up this property, so we can
3555 create syntax tables. */
3556 Fput (Qsyntax_table
, Qchar_table_extra_slots
, make_number (0));
3558 temp
= AREF (Vsyntax_code_object
, Swhitespace
);
3560 Vstandard_syntax_table
= Fmake_char_table (Qsyntax_table
, temp
);
3562 /* Control characters should not be whitespace. */
3563 temp
= AREF (Vsyntax_code_object
, Spunct
);
3564 for (i
= 0; i
<= ' ' - 1; i
++)
3565 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3566 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 0177, temp
);
3568 /* Except that a few really are whitespace. */
3569 temp
= AREF (Vsyntax_code_object
, Swhitespace
);
3570 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ' ', temp
);
3571 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\t', temp
);
3572 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\n', temp
);
3573 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 015, temp
);
3574 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 014, temp
);
3576 temp
= AREF (Vsyntax_code_object
, Sword
);
3577 for (i
= 'a'; i
<= 'z'; i
++)
3578 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3579 for (i
= 'A'; i
<= 'Z'; i
++)
3580 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3581 for (i
= '0'; i
<= '9'; i
++)
3582 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3584 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '$', temp
);
3585 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '%', temp
);
3587 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '(',
3588 Fcons (make_number (Sopen
), make_number (')')));
3589 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ')',
3590 Fcons (make_number (Sclose
), make_number ('(')));
3591 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '[',
3592 Fcons (make_number (Sopen
), make_number (']')));
3593 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ']',
3594 Fcons (make_number (Sclose
), make_number ('[')));
3595 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '{',
3596 Fcons (make_number (Sopen
), make_number ('}')));
3597 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '}',
3598 Fcons (make_number (Sclose
), make_number ('{')));
3599 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '"',
3600 Fcons (make_number (Sstring
), Qnil
));
3601 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\\',
3602 Fcons (make_number (Sescape
), Qnil
));
3604 temp
= AREF (Vsyntax_code_object
, Ssymbol
);
3605 for (i
= 0; i
< 10; i
++)
3607 c
= "_-+*/&|<>="[i
];
3608 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, c
, temp
);
3611 temp
= AREF (Vsyntax_code_object
, Spunct
);
3612 for (i
= 0; i
< 12; i
++)
3614 c
= ".,;:?!#@~^'`"[i
];
3615 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, c
, temp
);
3618 /* All multibyte characters have syntax `word' by default. */
3619 temp
= AREF (Vsyntax_code_object
, Sword
);
3620 char_table_set_range (Vstandard_syntax_table
, 0x80, MAX_CHAR
, temp
);
3624 syms_of_syntax (void)
3626 DEFSYM (Qsyntax_table_p
, "syntax-table-p");
3628 staticpro (&Vsyntax_code_object
);
3630 staticpro (&gl_state
.object
);
3631 staticpro (&gl_state
.global_code
);
3632 staticpro (&gl_state
.current_syntax_table
);
3633 staticpro (&gl_state
.old_prop
);
3635 /* Defined in regex.c. */
3636 staticpro (&re_match_object
);
3638 DEFSYM (Qscan_error
, "scan-error");
3639 Fput (Qscan_error
, Qerror_conditions
,
3640 listn (CONSTYPE_PURE
, 2, Qscan_error
, Qerror
));
3641 Fput (Qscan_error
, Qerror_message
,
3642 build_pure_c_string ("Scan error"));
3644 DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments
,
3645 doc
: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
3647 DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties
,
3648 doc
: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3649 Otherwise, that text property is simply ignored.
3650 See the info node `(elisp)Syntax Properties' for a description of the
3651 `syntax-table' property. */);
3653 DEFVAR_INT ("parse-sexp-propertize-done", parse_sexp_propertize_done
,
3654 doc
: /* Position up to which syntax-table properties have been set. */);
3655 parse_sexp_propertize_done
= -1;
3657 DEFVAR_LISP ("parse-sexp-propertize-function",
3658 Vparse_sexp_propertize_function
,
3659 doc
: /* Function to set the `syntax-table' text property.
3660 Called with one argument, the position at which the property is needed.
3661 After running it, `parse-sexp-propertize-done' should be strictly greater
3662 than the argument passed. */);
3663 /* Note: Qnil is a temporary (and invalid) value; it will be properly set in
3665 Vparse_sexp_propertize_function
= Qnil
;
3667 words_include_escapes
= 0;
3668 DEFVAR_BOOL ("words-include-escapes", words_include_escapes
,
3669 doc
: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
3671 DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol
,
3672 doc
: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
3673 multibyte_syntax_as_symbol
= 0;
3675 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3676 open_paren_in_column_0_is_defun_start
,
3677 doc
: /* Non-nil means an open paren in column 0 denotes the start of a defun. */);
3678 open_paren_in_column_0_is_defun_start
= 1;
3681 DEFVAR_LISP ("find-word-boundary-function-table",
3682 Vfind_word_boundary_function_table
,
3684 Char table of functions to search for the word boundary.
3685 Each function is called with two arguments; POS and LIMIT.
3686 POS and LIMIT are character positions in the current buffer.
3688 If POS is less than LIMIT, POS is at the first character of a word,
3689 and the return value of a function is a position after the last
3690 character of that word.
3692 If POS is not less than LIMIT, POS is at the last character of a word,
3693 and the return value of a function is a position at the first
3694 character of that word.
3696 In both cases, LIMIT bounds the search. */);
3697 Vfind_word_boundary_function_table
= Fmake_char_table (Qnil
, Qnil
);
3699 defsubr (&Ssyntax_table_p
);
3700 defsubr (&Ssyntax_table
);
3701 defsubr (&Sstandard_syntax_table
);
3702 defsubr (&Scopy_syntax_table
);
3703 defsubr (&Sset_syntax_table
);
3704 defsubr (&Schar_syntax
);
3705 defsubr (&Smatching_paren
);
3706 defsubr (&Sstring_to_syntax
);
3707 defsubr (&Smodify_syntax_entry
);
3708 defsubr (&Sinternal_describe_syntax_value
);
3710 defsubr (&Sforward_word
);
3712 defsubr (&Sskip_chars_forward
);
3713 defsubr (&Sskip_chars_backward
);
3714 defsubr (&Sskip_syntax_forward
);
3715 defsubr (&Sskip_syntax_backward
);
3717 defsubr (&Sforward_comment
);
3718 defsubr (&Sscan_lists
);
3719 defsubr (&Sscan_sexps
);
3720 defsubr (&Sbackward_prefix_chars
);
3721 defsubr (&Sparse_partial_sexp
);